Stage 2 #1: SFTP destinations CRUD + connection test

Foundation for the move/quarantine pipeline. Lets users register one or
more remote SFTP destinations through the API, store credentials at rest
under /data/sftp/{id}.{password|key} (mode 600), and verify connectivity
+ write access via a test endpoint.

Endpoints:
  GET    /api/sftp/destinations
  POST   /api/sftp/destinations             — create
  PUT    /api/sftp/destinations/{id}        — update
  DELETE /api/sftp/destinations/{id}
  POST   /api/sftp/destinations/{id}/test   — connect, stat base_path, mkdir probe
  POST   /api/sftp/keypair                  — generate ED25519 keypair

Host keys pinned per-destination on first connect (TOFU); subsequent
mismatches are rejected. paramiko added to requirements.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Carlos
2026-04-26 20:04:42 -04:00
parent 8b0fee0055
commit 7436b23db3
5 changed files with 386 additions and 1 deletions

View File

@@ -20,6 +20,7 @@ from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
import scanner as sc
import sftp as sftp_mod
app = FastAPI(title="Duplicate Finder")
@@ -882,3 +883,171 @@ def export_csv():
media_type="text/csv",
headers={"Content-Disposition": "attachment; filename=dup-finder-export.csv"},
)
# ── SFTP destinations ────────────────────────────────────────────────────────
class SFTPDestBody(BaseModel):
    # Request payload accepted by both the create (POST) and update (PUT)
    # destination endpoints.
    name: str  # stored in a UNIQUE column; duplicates are rejected on create
    host: str
    port: int = 22
    username: str
    auth_method: str  # 'password' | 'key'
    base_path: str  # remote path checked by the connection test
    mirror_structure: bool = True  # persisted as 1/0 in the database
    # Either password (for password auth) or private_key (for key auth).
    # Optional on update — omit to leave existing credential untouched.
    password: Optional[str] = None
    private_key: Optional[str] = None
def _dest_row_to_dict(row) -> dict:
    """Convert a ``sftp_destinations`` row into the dict returned by the API.

    Integer flag columns are coerced to ``bool`` and a ``has_credentials``
    entry is added by asking the SFTP module whether a credential is stored
    for the row's id and auth method.
    """
    verbatim_columns = (
        "id",
        "name",
        "host",
        "port",
        "username",
        "auth_method",
        "base_path",
    )
    result = {column: row[column] for column in verbatim_columns}
    for flag_column in ("mirror_structure", "enabled"):
        result[flag_column] = bool(row[flag_column])
    for column in ("created_at", "last_tested_at", "last_test_result"):
        result[column] = row[column]
    result["has_credentials"] = sftp_mod.has_credentials(
        row["id"], row["auth_method"]
    )
    return result
@app.get("/api/sftp/destinations")
def list_destinations():
    """Return every registered SFTP destination, ordered by name."""
    db = get_db()
    cursor = db.cursor()
    cursor.execute("SELECT * FROM sftp_destinations ORDER BY name")
    destinations = []
    for record in cursor.fetchall():
        destinations.append(_dest_row_to_dict(record))
    db.close()
    return destinations
@app.post("/api/sftp/destinations", status_code=201)
def create_destination(body: SFTPDestBody):
    """Register a new SFTP destination and store its credential.

    The database row and the credential file are created together: the
    credential is written before the INSERT is committed, so a failure while
    storing it does not leave behind a destination without credentials.

    Raises:
        HTTPException: 400 when ``auth_method`` is invalid or the matching
            credential is missing; 409 when the name is already in use.
    """
    if body.auth_method not in ("password", "key"):
        raise HTTPException(400, "auth_method must be 'password' or 'key'")
    if body.auth_method == "password" and not body.password:
        raise HTTPException(400, "password required for password auth")
    if body.auth_method == "key" and not body.private_key:
        raise HTTPException(400, "private_key required for key auth")

    con = get_db()
    try:
        cur = con.cursor()
        try:
            cur.execute("""
                INSERT INTO sftp_destinations
                (name, host, port, username, auth_method, base_path, mirror_structure)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (body.name, body.host, body.port, body.username,
                  body.auth_method, body.base_path, 1 if body.mirror_structure else 0))
        except sqlite3.IntegrityError:
            raise HTTPException(409, f"Destination name already in use: {body.name}")
        dest_id = cur.lastrowid

        try:
            if body.auth_method == "password":
                sftp_mod.write_password(dest_id, body.password)
            else:
                sftp_mod.write_private_key(dest_id, body.private_key)
            con.commit()
        except Exception:
            # Remove any partially written secret first, then undo the INSERT.
            # NOTE(review): assumes get_db() uses sqlite3's default transaction
            # handling so that rollback() discards the uncommitted row — confirm.
            sftp_mod.delete_credentials(dest_id)
            con.rollback()
            raise

        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        return _dest_row_to_dict(cur.fetchone())
    finally:
        # Always release the connection, including on the error paths above.
        con.close()
@app.put("/api/sftp/destinations/{dest_id}")
def update_destination(dest_id: int, body: SFTPDestBody):
    """Update an existing SFTP destination and, optionally, its credential.

    A credential is only rewritten when the body supplies one for the selected
    ``auth_method``; otherwise the stored credential is left untouched. When
    the auth method changes, the previously stored secrets are removed.

    Raises:
        HTTPException: 400 when ``auth_method`` is invalid; 404 when the
            destination does not exist; 409 when the new name is already used
            by another destination.
    """
    # Same validation as on create, so an unusable auth_method is never stored.
    if body.auth_method not in ("password", "key"):
        raise HTTPException(400, "auth_method must be 'password' or 'key'")

    con = get_db()
    try:
        cur = con.cursor()
        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        row = cur.fetchone()
        if not row:
            raise HTTPException(404, "Destination not found")

        try:
            cur.execute("""
                UPDATE sftp_destinations
                SET name=?, host=?, port=?, username=?, auth_method=?,
                base_path=?, mirror_structure=?
                WHERE id=?
            """, (body.name, body.host, body.port, body.username,
                  body.auth_method, body.base_path,
                  1 if body.mirror_structure else 0, dest_id))
        except sqlite3.IntegrityError:
            # name is UNIQUE; report the clash the same way create does.
            raise HTTPException(409, f"Destination name already in use: {body.name}")

        # If auth method changed, drop old creds.
        # NOTE(review): delete_credentials also removes the pinned host keys
        # file, so the next connection re-pins the host key — confirm intended.
        if row["auth_method"] != body.auth_method:
            sftp_mod.delete_credentials(dest_id)
        if body.auth_method == "password" and body.password:
            sftp_mod.write_password(dest_id, body.password)
        elif body.auth_method == "key" and body.private_key:
            sftp_mod.write_private_key(dest_id, body.private_key)
        con.commit()

        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        return _dest_row_to_dict(cur.fetchone())
    finally:
        # Always release the connection, including on the error paths above.
        con.close()
@app.delete("/api/sftp/destinations/{dest_id}", status_code=204)
def delete_destination(dest_id: int):
    """Delete a destination row and then its stored secrets.

    Responds with 404 when no row has the given id; otherwise the deletion is
    committed, the credential files are cleaned up and an empty 204 response
    is returned.
    """
    db = get_db()
    cursor = db.cursor()
    cursor.execute("DELETE FROM sftp_destinations WHERE id=?", (dest_id,))
    row_was_deleted = cursor.rowcount != 0
    if row_was_deleted:
        db.commit()
    db.close()
    if not row_was_deleted:
        raise HTTPException(404, "Destination not found")
    sftp_mod.delete_credentials(dest_id)
    return Response(status_code=204)
@app.post("/api/sftp/destinations/{dest_id}/test")
def test_destination(dest_id: int):
    """Run a connection test for one destination and record the outcome.

    Responds with 404 for an unknown id and 400 when no credential is stored.
    Otherwise the test result is saved in ``last_tested_at`` /
    ``last_test_result`` and returned together with the refreshed destination.
    """
    db = get_db()
    cursor = db.cursor()
    cursor.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
    record = cursor.fetchone()
    if not record:
        db.close()
        raise HTTPException(404, "Destination not found")

    dest = _dest_row_to_dict(record)
    if not dest["has_credentials"]:
        db.close()
        raise HTTPException(400, "No credentials stored for this destination")

    ok, message = sftp_mod.test_connection(dest)
    # A success is stored as the literal "ok"; a failure stores its message.
    if ok:
        stored_result = "ok"
    else:
        stored_result = message
    cursor.execute("""
        UPDATE sftp_destinations
        SET last_tested_at=CURRENT_TIMESTAMP, last_test_result=?
        WHERE id=?
    """, (stored_result, dest_id))
    db.commit()

    cursor.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
    refreshed = _dest_row_to_dict(cursor.fetchone())
    db.close()
    return {"ok": ok, "message": message, "destination": refreshed}
@app.post("/api/sftp/keypair")
def generate_keypair():
    """Generate a fresh ED25519 keypair and return both halves.

    The response carries the private key, the public key and its fingerprint;
    the caller is expected to paste the private key into a destination's
    private_key field on create/update.
    """
    private_key, public_key, fingerprint = sftp_mod.generate_keypair()
    return dict(
        private_key=private_key,
        public_key=public_key,
        fingerprint=fingerprint,
    )

View File

@@ -139,6 +139,21 @@ def init_db():
suggested INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS sftp_destinations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
host TEXT NOT NULL,
port INTEGER NOT NULL DEFAULT 22,
username TEXT NOT NULL,
auth_method TEXT NOT NULL, -- 'password' | 'key'
base_path TEXT NOT NULL,
mirror_structure INTEGER NOT NULL DEFAULT 1,
enabled INTEGER NOT NULL DEFAULT 1,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_tested_at TIMESTAMP,
last_test_result TEXT
);
CREATE TABLE IF NOT EXISTS decisions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,

200
app/sftp.py Normal file
View File

@@ -0,0 +1,200 @@
"""
SFTP destination management — connection helpers and credential storage.
Credentials live at /data/sftp/{id}.password (mode 600) or /data/sftp/{id}.key
(also mode 600). Public host keys are pinned at /data/sftp/{id}.host_keys after
the first successful connection (TOFU); subsequent connections fail loudly if
the host key changes.
"""
import io
import os
import stat
import errno
from contextlib import contextmanager
from typing import Optional
import paramiko
CRED_DIR = "/data/sftp"
# ── Credential storage ───────────────────────────────────────────────────────
def _ensure_cred_dir() -> None:
    """Create the credential directory (requested mode 0700) if it is missing."""
    owner_only = 0o700
    os.makedirs(CRED_DIR, owner_only, exist_ok=True)
def _password_path(dest_id: int) -> str:
    """Return the path of the password file for destination *dest_id*."""
    filename = "{}.password".format(dest_id)
    return os.path.join(CRED_DIR, filename)
def _key_path(dest_id: int) -> str:
    """Return the path of the private-key file for destination *dest_id*."""
    filename = "{}.key".format(dest_id)
    return os.path.join(CRED_DIR, filename)
def _host_keys_path(dest_id: int) -> str:
    """Return the path of the pinned host-keys file for destination *dest_id*."""
    filename = "{}.host_keys".format(dest_id)
    return os.path.join(CRED_DIR, filename)
def _write_secret_file(path: str, text: str) -> None:
    """Write *text* to *path* without ever exposing it to other users.

    The file is created with mode 0600 from the start. Opening it with the
    default permissions and calling chmod afterwards would leave a window in
    which the secret is readable according to the process umask.
    """
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        # The mode given to os.open only applies to newly created files, so
        # tighten a pre-existing (now truncated) file before writing the secret.
        os.chmod(path, 0o600)
        handle.write(text)


def write_password(dest_id: int, password: str) -> None:
    """Store *password* for destination *dest_id* in a mode-0600 file."""
    _ensure_cred_dir()
    _write_secret_file(_password_path(dest_id), password)


def write_private_key(dest_id: int, key_text: str) -> None:
    """Store the private key for destination *dest_id* in a mode-0600 file.

    A trailing newline is appended when *key_text* does not already end with
    one.
    """
    _ensure_cred_dir()
    if not key_text.endswith("\n"):
        key_text += "\n"
    _write_secret_file(_key_path(dest_id), key_text)
def delete_credentials(dest_id: int) -> None:
    """Remove every stored secret for a destination, ignoring failures.

    Covers the password file, the private-key file and the pinned host-keys
    file. Errors are deliberately swallowed: this is best-effort cleanup.
    """
    secret_paths = [
        _password_path(dest_id),
        _key_path(dest_id),
        _host_keys_path(dest_id),
    ]
    for secret in secret_paths:
        try:
            if not os.path.exists(secret):
                continue
            os.unlink(secret)
        except Exception:
            # Best-effort: a file that cannot be removed must not abort the rest.
            pass
def has_credentials(dest_id: int, auth_method: str) -> bool:
    """Report whether the credential file for *auth_method* exists.

    Returns ``False`` for any auth method other than 'password' or 'key'.
    """
    if auth_method == "password":
        credential_file = _password_path(dest_id)
    elif auth_method == "key":
        credential_file = _key_path(dest_id)
    else:
        return False
    return os.path.isfile(credential_file)
# ── Keypair generation ──────────────────────────────────────────────────────
def generate_keypair() -> tuple[str, str, str]:
    """Generate an ED25519 keypair.

    Returns:
        A ``(private_pem, public_openssh, fingerprint)`` tuple: the serialized
        private key, the public key as an OpenSSH-style line ending in the
        fixed comment ``dupfinder@miaai``, and the key's fingerprint.
    """
    # NOTE(review): paramiko's API reference documents generate() for RSAKey,
    # ECDSAKey and DSSKey, but apparently not for Ed25519Key, and private-key
    # writing may likewise be unimplemented for that class. Confirm that this
    # call chain actually works against the pinned paramiko 3.5.0.
    key = paramiko.Ed25519Key.generate()
    # Serialize the private half into an in-memory text buffer.
    priv_buf = io.StringIO()
    key.write_private_key(priv_buf)
    private_pem = priv_buf.getvalue()
    # "<key type> <base64 blob> <comment>"
    public_openssh = f"{key.get_name()} {key.get_base64()} dupfinder@miaai"
    fingerprint = key.fingerprint  # presumably "SHA256:<base64>" — verify format
    return private_pem, public_openssh, fingerprint
# ── Connection ──────────────────────────────────────────────────────────────
def _load_private_key(path: str):
    """Load the private key stored at *path*, trying each supported key type.

    Ed25519 is tried first, then RSA, then ECDSA. If none of them accepts the
    file, the ``paramiko.SSHException`` from the last attempt is raised.
    """
    last_error = None
    for key_cls in (paramiko.Ed25519Key, paramiko.RSAKey, paramiko.ECDSAKey):
        try:
            return key_cls.from_private_key_file(path)
        except paramiko.SSHException as exc:
            # Wrong key type for this class — fall through to the next one.
            last_error = exc
    raise last_error


@contextmanager
def open_sftp(dest: dict, timeout: int = 15):
    """Open an SFTP session against the given destination dict.

    `dest` must contain: id, host, port, username, auth_method.
    Yields a paramiko.SFTPClient. Raises on any failure.

    The host key is pinned on the first successful connection (TOFU): when a
    host-keys file already exists for the destination, unknown or changed host
    keys are rejected; otherwise the key is accepted and then persisted.

    Raises:
        ValueError: if ``dest["auth_method"]`` is neither 'password' nor 'key'.
    """
    client = paramiko.SSHClient()

    # Pin host key on first success (TOFU). Reject on mismatch afterwards.
    hk_path = _host_keys_path(dest["id"])
    if os.path.isfile(hk_path):
        client.load_host_keys(hk_path)
        client.set_missing_host_key_policy(paramiko.RejectPolicy())
    else:
        # First connection — accept and persist
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    # Only the stored credential is used: never the SSH agent or default keys.
    auth_kwargs = {"look_for_keys": False, "allow_agent": False}
    if dest["auth_method"] == "password":
        with open(_password_path(dest["id"])) as f:
            auth_kwargs["password"] = f.read()
    elif dest["auth_method"] == "key":
        # User-pasted keys may be Ed25519, RSA or ECDSA.
        auth_kwargs["pkey"] = _load_private_key(_key_path(dest["id"]))
    else:
        raise ValueError(f"Unknown auth_method: {dest['auth_method']}")

    try:
        client.connect(
            hostname=dest["host"],
            port=int(dest.get("port") or 22),
            username=dest["username"],
            timeout=timeout,
            banner_timeout=timeout,
            auth_timeout=timeout,
            **auth_kwargs,
        )
        # Persist host key after first successful connect
        if not os.path.isfile(hk_path):
            _ensure_cred_dir()
            client.save_host_keys(hk_path)
        sftp = client.open_sftp()
        try:
            yield sftp
        finally:
            # Closing is best-effort; a failure here must not mask the
            # caller's own exception.
            try:
                sftp.close()
            except Exception:
                pass
    finally:
        try:
            client.close()
        except Exception:
            pass
def test_connection(dest: dict) -> tuple[bool, str]:
    """Check that a destination is reachable and its base path is writable.

    Connects, stats ``dest["base_path"]``, verifies it is a directory and then
    creates and removes a uniquely named probe directory inside it.

    Returns:
        ``(ok, message)`` — ``(True, "ok")`` on success, otherwise ``False``
        with a human-readable reason. Failures are reported through the return
        value rather than raised.
    """
    import uuid  # local import: only needed for the probe directory name

    try:
        with open_sftp(dest) as sftp:
            base_path = dest["base_path"]
            try:
                attrs = sftp.stat(base_path)
            except FileNotFoundError:
                return False, f"Base path does not exist: {base_path}"
            except IOError as e:
                if e.errno == errno.EACCES:
                    return False, f"No permission to access {base_path}"
                raise
            # A regular file would otherwise be misreported as "not writable".
            # st_mode can be None when the server sends no permission bits.
            if attrs.st_mode is not None and not stat.S_ISDIR(attrs.st_mode):
                return False, f"Base path is not a directory: {base_path}"
            # Quick write probe — try to mkdir a temp dir, then remove it.
            # The name is unique so a leftover from an earlier failed cleanup
            # (or a concurrent test) cannot make mkdir fail spuriously.
            probe = f"{base_path.rstrip('/')}/.dupfinder_probe_{uuid.uuid4().hex}"
            try:
                sftp.mkdir(probe)
                sftp.rmdir(probe)
            except IOError:
                return False, f"Connected, but {base_path} is not writable"
            return True, "ok"
    except paramiko.AuthenticationException:
        return False, "Authentication failed"
    except paramiko.BadHostKeyException as e:
        return False, f"Host key mismatch (possible MITM): {e}"
    except paramiko.SSHException as e:
        return False, f"SSH error: {e}"
    except (TimeoutError, ConnectionError, OSError) as e:
        return False, f"Connection failed: {e}"
    except Exception as e:
        return False, f"Unexpected error: {e}"
# ── Path helpers ────────────────────────────────────────────────────────────
def remote_path_for(source_path: str, dest: dict, photos_root: str = "/photos") -> str:
    """Compute the remote destination path for a given source file.

    If mirror_structure is true, preserves the path under photos_root.
    Otherwise, lands flat in base_path with the source basename.

    A source file that is not located under photos_root is also placed flat
    in base_path: mirroring it would produce a ``../`` path that escapes
    base_path on the remote side.

    Args:
        source_path: Local path of the file to transfer.
        dest: Destination dict; reads ``base_path`` and the optional
            ``mirror_structure`` flag (treated as true when absent).
        photos_root: Local root that mirrored paths are made relative to.

    Returns:
        The remote path, always using forward slashes as separators.
    """
    base = dest["base_path"].rstrip("/")
    if dest.get("mirror_structure", 1):
        # On Windows os.path.relpath uses backslashes; force forward
        rel = os.path.relpath(source_path, photos_root).replace("\\", "/")
        escapes_root = rel == ".." or rel.startswith("../")
        if not escapes_root:
            return f"{base}/{rel}"
    return f"{base}/{os.path.basename(source_path)}"

2
debian/build-deb.sh vendored
View File

@@ -13,7 +13,7 @@ BUILD_DIR="$REPO_ROOT/build/deb"
# ── Config ────────────────────────────────────────────────────────────────────
PKG_NAME="dupfinder"
PKG_VERSION="1.0.11"
PKG_VERSION="1.1.0"
PKG_ARCH="amd64"
DEB_FILE="${PKG_NAME}_${PKG_VERSION}_${PKG_ARCH}.deb"

View File

@@ -10,3 +10,4 @@ pillow-heif==0.21.0
jinja2==3.1.4
aiofiles==24.1.0
numpy==1.26.4
paramiko==3.5.0