# ── SFTP destinations ────────────────────────────────────────────────────────

class SFTPDestBody(BaseModel):
    """Request body shared by the create and update destination endpoints."""

    name: str
    host: str
    port: int = 22
    username: str
    auth_method: str  # 'password' | 'key'
    base_path: str
    mirror_structure: bool = True
    # Either password (for password auth) or private_key (for key auth).
    # Optional on update — omit to leave existing credential untouched.
    password: Optional[str] = None
    private_key: Optional[str] = None


def _require_known_auth_method(auth_method: str) -> None:
    """Raise a 400 unless auth_method is one of the two supported values."""
    if auth_method not in ("password", "key"):
        raise HTTPException(400, "auth_method must be 'password' or 'key'")


def _store_credential(dest_id: int, body: SFTPDestBody) -> None:
    """Persist the secret in `body` that matches its auth_method, if supplied.

    Does nothing when the matching secret is missing or empty, which is how
    an update leaves the existing credential untouched.
    """
    if body.auth_method == "password" and body.password:
        sftp_mod.write_password(dest_id, body.password)
    elif body.auth_method == "key" and body.private_key:
        sftp_mod.write_private_key(dest_id, body.private_key)


def _dest_row_to_dict(row) -> dict:
    """Convert an sftp_destinations row into the dict returned by the API.

    Secrets are never included; `has_credentials` only reports whether a
    credential file matching the row's auth_method is present.
    """
    return {
        "id": row["id"],
        "name": row["name"],
        "host": row["host"],
        "port": row["port"],
        "username": row["username"],
        "auth_method": row["auth_method"],
        "base_path": row["base_path"],
        "mirror_structure": bool(row["mirror_structure"]),
        "enabled": bool(row["enabled"]),
        "created_at": row["created_at"],
        "last_tested_at": row["last_tested_at"],
        "last_test_result": row["last_test_result"],
        "has_credentials": sftp_mod.has_credentials(row["id"], row["auth_method"]),
    }


@app.get("/api/sftp/destinations")
def list_destinations():
    """Return every stored SFTP destination, ordered by name."""
    con = get_db()
    try:
        cur = con.cursor()
        cur.execute("SELECT * FROM sftp_destinations ORDER BY name")
        return [_dest_row_to_dict(r) for r in cur.fetchall()]
    finally:
        # Close on every path, including a failure while building the rows.
        con.close()


@app.post("/api/sftp/destinations", status_code=201)
def create_destination(body: SFTPDestBody):
    """Create a destination row and store its credential.

    Raises:
        HTTPException 400: unknown auth_method, or the secret matching the
            chosen auth_method is missing.
        HTTPException 409: the destination name is already in use.
    """
    _require_known_auth_method(body.auth_method)
    if body.auth_method == "password" and not body.password:
        raise HTTPException(400, "password required for password auth")
    if body.auth_method == "key" and not body.private_key:
        raise HTTPException(400, "private_key required for key auth")

    con = get_db()
    try:
        cur = con.cursor()
        try:
            cur.execute("""
                INSERT INTO sftp_destinations
                  (name, host, port, username, auth_method, base_path, mirror_structure)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (body.name, body.host, body.port, body.username,
                  body.auth_method, body.base_path, 1 if body.mirror_structure else 0))
            dest_id = cur.lastrowid
            con.commit()
        except sqlite3.IntegrityError:
            raise HTTPException(409, f"Destination name already in use: {body.name}")

        try:
            _store_credential(dest_id, body)
        except Exception:
            # The row is already committed; remove it again so a failed
            # credential write does not leave a destination without a secret.
            cur.execute("DELETE FROM sftp_destinations WHERE id=?", (dest_id,))
            con.commit()
            sftp_mod.delete_credentials(dest_id)
            raise

        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        return _dest_row_to_dict(cur.fetchone())
    finally:
        con.close()


@app.put("/api/sftp/destinations/{dest_id}")
def update_destination(dest_id: int, body: SFTPDestBody):
    """Update a destination's settings and, when supplied, its credential.

    Raises:
        HTTPException 404: no destination with this id.
        HTTPException 400: unknown auth_method.
        HTTPException 409: the new name collides with another destination.
    """
    con = get_db()
    try:
        cur = con.cursor()
        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        row = cur.fetchone()
        if not row:
            raise HTTPException(404, "Destination not found")
        _require_known_auth_method(body.auth_method)

        try:
            cur.execute("""
                UPDATE sftp_destinations
                SET name=?, host=?, port=?, username=?, auth_method=?,
                    base_path=?, mirror_structure=?
                WHERE id=?
            """, (body.name, body.host, body.port, body.username,
                  body.auth_method, body.base_path,
                  1 if body.mirror_structure else 0, dest_id))
        except sqlite3.IntegrityError:
            # name is UNIQUE; report the clash the same way create does.
            raise HTTPException(409, f"Destination name already in use: {body.name}")

        # If auth method changed, drop old creds.
        # NOTE(review): delete_credentials() also removes the pinned
        # host-keys file, so switching auth method resets host-key pinning
        # for this destination — confirm that is intended.
        if row["auth_method"] != body.auth_method:
            sftp_mod.delete_credentials(dest_id)
        _store_credential(dest_id, body)

        # Commit only after the credential step succeeded.
        con.commit()
        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        return _dest_row_to_dict(cur.fetchone())
    finally:
        con.close()


@app.delete("/api/sftp/destinations/{dest_id}", status_code=204)
def delete_destination(dest_id: int):
    """Delete a destination row, then remove its stored credential files.

    Raises:
        HTTPException 404: no destination with this id.
    """
    con = get_db()
    try:
        cur = con.cursor()
        cur.execute("DELETE FROM sftp_destinations WHERE id=?", (dest_id,))
        if cur.rowcount == 0:
            raise HTTPException(404, "Destination not found")
        con.commit()
    finally:
        con.close()
    sftp_mod.delete_credentials(dest_id)
    return Response(status_code=204)


@app.post("/api/sftp/destinations/{dest_id}/test")
def test_destination(dest_id: int):
    """Run a connection test and record its outcome on the destination row.

    Returns a dict with `ok`, `message` and the refreshed `destination`.

    Raises:
        HTTPException 404: no destination with this id.
        HTTPException 400: no credential is stored for the destination.
    """
    con = get_db()
    try:
        cur = con.cursor()
        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        row = cur.fetchone()
        if not row:
            raise HTTPException(404, "Destination not found")
        dest = _dest_row_to_dict(row)
        if not dest["has_credentials"]:
            raise HTTPException(400, "No credentials stored for this destination")

        ok, message = sftp_mod.test_connection(dest)
        # last_test_result holds the literal "ok" on success, otherwise the
        # failure message returned by the test.
        cur.execute("""
            UPDATE sftp_destinations
            SET last_tested_at=CURRENT_TIMESTAMP, last_test_result=?
            WHERE id=?
        """, ("ok" if ok else message, dest_id))
        con.commit()
        cur.execute("SELECT * FROM sftp_destinations WHERE id=?", (dest_id,))
        out = _dest_row_to_dict(cur.fetchone())
        return {"ok": ok, "message": message, "destination": out}
    finally:
        con.close()


@app.post("/api/sftp/keypair")
def generate_keypair():
    """Generate a fresh ED25519 keypair. Returns the private + public halves;
    the caller is expected to paste the private key into a destination's
    private_key field on create/update."""
    private_pem, public_openssh, fingerprint = sftp_mod.generate_keypair()
    return {
        "private_key": private_pem,
        "public_key": public_openssh,
        "fingerprint": fingerprint,
    }
+""" + +import io +import os +import stat +import errno +from contextlib import contextmanager +from typing import Optional + +import paramiko + +CRED_DIR = "/data/sftp" + + +# ── Credential storage ─────────────────────────────────────────────────────── + +def _ensure_cred_dir() -> None: + os.makedirs(CRED_DIR, mode=0o700, exist_ok=True) + + +def _password_path(dest_id: int) -> str: + return os.path.join(CRED_DIR, f"{dest_id}.password") + + +def _key_path(dest_id: int) -> str: + return os.path.join(CRED_DIR, f"{dest_id}.key") + + +def _host_keys_path(dest_id: int) -> str: + return os.path.join(CRED_DIR, f"{dest_id}.host_keys") + + +def write_password(dest_id: int, password: str) -> None: + _ensure_cred_dir() + p = _password_path(dest_id) + with open(p, "w") as f: + f.write(password) + os.chmod(p, 0o600) + + +def write_private_key(dest_id: int, key_text: str) -> None: + _ensure_cred_dir() + p = _key_path(dest_id) + with open(p, "w") as f: + f.write(key_text if key_text.endswith("\n") else key_text + "\n") + os.chmod(p, 0o600) + + +def delete_credentials(dest_id: int) -> None: + """Best-effort cleanup of all stored secrets for a destination.""" + for p in (_password_path(dest_id), _key_path(dest_id), _host_keys_path(dest_id)): + try: + if os.path.exists(p): + os.unlink(p) + except Exception: + pass + + +def has_credentials(dest_id: int, auth_method: str) -> bool: + if auth_method == "password": + return os.path.isfile(_password_path(dest_id)) + if auth_method == "key": + return os.path.isfile(_key_path(dest_id)) + return False + + +# ── Keypair generation ────────────────────────────────────────────────────── + +def generate_keypair() -> tuple[str, str, str]: + """Generate an ED25519 keypair. 
Returns (private_pem, public_openssh, fingerprint).""" + key = paramiko.Ed25519Key.generate() + priv_buf = io.StringIO() + key.write_private_key(priv_buf) + private_pem = priv_buf.getvalue() + public_openssh = f"{key.get_name()} {key.get_base64()} dupfinder@miaai" + fingerprint = key.fingerprint # SHA-256:base64 + return private_pem, public_openssh, fingerprint + + +# ── Connection ────────────────────────────────────────────────────────────── + +@contextmanager +def open_sftp(dest: dict, timeout: int = 15): + """Open an SFTP session against the given destination dict. + + `dest` must contain: id, host, port, username, auth_method. + Yields a paramiko.SFTPClient. Raises on any failure. + """ + client = paramiko.SSHClient() + + # Pin host key on first success (TOFU). Reject on mismatch afterwards. + hk_path = _host_keys_path(dest["id"]) + if os.path.isfile(hk_path): + client.load_host_keys(hk_path) + client.set_missing_host_key_policy(paramiko.RejectPolicy()) + else: + # First connection — accept and persist + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + auth_kwargs = {} + if dest["auth_method"] == "password": + with open(_password_path(dest["id"])) as f: + auth_kwargs["password"] = f.read() + auth_kwargs["look_for_keys"] = False + auth_kwargs["allow_agent"] = False + elif dest["auth_method"] == "key": + try: + pkey = paramiko.Ed25519Key.from_private_key_file(_key_path(dest["id"])) + except paramiko.SSHException: + # Try RSA as fallback for user-pasted keys + pkey = paramiko.RSAKey.from_private_key_file(_key_path(dest["id"])) + auth_kwargs["pkey"] = pkey + auth_kwargs["look_for_keys"] = False + auth_kwargs["allow_agent"] = False + else: + raise ValueError(f"Unknown auth_method: {dest['auth_method']}") + + try: + client.connect( + hostname=dest["host"], + port=int(dest.get("port") or 22), + username=dest["username"], + timeout=timeout, + banner_timeout=timeout, + auth_timeout=timeout, + **auth_kwargs, + ) + # Persist host key after first 
successful connect + if not os.path.isfile(hk_path): + _ensure_cred_dir() + client.save_host_keys(hk_path) + sftp = client.open_sftp() + try: + yield sftp + finally: + try: + sftp.close() + except Exception: + pass + finally: + try: + client.close() + except Exception: + pass + + +def test_connection(dest: dict) -> tuple[bool, str]: + """Try to connect, chdir to base_path, list it. Returns (ok, message).""" + try: + with open_sftp(dest) as sftp: + try: + sftp.stat(dest["base_path"]) + except FileNotFoundError: + return False, f"Base path does not exist: {dest['base_path']}" + except IOError as e: + if e.errno == errno.EACCES: + return False, f"No permission to access {dest['base_path']}" + raise + # Quick write probe — try to mkdir a temp dir, then remove it + probe = f"{dest['base_path'].rstrip('/')}/.dupfinder_probe" + try: + sftp.mkdir(probe) + sftp.rmdir(probe) + except IOError: + return False, f"Connected, but {dest['base_path']} is not writable" + return True, "ok" + except paramiko.AuthenticationException: + return False, "Authentication failed" + except paramiko.BadHostKeyException as e: + return False, f"Host key mismatch (possible MITM): {e}" + except paramiko.SSHException as e: + return False, f"SSH error: {e}" + except (TimeoutError, ConnectionError, OSError) as e: + return False, f"Connection failed: {e}" + except Exception as e: + return False, f"Unexpected error: {e}" + + +# ── Path helpers ──────────────────────────────────────────────────────────── + +def remote_path_for(source_path: str, dest: dict, photos_root: str = "/photos") -> str: + """Compute the remote destination path for a given source file. + + If mirror_structure is true, preserves the path under photos_root. + Otherwise, lands flat in base_path with the source basename. 
+ """ + base = dest["base_path"].rstrip("/") + if dest.get("mirror_structure", 1): + rel = os.path.relpath(source_path, photos_root) + # On Windows os.path.relpath uses backslashes; force forward + rel = rel.replace("\\", "/") + return f"{base}/{rel}" + return f"{base}/{os.path.basename(source_path)}" diff --git a/debian/build-deb.sh b/debian/build-deb.sh index 3c739a6..b9e534f 100644 --- a/debian/build-deb.sh +++ b/debian/build-deb.sh @@ -13,7 +13,7 @@ BUILD_DIR="$REPO_ROOT/build/deb" # ── Config ──────────────────────────────────────────────────────────────────── PKG_NAME="dupfinder" -PKG_VERSION="1.0.11" +PKG_VERSION="1.1.0" PKG_ARCH="amd64" DEB_FILE="${PKG_NAME}_${PKG_VERSION}_${PKG_ARCH}.deb" diff --git a/requirements.txt b/requirements.txt index 6c151d3..598b2dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ pillow-heif==0.21.0 jinja2==3.1.4 aiofiles==24.1.0 numpy==1.26.4 +paramiko==3.5.0