Files
duplicate-finder/app/sftp.py
Carlos 7436b23db3 Stage 2 #1: SFTP destinations CRUD + connection test
Foundation for the move/quarantine pipeline. Lets users register one or
more remote SFTP destinations through the API, store credentials at rest
under /data/sftp/{id}.{password|key} (mode 600), and verify connectivity
+ write access via a test endpoint.

Endpoints:
  GET    /api/sftp/destinations
  POST   /api/sftp/destinations             — create
  PUT    /api/sftp/destinations/{id}        — update
  DELETE /api/sftp/destinations/{id}
  POST   /api/sftp/destinations/{id}/test   — connect, stat base_path, mkdir probe
  POST   /api/sftp/keypair                  — generate ED25519 keypair

Host keys pinned per-destination on first connect (TOFU); subsequent
mismatches are rejected. paramiko added to requirements.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-26 20:04:42 -04:00

201 lines
7.2 KiB
Python

"""
SFTP destination management — connection helpers and credential storage.
Credentials are stored under /data/sftp with mode 600: {id}.password for
password auth, {id}.key for key auth. Server host keys are pinned in
/data/sftp/{id}.host_keys on the first successful connection (TOFU); later
connections fail loudly if the host key changes.
"""
import io
import os
import stat
import errno
from contextlib import contextmanager
from typing import Optional
import paramiko
CRED_DIR = "/data/sftp"
# ── Credential storage ───────────────────────────────────────────────────────
def _ensure_cred_dir() -> None:
    """Make sure CRED_DIR exists, requesting owner-only (0o700) permissions.

    An already-existing directory is left as it is (exist_ok=True).
    """
    os.makedirs(name=CRED_DIR, mode=0o700, exist_ok=True)
def _password_path(dest_id: int) -> str:
    """Return the path of the stored password file for destination *dest_id*."""
    filename = f"{dest_id}.password"
    return os.path.join(CRED_DIR, filename)
def _key_path(dest_id: int) -> str:
    """Return the path of the stored private-key file for destination *dest_id*."""
    filename = f"{dest_id}.key"
    return os.path.join(CRED_DIR, filename)
def _host_keys_path(dest_id: int) -> str:
    """Return the path of the pinned host-keys file for destination *dest_id*."""
    filename = f"{dest_id}.host_keys"
    return os.path.join(CRED_DIR, filename)
def write_password(dest_id: int, password: str) -> None:
    """Store *password* for destination *dest_id* in an owner-only (0o600) file.

    The file is created with mode 0o600 from the start, and an already-existing
    file is tightened to 0o600 *before* the secret is written, so the password
    is never on disk with the looser umask-default permissions.

    Args:
        dest_id: Destination identifier; selects the file name.
        password: The password text, written verbatim (no newline is added).

    Raises:
        OSError: If the credential directory or file cannot be created/written.
    """
    _ensure_cred_dir()
    path = _password_path(dest_id)
    # os.open lets us pass the creation mode; plain open() would create the
    # file with umask-default permissions until a later chmod.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        # The creation mode is ignored when the file already existed, so
        # enforce it explicitly before any secret bytes are written.
        os.chmod(path, 0o600)
        f.write(password)
def write_private_key(dest_id: int, key_text: str) -> None:
    """Store a private key for destination *dest_id* in an owner-only (0o600) file.

    The file is created with mode 0o600 from the start, and an already-existing
    file is tightened to 0o600 *before* the key is written, so the key is never
    on disk with the looser umask-default permissions.

    Args:
        dest_id: Destination identifier; selects the file name.
        key_text: Private key text. A trailing newline is appended when missing.

    Raises:
        OSError: If the credential directory or file cannot be created/written.
    """
    _ensure_cred_dir()
    path = _key_path(dest_id)
    if not key_text.endswith("\n"):
        key_text += "\n"
    # os.open lets us pass the creation mode; plain open() would create the
    # file with umask-default permissions until a later chmod.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        # The creation mode is ignored when the file already existed, so
        # enforce it explicitly before any key material is written.
        os.chmod(path, 0o600)
        f.write(key_text)
def delete_credentials(dest_id: int) -> None:
    """Remove every stored file (password, key, host keys) for a destination.

    Cleanup is best-effort: a file that is missing is skipped, and any error
    while checking or removing one file is ignored so the remaining files are
    still attempted.
    """
    stored_files = [
        _password_path(dest_id),
        _key_path(dest_id),
        _host_keys_path(dest_id),
    ]
    for stored_file in stored_files:
        try:
            if not os.path.exists(stored_file):
                continue
            os.unlink(stored_file)
        except Exception:
            # Deliberately swallowed: deletion must never fail the caller.
            pass
def has_credentials(dest_id: int, auth_method: str) -> bool:
    """Report whether the secret file for *auth_method* exists for *dest_id*.

    "password" checks the password file and "key" checks the private-key
    file; any other auth method yields False.
    """
    if auth_method == "password":
        secret_file = _password_path(dest_id)
    elif auth_method == "key":
        secret_file = _key_path(dest_id)
    else:
        return False
    return os.path.isfile(secret_file)
# ── Keypair generation ──────────────────────────────────────────────────────
def generate_keypair() -> tuple[str, str, str]:
    """Create a new ED25519 keypair.

    Returns:
        A 3-tuple ``(private_pem, public_openssh, fingerprint)``: the private
        key as serialized by paramiko, a one-line public key with the fixed
        comment ``dupfinder@miaai``, and the key's ``fingerprint`` attribute.
    """
    # NOTE(review): upstream paramiko has historically not implemented
    # Ed25519Key.generate() or write_private_key() for Ed25519 keys — confirm
    # that the pinned paramiko version actually supports both calls.
    keypair = paramiko.Ed25519Key.generate()

    pem_out = io.StringIO()
    keypair.write_private_key(pem_out)

    public_openssh = f"{keypair.get_name()} {keypair.get_base64()} dupfinder@miaai"
    return pem_out.getvalue(), public_openssh, keypair.fingerprint
# ── Connection ──────────────────────────────────────────────────────────────
def _load_private_key(path: str) -> "paramiko.PKey":
    """Load the private key at *path*, trying Ed25519, RSA, then ECDSA."""
    last_error = None
    for key_cls in (paramiko.Ed25519Key, paramiko.RSAKey, paramiko.ECDSAKey):
        try:
            return key_cls.from_private_key_file(path)
        except paramiko.PasswordRequiredException:
            # A passphrase-protected key will fail for every key type;
            # surface that immediately instead of a misleading format error.
            raise
        except paramiko.SSHException as exc:
            last_error = exc
    raise paramiko.SSHException(
        f"Unsupported or invalid private key (tried Ed25519, RSA, ECDSA): {last_error}"
    ) from last_error


@contextmanager
def open_sftp(dest: dict, timeout: int = 15):
    """Open an SFTP session against the given destination dict.

    Host keys follow trust-on-first-use: when a pinned host-keys file exists
    for the destination it is loaded and unknown keys are rejected; otherwise
    the server key is accepted and persisted after the first successful
    connect.

    Args:
        dest: Destination record. Must contain ``id``, ``host``, ``username``
            and ``auth_method`` ("password" or "key"); ``port`` is optional
            and defaults to 22.
        timeout: Seconds used for the TCP, banner and auth timeouts.

    Yields:
        A ``paramiko.SFTPClient``. Both the SFTP channel and the SSH client
        are closed when the context exits.

    Raises:
        ValueError: If ``auth_method`` is neither "password" nor "key".
        OSError: If the stored credential file cannot be read, or on network
            failure.
        paramiko.SSHException: On key-loading, host-key, auth or protocol
            errors.
    """
    client = paramiko.SSHClient()
    # Pin host key on first success (TOFU). Reject on mismatch afterwards.
    hk_path = _host_keys_path(dest["id"])
    if os.path.isfile(hk_path):
        client.load_host_keys(hk_path)
        client.set_missing_host_key_policy(paramiko.RejectPolicy())
    else:
        # First connection — accept and persist
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    # Only the stored credential is offered: never fall back to keys from
    # the service user's home directory or a running SSH agent.
    auth_kwargs = {"look_for_keys": False, "allow_agent": False}
    auth_method = dest["auth_method"]
    if auth_method == "password":
        with open(_password_path(dest["id"])) as f:
            auth_kwargs["password"] = f.read()
    elif auth_method == "key":
        # User-pasted keys may be of any common type, not just the Ed25519
        # keys this module generates.
        auth_kwargs["pkey"] = _load_private_key(_key_path(dest["id"]))
    else:
        raise ValueError(f"Unknown auth_method: {dest['auth_method']}")

    try:
        client.connect(
            hostname=dest["host"],
            port=int(dest.get("port") or 22),
            username=dest["username"],
            timeout=timeout,
            banner_timeout=timeout,
            auth_timeout=timeout,
            **auth_kwargs,
        )
        # Persist host key after first successful connect
        if not os.path.isfile(hk_path):
            _ensure_cred_dir()
            client.save_host_keys(hk_path)
        sftp = client.open_sftp()
        try:
            yield sftp
        finally:
            try:
                sftp.close()
            except Exception:
                # Best-effort close; must not mask an error from the body.
                pass
    finally:
        try:
            client.close()
        except Exception:
            # Best-effort close; must not mask an earlier error.
            pass
def test_connection(dest: dict) -> tuple[bool, str]:
    """Check that a destination is reachable and its base_path is usable.

    Connects via ``open_sftp``, stats ``dest["base_path"]``, verifies it is a
    directory, then creates and removes a uniquely named probe directory
    inside it to confirm write access.

    Args:
        dest: Destination record as accepted by ``open_sftp``, plus
            ``base_path``.

    Returns:
        ``(True, "ok")`` on success, otherwise ``(False, message)`` with a
        human-readable reason. Errors are reported through the return value
        rather than raised.
    """
    try:
        with open_sftp(dest) as sftp:
            base_path = dest["base_path"]
            try:
                attrs = sftp.stat(base_path)
            except FileNotFoundError:
                return False, f"Base path does not exist: {base_path}"
            except IOError as e:
                if e.errno == errno.EACCES:
                    return False, f"No permission to access {base_path}"
                raise
            # st_mode can be None when the server omits permissions; only
            # reject when we positively know it is not a directory.
            if attrs.st_mode is not None and not stat.S_ISDIR(attrs.st_mode):
                return False, f"Base path is not a directory: {base_path}"
            # Quick write probe — mkdir a temp dir, then remove it. The random
            # suffix keeps a stale probe from an earlier interrupted run, or a
            # concurrent test, from being misreported as "not writable".
            probe = f"{base_path.rstrip('/')}/.dupfinder_probe_{os.urandom(4).hex()}"
            try:
                sftp.mkdir(probe)
                sftp.rmdir(probe)
            except IOError:
                return False, f"Connected, but {base_path} is not writable"
            return True, "ok"
    # AuthenticationException and BadHostKeyException are SSHException
    # subclasses, so they must be matched before the generic SSH handler.
    except paramiko.AuthenticationException:
        return False, "Authentication failed"
    except paramiko.BadHostKeyException as e:
        return False, f"Host key mismatch (possible MITM): {e}"
    except paramiko.SSHException as e:
        return False, f"SSH error: {e}"
    except (TimeoutError, ConnectionError, OSError) as e:
        return False, f"Connection failed: {e}"
    except Exception as e:
        return False, f"Unexpected error: {e}"
# ── Path helpers ────────────────────────────────────────────────────────────
def remote_path_for(source_path: str, dest: dict, photos_root: str = "/photos") -> str:
    """Compute the remote destination path for a given source file.

    When ``dest["mirror_structure"]`` is truthy (the default when the key is
    absent), the path of the file relative to *photos_root* is preserved under
    ``base_path``. Otherwise the file lands flat in ``base_path`` under its
    basename.

    A source that is not located under *photos_root* has no structure to
    mirror; it also lands flat, so the result can never climb out of
    ``base_path`` through ``..`` components.

    Args:
        source_path: Local path of the file to transfer.
        dest: Destination record; must contain ``base_path``.
        photos_root: Local root that mirrored paths are made relative to.

    Returns:
        The remote path, using forward slashes.
    """
    base = dest["base_path"].rstrip("/")
    flat = f"{base}/{os.path.basename(source_path)}"
    if not dest.get("mirror_structure", 1):
        return flat

    rel = os.path.relpath(source_path, photos_root)
    # A relpath starting with ".." means the source is outside photos_root;
    # mirroring it would write outside base_path on the remote side.
    if rel == os.pardir or rel.startswith(os.pardir + os.sep):
        return flat
    # Remote paths always use "/". Convert only when the local separator
    # differs: on POSIX a backslash is a legal filename character and must
    # be left alone.
    if os.sep != "/":
        rel = rel.replace(os.sep, "/")
    return f"{base}/{rel}"