diff --git a/app/main.py b/app/main.py
index d7ff8f4..6046c18 100644
--- a/app/main.py
+++ b/app/main.py
@@ -411,6 +411,7 @@ def decide(group_id: int, body: DecideBody):
             )
         status = "keeper" if is_k else "redundant"
         cur.execute("UPDATE files SET status=? WHERE id=?", (status, fid))
+        sc.log_decision(cur, fid, group_id, status, "manual")
     cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
     con.commit()
 
@@ -425,6 +426,9 @@ def skip_group(group_id: int):
     cur.execute("SELECT id FROM duplicate_groups WHERE id=?", (group_id,))
     if not cur.fetchone():
         raise HTTPException(404, "Group not found")
+    cur.execute("SELECT file_id FROM duplicate_members WHERE group_id=?", (group_id,))
+    for r in cur.fetchall():
+        sc.log_decision(cur, r["file_id"], group_id, "skip", "manual")
     cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
     con.commit()
     con.close()
@@ -445,6 +449,7 @@ def keep_all(group_id: int):
             (group_id, r["file_id"]),
         )
         cur.execute("UPDATE files SET status='keeper' WHERE id=?", (r["file_id"],))
+        sc.log_decision(cur, r["file_id"], group_id, "keeper", "keep-all")
     cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
     con.commit()
     con.close()
@@ -465,6 +470,7 @@ def unreview_group(group_id: int):
             (group_id, r["file_id"]),
         )
         cur.execute("UPDATE files SET status='pending' WHERE id=?", (r["file_id"],))
+        sc.log_decision(cur, r["file_id"], group_id, "unreview", "manual")
     cur.execute("UPDATE duplicate_groups SET reviewed=0 WHERE id=?", (group_id,))
     con.commit()
     con.close()
@@ -504,6 +510,11 @@ def auto_resolve_exact():
                 "UPDATE files SET status=? WHERE id=?",
                 ("keeper" if is_k else "redundant", m["id"]),
             )
+            sc.log_decision(
+                cur, m["id"], gid,
+                "keeper" if is_k else "redundant",
+                "auto-resolve-exact",
+            )
         cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (gid,))
         resolved += 1
 
diff --git a/app/scanner.py b/app/scanner.py
index 3e57798..2ca4841 100644
--- a/app/scanner.py
+++ b/app/scanner.py
@@ -68,6 +68,22 @@ def get_db() -> sqlite3.Connection:
     return con
 
 
+def log_decision(cur, file_id: int, group_id: int | None, action: str, reason: str):
+    """Append a row to the decisions audit log.
+
+    The file's current sha256 is read by a scalar subquery inside the INSERT
+    and stored with the decision, so a future move/delete tool can detect a
+    file that changed after review. The subquery yields NULL when no files
+    row matches, and no particular row_factory is needed on the cursor.
+    """
+    # file_id is bound twice: once for the column, once for the subquery.
+    cur.execute(
+        "INSERT INTO decisions (file_id, group_id, action, reason, sha256_at_decision) "
+        "VALUES (?, ?, ?, ?, (SELECT sha256 FROM files WHERE id=?))",
+        (file_id, group_id, action, reason, file_id),
+    )
+
+
 def init_db():
     con = get_db()
     cur = con.cursor()
@@ -122,12 +138,27 @@ def init_db():
             suggested INTEGER DEFAULT 0
         );
 
-        CREATE INDEX IF NOT EXISTS idx_sha256   ON files(sha256);
-        CREATE INDEX IF NOT EXISTS idx_phash    ON files(phash);
-        CREATE INDEX IF NOT EXISTS idx_exif_dt  ON files(exif_datetime, exif_device);
-        CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height);
-        CREATE INDEX IF NOT EXISTS idx_status   ON files(status);
+        CREATE TABLE IF NOT EXISTS decisions (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            file_id INTEGER NOT NULL,
+            group_id INTEGER,
+            action TEXT NOT NULL,
+            reason TEXT,
+            sha256_at_decision TEXT,
+            decided_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
+            FOREIGN KEY (group_id) REFERENCES duplicate_groups(id) ON DELETE SET NULL
+        );
+
+        CREATE INDEX IF NOT EXISTS idx_sha256          ON files(sha256);
+        CREATE INDEX IF NOT EXISTS idx_phash           ON files(phash);
+        CREATE INDEX IF NOT EXISTS idx_exif_dt         ON files(exif_datetime, exif_device);
+        CREATE INDEX IF NOT EXISTS idx_size_dim        ON files(file_size, width, height);
+        CREATE INDEX IF NOT EXISTS idx_status          ON files(status);
+        CREATE INDEX IF NOT EXISTS idx_decisions_file  ON decisions(file_id);
+        CREATE INDEX IF NOT EXISTS idx_decisions_group ON decisions(group_id);
     """)
+
     # Migration: add new columns to scans if upgrading from older schema
     for col, defn in [
         ("files_indexed", "INTEGER DEFAULT 0"),
@@ -944,6 +975,11 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
                         "UPDATE files SET status=? WHERE id=?",
                         ("keeper" if is_k else "redundant", fid),
                     )
+                    log_decision(
+                        cur, fid, gid,
+                        "keeper" if is_k else "redundant",
+                        "rescan-restore",
+                    )
                     restored += 1
         con.commit()
         scan_state["message"] = f"Restored {restored:,} prior review decisions"