Add decisions audit log for future move/delete tool

Captures every review decision (actions: keeper, redundant, skip, unreview;
reasons: manual, keep-all, auto-resolve-exact, rescan-restore) together with the
file's sha256 at decision time so a downstream tool can detect stale decisions
before touching disk.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Carlos
2026-04-24 01:40:54 -04:00
parent 79ab0dbb05
commit 6a4134762c
2 changed files with 52 additions and 5 deletions

View File

@@ -411,6 +411,7 @@ def decide(group_id: int, body: DecideBody):
)
status = "keeper" if is_k else "redundant"
cur.execute("UPDATE files SET status=? WHERE id=?", (status, fid))
sc.log_decision(cur, fid, group_id, status, "manual")
cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
con.commit()
@@ -425,6 +426,9 @@ def skip_group(group_id: int):
cur.execute("SELECT id FROM duplicate_groups WHERE id=?", (group_id,))
if not cur.fetchone():
raise HTTPException(404, "Group not found")
cur.execute("SELECT file_id FROM duplicate_members WHERE group_id=?", (group_id,))
for r in cur.fetchall():
sc.log_decision(cur, r["file_id"], group_id, "skip", "manual")
cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
con.commit()
con.close()
@@ -445,6 +449,7 @@ def keep_all(group_id: int):
(group_id, r["file_id"]),
)
cur.execute("UPDATE files SET status='keeper' WHERE id=?", (r["file_id"],))
sc.log_decision(cur, r["file_id"], group_id, "keeper", "keep-all")
cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
con.commit()
con.close()
@@ -465,6 +470,7 @@ def unreview_group(group_id: int):
(group_id, r["file_id"]),
)
cur.execute("UPDATE files SET status='pending' WHERE id=?", (r["file_id"],))
sc.log_decision(cur, r["file_id"], group_id, "unreview", "manual")
cur.execute("UPDATE duplicate_groups SET reviewed=0 WHERE id=?", (group_id,))
con.commit()
con.close()
@@ -504,6 +510,11 @@ def auto_resolve_exact():
"UPDATE files SET status=? WHERE id=?",
("keeper" if is_k else "redundant", m["id"]),
)
sc.log_decision(
cur, m["id"], gid,
"keeper" if is_k else "redundant",
"auto-resolve-exact",
)
cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (gid,))
resolved += 1

View File

@@ -68,6 +68,22 @@ def get_db() -> sqlite3.Connection:
return con
def log_decision(cur, file_id: int, group_id: int | None, action: str, reason: str):
"""Append a row to the decisions audit log.
Captures the file's sha256 at decision time so a future move/delete tool
can detect when a file has changed since the user reviewed it.
"""
cur.execute("SELECT sha256 FROM files WHERE id=?", (file_id,))
row = cur.fetchone()
sha = row["sha256"] if row else None
cur.execute(
"INSERT INTO decisions (file_id, group_id, action, reason, sha256_at_decision) "
"VALUES (?, ?, ?, ?, ?)",
(file_id, group_id, action, reason, sha),
)
def init_db():
con = get_db()
cur = con.cursor()
@@ -122,12 +138,27 @@ def init_db():
suggested INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS decisions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,
group_id INTEGER,
action TEXT NOT NULL,
reason TEXT,
sha256_at_decision TEXT,
decided_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
FOREIGN KEY (group_id) REFERENCES duplicate_groups(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS idx_sha256 ON files(sha256);
CREATE INDEX IF NOT EXISTS idx_phash ON files(phash);
CREATE INDEX IF NOT EXISTS idx_exif_dt ON files(exif_datetime, exif_device);
CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height);
CREATE INDEX IF NOT EXISTS idx_status ON files(status);
CREATE INDEX IF NOT EXISTS idx_decisions_file ON decisions(file_id);
CREATE INDEX IF NOT EXISTS idx_decisions_group ON decisions(group_id);
""")
# Migration: add new columns to scans if upgrading from older schema
for col, defn in [
("files_indexed", "INTEGER DEFAULT 0"),
@@ -944,6 +975,11 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
"UPDATE files SET status=? WHERE id=?",
("keeper" if is_k else "redundant", fid),
)
log_decision(
cur, fid, gid,
"keeper" if is_k else "redundant",
"rescan-restore",
)
restored += 1
con.commit()
scan_state["message"] = f"Restored {restored:,} prior review decisions"