feat: replace Cancel with Pause/Resume — survives server restarts

- scanner.py: replace cancel_requested with pause_requested throughout;
  pause during walk drains in-flight futures gracefully then saves state;
  phash phase processes in 500-image chunks with pause check between each;
  _save_pause_state() persists files_indexed/phashes_done/last_phase to DB;
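  init_db() now detects killed-mid-scan rows (running→paused) on startup,
  restores scan_state from the latest paused row, and migrates older
  scans tables by adding the new columns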

- main.py: add POST /api/scan/pause and POST /api/scan/resume endpoints;
  /api/scan/cancel kept as alias; scan_status now returns folder_path,
  files_indexed, phashes_done; scan_reset clears all new fields

- index.html: "Cancel" → "⏸ Pause" button; new #paused-area banner shows
  folder, files indexed, phashes done with "▶ Resume" and "Full reset"
  buttons; updateScanUI handles paused status; pauseScan()/resumeScan()
  JS functions added; chip gains .paused amber style

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
tocmo
2026-04-05 02:11:00 -04:00
parent f37bd76fed
commit 356f922940
3 changed files with 255 additions and 67 deletions

View File

@@ -105,11 +105,14 @@ def scan_start(body: ScanStartBody):
sc.scan_state.update( sc.scan_state.update(
scan_id=scan_id, scan_id=scan_id,
status="running", status="running",
phase="discovery", phase="takeout",
progress=0, progress=0,
total=0, total=0,
message="Starting...", message="Starting...",
cancel_requested=False, pause_requested=False,
files_indexed=0,
phashes_done=0,
folder_path=body.folder_path,
stats={}, stats={},
) )
@@ -146,24 +149,76 @@ def scan_status():
con.close() con.close()
return { return {
"scan_id": state["scan_id"], "scan_id": state["scan_id"],
"status": state["status"], "status": state["status"],
"phase": state["phase"], "phase": state["phase"],
"progress": state["progress"], "progress": state["progress"],
"total": state["total"], "total": state["total"],
"message": state["message"], "message": state["message"],
"stats": stats, "folder_path": state.get("folder_path"),
"files_indexed": state.get("files_indexed", 0),
"phashes_done": state.get("phashes_done", 0),
"stats": stats,
} }
@app.post("/api/scan/cancel") @app.post("/api/scan/pause")
def scan_cancel(): def scan_pause():
if sc.scan_state["status"] != "running": if sc.scan_state["status"] != "running":
raise HTTPException(400, "No scan is currently running") raise HTTPException(400, "No scan is currently running")
sc.scan_state["cancel_requested"] = True sc.scan_state["pause_requested"] = True
return {"success": True} return {"success": True}
# Keep /cancel as an alias so any lingering clients still work
@app.post("/api/scan/cancel")
def scan_cancel():
    """Legacy alias for the pause endpoint: delegates to scan_pause() and returns its result."""
    return scan_pause()
@app.post("/api/scan/resume")
def scan_resume():
    """Resume a paused scan by starting a new incremental scan of the same folder.

    The saved ``folder_path`` is read from the shared scan state, a new row is
    inserted into ``scans`` with status ``'running'``, the in-memory state is
    reset for the new run, and ``sc.run_scan`` is started on a daemon thread
    in ``"incremental"`` mode.

    Returns:
        ``{"scan_id": <id of the newly inserted scans row>}``.

    Raises:
        HTTPException: 400 if the current status is not ``"paused"``, or if no
            folder path was saved with the paused scan.
    """
    if sc.scan_state["status"] != "paused":
        raise HTTPException(400, "No paused scan to resume")

    folder_path = sc.scan_state.get("folder_path")
    if not folder_path:
        raise HTTPException(400, "No folder path saved — please start a new scan")

    con = get_db()
    try:
        cur = con.cursor()
        # Retire the paused row(s) this resume supersedes. If they stayed
        # 'paused', the startup check that restores the latest paused scan
        # would keep offering an already-resumed scan after every restart.
        # NOTE(review): 'cancelled' reuses the label older scans were marked
        # with; confirm no other query treats that status specially.
        cur.execute("UPDATE scans SET status = 'cancelled' WHERE status = 'paused'")
        cur.execute(
            "INSERT INTO scans (folder_path, status) VALUES (?, 'running')",
            (folder_path,),
        )
        scan_id = cur.lastrowid
        # Both statements are committed together so a crash cannot leave the
        # old row retired without the replacement row existing.
        con.commit()
    finally:
        # Release the connection even when one of the statements raises.
        con.close()

    # The resumed run gets a fresh scan id and fresh in-memory counters.
    sc.scan_state.update(
        scan_id=scan_id,
        status="running",
        phase="takeout",
        progress=0,
        total=0,
        message="Resuming scan...",
        pause_requested=False,
        files_indexed=0,
        phashes_done=0,
        folder_path=folder_path,
        stats={},
    )

    thread = threading.Thread(
        target=sc.run_scan,
        args=(folder_path, scan_id, "incremental"),
        daemon=True,
    )
    thread.start()
    return {"scan_id": scan_id}
@app.delete("/api/scan/reset") @app.delete("/api/scan/reset")
def scan_reset(confirm: str = Query("")): def scan_reset(confirm: str = Query("")):
if confirm != "RESET": if confirm != "RESET":
@@ -178,7 +233,9 @@ def scan_reset(confirm: str = Query("")):
con.close() con.close()
sc.scan_state.update( sc.scan_state.update(
scan_id=None, status="idle", phase="idle", scan_id=None, status="idle", phase="idle",
progress=0, total=0, message="", stats={}, progress=0, total=0, message="",
pause_requested=False, files_indexed=0,
phashes_done=0, folder_path=None, stats={},
) )
return {"success": True} return {"success": True}

View File

@@ -44,14 +44,17 @@ DB_PATH = str(_DATA_DIR / "dupfinder.db")
# Shared scan state (updated by background thread, read by status endpoint) # Shared scan state (updated by background thread, read by status endpoint)
scan_state = { scan_state = {
"scan_id": None, "scan_id": None,
"status": "idle", # idle | running | complete | error | cancelled "status": "idle", # idle|running|paused|complete|error
"phase": "idle", # discovery | takeout | indexing | phash | grouping | done "phase": "idle", # takeout|indexing|phash|grouping|done
"progress": 0, "progress": 0,
"total": 0, "total": 0,
"message": "", "message": "",
"cancel_requested": False, "folder_path": None, # persists so resume knows where to continue
"stats": {}, "pause_requested": False,
"files_indexed": 0, # cumulative across phases
"phashes_done": 0,
"stats": {},
} }
@@ -92,12 +95,15 @@ def init_db():
); );
CREATE TABLE IF NOT EXISTS scans ( CREATE TABLE IF NOT EXISTS scans (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
folder_path TEXT NOT NULL, folder_path TEXT NOT NULL,
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP, completed_at TIMESTAMP,
total_files INTEGER DEFAULT 0, total_files INTEGER DEFAULT 0,
status TEXT DEFAULT 'running' files_indexed INTEGER DEFAULT 0,
phashes_done INTEGER DEFAULT 0,
last_phase TEXT DEFAULT 'indexing',
status TEXT DEFAULT 'running'
); );
CREATE TABLE IF NOT EXISTS duplicate_groups ( CREATE TABLE IF NOT EXISTS duplicate_groups (
@@ -122,7 +128,48 @@ def init_db():
CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height); CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height);
CREATE INDEX IF NOT EXISTS idx_status ON files(status); CREATE INDEX IF NOT EXISTS idx_status ON files(status);
""") """)
# Migration: add new columns to scans if upgrading from older schema
for col, defn in [
("files_indexed", "INTEGER DEFAULT 0"),
("phashes_done", "INTEGER DEFAULT 0"),
("last_phase", "TEXT DEFAULT 'indexing'"),
]:
try:
cur.execute(f"ALTER TABLE scans ADD COLUMN {col} {defn}")
except Exception:
pass # column already exists
con.commit() con.commit()
# ── Detect interrupted scans from previous run ────────────────────────────
# Any scan left as 'running' means the server was killed mid-scan.
# Mark them 'paused' so the UI offers a resume button.
cur.execute("""
UPDATE scans SET status = 'paused'
WHERE status = 'running'
""")
con.commit()
# Restore scan_state if there's a paused scan
cur.execute("""
SELECT id, folder_path, files_indexed, phashes_done, last_phase
FROM scans WHERE status = 'paused'
ORDER BY started_at DESC LIMIT 1
""")
row = cur.fetchone()
if row:
scan_state.update(
scan_id=row["id"],
status="paused",
phase=row["last_phase"] or "indexing",
folder_path=row["folder_path"],
files_indexed=row["files_indexed"] or 0,
phashes_done=row["phashes_done"] or 0,
message=(
f"Paused — {row['files_indexed']:,} files indexed, "
f"{row['phashes_done']:,} phashes done"
),
)
con.close() con.close()
@@ -473,11 +520,27 @@ def _run_filesize_pass(con: sqlite3.Connection, scan_id: int):
) )
# ── Pause helpers ────────────────────────────────────────────────────────────
def _save_pause_state(cur, scan_id: int, phase: str,
files_indexed: int, phashes_done: int):
"""Persist pause progress so the scan survives a server restart."""
cur.execute("""
UPDATE scans SET
status = 'paused',
last_phase = ?,
files_indexed = ?,
phashes_done = ?
WHERE id = ?
""", (phase, files_indexed, phashes_done, scan_id))
# ── Main scan entry point ───────────────────────────────────────────────────── # ── Main scan entry point ─────────────────────────────────────────────────────
def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"): def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
"""Main scan function — runs in background thread.""" """Main scan function — runs in background thread."""
global scan_state global scan_state
scan_state["folder_path"] = folder_path # persist so resume knows where to continue
con = get_db() con = get_db()
cur = con.cursor() cur = con.cursor()
@@ -498,10 +561,13 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
else: else:
scan_state["message"] = "Not a Takeout folder — skipping" scan_state["message"] = "Not a Takeout folder — skipping"
if scan_state["cancel_requested"]: if scan_state["pause_requested"]:
_mark_scan(cur, scan_id, "cancelled") _save_pause_state(cur, scan_id, "takeout", 0, 0)
con.commit() con.commit()
scan_state["status"] = "cancelled" scan_state.update(
status="paused", pause_requested=False,
message="Paused during Takeout check",
)
return return
# ── Phases: discovery + indexing (pipelined) ────────────────────── # ── Phases: discovery + indexing (pipelined) ──────────────────────
@@ -530,6 +596,7 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
_discovered = [0] # total files found by walker so far _discovered = [0] # total files found by walker so far
_done = [0] # files fully indexed (skipped + processed) _done = [0] # files fully indexed (skipped + processed)
_walk_done = [False] _walk_done = [False]
_pause_at_end = False # set True when pause requested mid-walk
all_files: list[str] = [] all_files: list[str] = []
to_skip: list[str] = [] to_skip: list[str] = []
changed_ids: list[int] = [] changed_ids: list[int] = []
@@ -608,12 +675,9 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
for root, dirs, files in os.walk(folder_path): for root, dirs, files in os.walk(folder_path):
dirs[:] = [d for d in dirs if not d.startswith(".")] dirs[:] = [d for d in dirs if not d.startswith(".")]
if scan_state["cancel_requested"]: if scan_state["pause_requested"]:
pool.shutdown(wait=False, cancel_futures=True) _pause_at_end = True
_mark_scan(cur, scan_id, "cancelled") break # stop walking; in-flight futures drain normally
con.commit()
scan_state["status"] = "cancelled"
return
for fname in files: for fname in files:
if fname.endswith(".json"): if fname.endswith(".json"):
@@ -671,12 +735,6 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
# ── Wait for remaining futures ──────────────────────────────── # ── Wait for remaining futures ────────────────────────────────
scan_state["total"] = len(all_files) scan_state["total"] = len(all_files)
for future in as_completed(pending): for future in as_completed(pending):
if scan_state["cancel_requested"]:
pool.shutdown(wait=False, cancel_futures=True)
_mark_scan(cur, scan_id, "cancelled")
con.commit()
scan_state["status"] = "cancelled"
return
path, existing = pending[future] path, existing = pending[future]
_write_result(path, future.result(), existing) _write_result(path, future.result(), existing)
with _lock: with _lock:
@@ -691,6 +749,17 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
con.commit() con.commit()
# ── Pause checkpoint: after indexing ──────────────────────────────
scan_state["files_indexed"] = _done[0]
if _pause_at_end:
_save_pause_state(cur, scan_id, "indexing", _done[0], 0)
con.commit()
scan_state.update(
status="paused", pause_requested=False,
message=f"Paused — {_done[0]:,} files indexed",
)
return
# ── Phase: phash ────────────────────────────────────────────────── # ── Phase: phash ──────────────────────────────────────────────────
phasher = get_phasher() phasher = get_phasher()
hw_label = "GPU" if phasher.using_gpu else "CPU" hw_label = "GPU" if phasher.using_gpu else "CPU"
@@ -709,29 +778,34 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
scan_state["total"] = len(photo_rows) scan_state["total"] = len(photo_rows)
if photo_rows: if photo_rows:
# Build id lookup so we can write results back efficiently
path_to_id = {row["path"]: row["id"] for row in photo_rows} path_to_id = {row["path"]: row["id"] for row in photo_rows}
all_paths = list(path_to_id.keys()) all_paths = list(path_to_id.keys())
def _phash_progress(n_done: int): # Process in chunks so pause requests are honoured between batches
if scan_state["cancel_requested"]: PHASH_CHUNK = 500
return phashes_written = 0
scan_state["progress"] = n_done
scan_state["message"] = (
f"Phash ({hw_label}): {n_done:,} / {len(all_paths):,}"
)
results = phasher.hash_files(all_paths, progress_cb=_phash_progress) for chunk_start in range(0, len(all_paths), PHASH_CHUNK):
if scan_state["pause_requested"]:
# Bulk write to DB in chunks of 500 _save_pause_state(
items = list(results.items()) cur, scan_id, "phash",
for chunk_start in range(0, len(items), 500): scan_state["files_indexed"], phashes_written,
if scan_state["cancel_requested"]: )
_mark_scan(cur, scan_id, "cancelled")
con.commit() con.commit()
scan_state["status"] = "cancelled" scan_state.update(
status="paused", pause_requested=False,
phashes_done=phashes_written,
message=(
f"Paused — {phashes_written:,} / {len(all_paths):,} "
"perceptual hashes computed"
),
)
return return
for path, ph in items[chunk_start : chunk_start + 500]:
chunk = all_paths[chunk_start : chunk_start + PHASH_CHUNK]
chunk_results = phasher.hash_files(chunk, progress_cb=None)
for path, ph in chunk_results.items():
fid = path_to_id.get(path) fid = path_to_id.get(path)
if fid and ph: if fid and ph:
cur.execute( cur.execute(
@@ -739,6 +813,13 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
) )
con.commit() con.commit()
phashes_written += len(chunk)
scan_state["phashes_done"] = phashes_written
scan_state["progress"] = phashes_written
scan_state["message"] = (
f"Phash ({hw_label}): {phashes_written:,} / {len(all_paths):,}"
)
con.commit() con.commit()
# ── Phase: grouping ─────────────────────────────────────────────── # ── Phase: grouping ───────────────────────────────────────────────

View File

@@ -61,6 +61,7 @@
#scan-chip.complete { border-color: var(--success); color: var(--success); } #scan-chip.complete { border-color: var(--success); color: var(--success); }
#scan-chip.error { border-color: var(--danger); color: var(--danger); } #scan-chip.error { border-color: var(--danger); color: var(--danger); }
#scan-chip.cancelled { border-color: var(--warning); color: var(--warning); } #scan-chip.cancelled { border-color: var(--warning); color: var(--warning); }
#scan-chip.paused { border-color: var(--warning); color: var(--warning); }
#topbar-stats { margin-left: auto; display: flex; gap: 20px; font-size: 12px; color: var(--text-dim); } #topbar-stats { margin-left: auto; display: flex; gap: 20px; font-size: 12px; color: var(--text-dim); }
#topbar-stats span b { color: var(--text); } #topbar-stats span b { color: var(--text); }
@@ -242,6 +243,20 @@
/* ── Rescan buttons ── */ /* ── Rescan buttons ── */
#rescan-area { display: none; margin-top: 16px; } #rescan-area { display: none; margin-top: 16px; }
#rescan-area.show { display: block; } #rescan-area.show { display: block; }
#paused-area { display: none; margin-top: 16px; }
#paused-area.show { display: block; }
.pause-banner {
display: flex; align-items: flex-start; gap: 12px;
background: rgba(226,164,58,.1);
border: 1px solid rgba(226,164,58,.35);
border-radius: var(--radius);
padding: 12px 14px;
margin-bottom: 10px;
}
.pause-icon { font-size: 22px; line-height: 1; }
.pause-title { font-weight: 600; color: var(--warning); margin-bottom: 4px; }
.pause-details { font-size: 12px; color: var(--text-dim); line-height: 1.6; }
.rescan-info { font-size: 12px; color: var(--text-dim); margin-bottom: 10px; } .rescan-info { font-size: 12px; color: var(--text-dim); margin-bottom: 10px; }
.rescan-buttons { .rescan-buttons {
display: flex; display: flex;
@@ -765,7 +780,21 @@
<span class="phase-pill" data-phase="grouping">Grouping</span> <span class="phase-pill" data-phase="grouping">Grouping</span>
</div> </div>
<div class="mt8"> <div class="mt8">
<button class="btn-secondary btn-sm" onclick="cancelScan()">Cancel</button> <button class="btn-secondary btn-sm" onclick="pauseScan()">&#9646;&#9646; Pause</button>
</div>
</div>
<div id="paused-area">
<div class="pause-banner">
<div class="pause-icon">&#9646;&#9646;</div>
<div class="pause-info">
<div class="pause-title">Scan paused</div>
<div id="pause-details" class="pause-details"></div>
</div>
</div>
<div style="display:flex;gap:8px;flex-wrap:wrap;">
<button class="btn-primary btn-sm" onclick="resumeScan()">&#9654; Resume</button>
<button class="btn-danger btn-sm" onclick="confirmFullReset()">Full reset &#9888;</button>
</div> </div>
</div> </div>
@@ -1058,9 +1087,11 @@ function updateScanUI(s) {
chip.classList.add(s.status); chip.classList.add(s.status);
const isRunning = s.status === 'running'; const isRunning = s.status === 'running';
const isPaused = s.status === 'paused';
el('progress-area').classList.toggle('show', isRunning); el('progress-area').classList.toggle('show', isRunning);
el('first-scan-ui').style.display = (s.scan_id || isRunning) ? 'none' : ''; el('paused-area').classList.toggle('show', isPaused);
el('rescan-area').classList.toggle('show', !isRunning && !!s.scan_id); el('first-scan-ui').style.display = (s.scan_id || isRunning || isPaused) ? 'none' : '';
el('rescan-area').classList.toggle('show', !isRunning && !isPaused && !!s.scan_id);
if (isRunning) { if (isRunning) {
el('progress-msg').textContent = s.message || ''; el('progress-msg').textContent = s.message || '';
@@ -1081,7 +1112,16 @@ function updateScanUI(s) {
}); });
} }
if (s.scan_id && !isRunning) { if (isPaused) {
const parts = [];
if (s.folder_path) parts.push(`Folder: ${s.folder_path}`);
if (s.files_indexed) parts.push(`${fmt(s.files_indexed)} files indexed`);
if (s.phashes_done) parts.push(`${fmt(s.phashes_done)} phashes computed`);
if (s.message) parts.push(s.message);
el('pause-details').textContent = parts.join(' · ') || 'Progress saved';
}
if (s.scan_id && !isRunning && !isPaused) {
// populate rescan folder from last scan // populate rescan folder from last scan
el('rescan-folder-input').value = el('folder-input').value || '/photos'; el('rescan-folder-input').value = el('folder-input').value || '/photos';
} }
@@ -1114,11 +1154,20 @@ async function startScan(mode) {
} }
} }
async function cancelScan() { async function pauseScan() {
try { try {
await api('POST', '/api/scan/cancel'); await api('POST', '/api/scan/pause');
showToast('Cancelling scan...'); showToast('Pausing scan — finishing in-flight work...');
} catch(e) {} } catch(e) { showToast('Error: ' + e.message, 3000); }
}
async function resumeScan() {
try {
await api('POST', '/api/scan/resume');
state.scanStatus = 'running';
showToast('Resuming scan...');
startPoller();
} catch(e) { showToast('Error: ' + e.message, 4000); }
} }
function confirmFullReset() { function confirmFullReset() {
@@ -1548,6 +1597,7 @@ async function init() {
try { try {
const s = await api('GET', '/api/scan/status'); const s = await api('GET', '/api/scan/status');
updateScanUI(s); updateScanUI(s);
state.scanStatus = s.status;
if (s.status === 'running') startPoller(); if (s.status === 'running') startPoller();
} catch(e) {} } catch(e) {}
} }