feat: replace Cancel with Pause/Resume — survives server restarts

- scanner.py: replace cancel_requested with pause_requested throughout;
  pause during walk drains in-flight futures gracefully then saves state;
  phash phase processes in 500-image chunks with pause check between each;
  _save_pause_state() persists files_indexed/phashes_done/last_phase to DB;
  init_db() now detects killed-mid-scan (running→paused) on startup and restores scan_state

- main.py: add POST /api/scan/pause and POST /api/scan/resume endpoints;
  /api/scan/cancel kept as alias; scan_status now returns folder_path,
  files_indexed, phashes_done; scan_reset clears all new fields

- index.html: "Cancel" → "⏸ Pause" button; new #paused-area banner shows
  folder, files indexed, phashes done with "▶ Resume" and "Full reset"
  buttons; updateScanUI handles paused status; pauseScan()/resumeScan()
  JS functions added; chip gains .paused amber style

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
tocmo
2026-04-05 02:11:00 -04:00
parent f37bd76fed
commit 356f922940
3 changed files with 255 additions and 67 deletions

View File

@@ -105,11 +105,14 @@ def scan_start(body: ScanStartBody):
sc.scan_state.update(
scan_id=scan_id,
status="running",
phase="discovery",
phase="takeout",
progress=0,
total=0,
message="Starting...",
cancel_requested=False,
pause_requested=False,
files_indexed=0,
phashes_done=0,
folder_path=body.folder_path,
stats={},
)
@@ -146,24 +149,76 @@ def scan_status():
con.close()
return {
"scan_id": state["scan_id"],
"status": state["status"],
"phase": state["phase"],
"progress": state["progress"],
"total": state["total"],
"message": state["message"],
"stats": stats,
"scan_id": state["scan_id"],
"status": state["status"],
"phase": state["phase"],
"progress": state["progress"],
"total": state["total"],
"message": state["message"],
"folder_path": state.get("folder_path"),
"files_indexed": state.get("files_indexed", 0),
"phashes_done": state.get("phashes_done", 0),
"stats": stats,
}
@app.post("/api/scan/cancel")
def scan_cancel():
@app.post("/api/scan/pause")
def scan_pause():
if sc.scan_state["status"] != "running":
raise HTTPException(400, "No scan is currently running")
sc.scan_state["cancel_requested"] = True
sc.scan_state["pause_requested"] = True
return {"success": True}
# Keep /cancel as an alias so any lingering clients still work
@app.post("/api/scan/cancel")
def scan_cancel():
    """Legacy alias for the pause endpoint.

    Delegates to scan_pause(), so a POST to /api/scan/cancel behaves exactly
    like a POST to /api/scan/pause (same response, same 400 when no scan is
    running).
    """
    response = scan_pause()
    return response
@app.post("/api/scan/resume")
def scan_resume():
    """Resume a paused scan by starting a new incremental scan of its folder.

    The folder is taken from the shared scan state. A new row is inserted
    into ``scans`` for the resumed run and ``sc.run_scan`` is launched in a
    daemon thread in ``"incremental"`` mode.

    Returns:
        ``{"scan_id": <id of the newly created scans row>}``

    Raises:
        HTTPException(400): if no scan is currently paused, or if the paused
            state carries no folder path.
    """
    if sc.scan_state["status"] != "paused":
        raise HTTPException(400, "No paused scan to resume")

    folder_path = sc.scan_state.get("folder_path")
    if not folder_path:
        raise HTTPException(400, "No folder path saved — please start a new scan")

    con = get_db()
    try:
        cur = con.cursor()
        # Retire the row(s) being resumed. The resumed run gets its own row
        # below; leaving the old one as 'paused' would make the startup
        # check that restores paused scans report a stale paused scan after
        # this run has finished. 'cancelled' is the status previously used
        # for abandoned scan rows.
        cur.execute("UPDATE scans SET status = 'cancelled' WHERE status = 'paused'")
        cur.execute(
            "INSERT INTO scans (folder_path, status) VALUES (?, 'running')",
            (folder_path,),
        )
        scan_id = cur.lastrowid
        # Both statements are committed together so the old row is never
        # retired without its replacement existing.
        con.commit()
    finally:
        # Always release the connection, even if a statement above raised.
        con.close()

    # Counters restart at zero for the new run; the scan thread repopulates
    # them as it progresses.
    sc.scan_state.update(
        scan_id=scan_id,
        status="running",
        phase="takeout",
        progress=0,
        total=0,
        message="Resuming scan...",
        pause_requested=False,
        files_indexed=0,
        phashes_done=0,
        folder_path=folder_path,
        stats={},
    )

    thread = threading.Thread(
        target=sc.run_scan,
        args=(folder_path, scan_id, "incremental"),
        daemon=True,
    )
    thread.start()

    return {"scan_id": scan_id}
@app.delete("/api/scan/reset")
def scan_reset(confirm: str = Query("")):
if confirm != "RESET":
@@ -178,7 +233,9 @@ def scan_reset(confirm: str = Query("")):
con.close()
sc.scan_state.update(
scan_id=None, status="idle", phase="idle",
progress=0, total=0, message="", stats={},
progress=0, total=0, message="",
pause_requested=False, files_indexed=0,
phashes_done=0, folder_path=None, stats={},
)
return {"success": True}

View File

@@ -44,14 +44,17 @@ DB_PATH = str(_DATA_DIR / "dupfinder.db")
# Shared scan state (updated by background thread, read by status endpoint)
scan_state = {
"scan_id": None,
"status": "idle", # idle | running | complete | error | cancelled
"phase": "idle", # discovery | takeout | indexing | phash | grouping | done
"progress": 0,
"total": 0,
"message": "",
"cancel_requested": False,
"stats": {},
"scan_id": None,
"status": "idle", # idle|running|paused|complete|error
"phase": "idle", # takeout|indexing|phash|grouping|done
"progress": 0,
"total": 0,
"message": "",
"folder_path": None, # persists so resume knows where to continue
"pause_requested": False,
"files_indexed": 0, # cumulative across phases
"phashes_done": 0,
"stats": {},
}
@@ -92,12 +95,15 @@ def init_db():
);
CREATE TABLE IF NOT EXISTS scans (
id INTEGER PRIMARY KEY AUTOINCREMENT,
folder_path TEXT NOT NULL,
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP,
total_files INTEGER DEFAULT 0,
status TEXT DEFAULT 'running'
id INTEGER PRIMARY KEY AUTOINCREMENT,
folder_path TEXT NOT NULL,
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP,
total_files INTEGER DEFAULT 0,
files_indexed INTEGER DEFAULT 0,
phashes_done INTEGER DEFAULT 0,
last_phase TEXT DEFAULT 'indexing',
status TEXT DEFAULT 'running'
);
CREATE TABLE IF NOT EXISTS duplicate_groups (
@@ -122,7 +128,48 @@ def init_db():
CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height);
CREATE INDEX IF NOT EXISTS idx_status ON files(status);
""")
# Migration: add new columns to scans if upgrading from older schema
for col, defn in [
("files_indexed", "INTEGER DEFAULT 0"),
("phashes_done", "INTEGER DEFAULT 0"),
("last_phase", "TEXT DEFAULT 'indexing'"),
]:
try:
cur.execute(f"ALTER TABLE scans ADD COLUMN {col} {defn}")
except Exception:
pass # column already exists
con.commit()
# ── Detect interrupted scans from previous run ────────────────────────────
# Any scan left as 'running' means the server was killed mid-scan.
# Mark them 'paused' so the UI offers a resume button.
cur.execute("""
UPDATE scans SET status = 'paused'
WHERE status = 'running'
""")
con.commit()
# Restore scan_state if there's a paused scan
cur.execute("""
SELECT id, folder_path, files_indexed, phashes_done, last_phase
FROM scans WHERE status = 'paused'
ORDER BY started_at DESC LIMIT 1
""")
row = cur.fetchone()
if row:
scan_state.update(
scan_id=row["id"],
status="paused",
phase=row["last_phase"] or "indexing",
folder_path=row["folder_path"],
files_indexed=row["files_indexed"] or 0,
phashes_done=row["phashes_done"] or 0,
message=(
f"Paused — {row['files_indexed']:,} files indexed, "
f"{row['phashes_done']:,} phashes done"
),
)
con.close()
@@ -473,11 +520,27 @@ def _run_filesize_pass(con: sqlite3.Connection, scan_id: int):
)
# ── Pause helpers ────────────────────────────────────────────────────────────
def _save_pause_state(cur, scan_id: int, phase: str,
files_indexed: int, phashes_done: int):
"""Persist pause progress so the scan survives a server restart."""
cur.execute("""
UPDATE scans SET
status = 'paused',
last_phase = ?,
files_indexed = ?,
phashes_done = ?
WHERE id = ?
""", (phase, files_indexed, phashes_done, scan_id))
# ── Main scan entry point ─────────────────────────────────────────────────────
def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
"""Main scan function — runs in background thread."""
global scan_state
scan_state["folder_path"] = folder_path # persist so resume knows where to continue
con = get_db()
cur = con.cursor()
@@ -498,10 +561,13 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
else:
scan_state["message"] = "Not a Takeout folder — skipping"
if scan_state["cancel_requested"]:
_mark_scan(cur, scan_id, "cancelled")
if scan_state["pause_requested"]:
_save_pause_state(cur, scan_id, "takeout", 0, 0)
con.commit()
scan_state["status"] = "cancelled"
scan_state.update(
status="paused", pause_requested=False,
message="Paused during Takeout check",
)
return
# ── Phases: discovery + indexing (pipelined) ──────────────────────
@@ -530,6 +596,7 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
_discovered = [0] # total files found by walker so far
_done = [0] # files fully indexed (skipped + processed)
_walk_done = [False]
_pause_at_end = False # set True when pause requested mid-walk
all_files: list[str] = []
to_skip: list[str] = []
changed_ids: list[int] = []
@@ -608,12 +675,9 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
for root, dirs, files in os.walk(folder_path):
dirs[:] = [d for d in dirs if not d.startswith(".")]
if scan_state["cancel_requested"]:
pool.shutdown(wait=False, cancel_futures=True)
_mark_scan(cur, scan_id, "cancelled")
con.commit()
scan_state["status"] = "cancelled"
return
if scan_state["pause_requested"]:
_pause_at_end = True
break # stop walking; in-flight futures drain normally
for fname in files:
if fname.endswith(".json"):
@@ -671,12 +735,6 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
# ── Wait for remaining futures ────────────────────────────────
scan_state["total"] = len(all_files)
for future in as_completed(pending):
if scan_state["cancel_requested"]:
pool.shutdown(wait=False, cancel_futures=True)
_mark_scan(cur, scan_id, "cancelled")
con.commit()
scan_state["status"] = "cancelled"
return
path, existing = pending[future]
_write_result(path, future.result(), existing)
with _lock:
@@ -691,6 +749,17 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
con.commit()
# ── Pause checkpoint: after indexing ──────────────────────────────
scan_state["files_indexed"] = _done[0]
if _pause_at_end:
_save_pause_state(cur, scan_id, "indexing", _done[0], 0)
con.commit()
scan_state.update(
status="paused", pause_requested=False,
message=f"Paused — {_done[0]:,} files indexed",
)
return
# ── Phase: phash ──────────────────────────────────────────────────
phasher = get_phasher()
hw_label = "GPU" if phasher.using_gpu else "CPU"
@@ -709,29 +778,34 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
scan_state["total"] = len(photo_rows)
if photo_rows:
# Build id lookup so we can write results back efficiently
path_to_id = {row["path"]: row["id"] for row in photo_rows}
all_paths = list(path_to_id.keys())
def _phash_progress(n_done: int):
if scan_state["cancel_requested"]:
return
scan_state["progress"] = n_done
scan_state["message"] = (
f"Phash ({hw_label}): {n_done:,} / {len(all_paths):,}"
)
# Process in chunks so pause requests are honoured between batches
PHASH_CHUNK = 500
phashes_written = 0
results = phasher.hash_files(all_paths, progress_cb=_phash_progress)
# Bulk write to DB in chunks of 500
items = list(results.items())
for chunk_start in range(0, len(items), 500):
if scan_state["cancel_requested"]:
_mark_scan(cur, scan_id, "cancelled")
for chunk_start in range(0, len(all_paths), PHASH_CHUNK):
if scan_state["pause_requested"]:
_save_pause_state(
cur, scan_id, "phash",
scan_state["files_indexed"], phashes_written,
)
con.commit()
scan_state["status"] = "cancelled"
scan_state.update(
status="paused", pause_requested=False,
phashes_done=phashes_written,
message=(
f"Paused — {phashes_written:,} / {len(all_paths):,} "
"perceptual hashes computed"
),
)
return
for path, ph in items[chunk_start : chunk_start + 500]:
chunk = all_paths[chunk_start : chunk_start + PHASH_CHUNK]
chunk_results = phasher.hash_files(chunk, progress_cb=None)
for path, ph in chunk_results.items():
fid = path_to_id.get(path)
if fid and ph:
cur.execute(
@@ -739,6 +813,13 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
)
con.commit()
phashes_written += len(chunk)
scan_state["phashes_done"] = phashes_written
scan_state["progress"] = phashes_written
scan_state["message"] = (
f"Phash ({hw_label}): {phashes_written:,} / {len(all_paths):,}"
)
con.commit()
# ── Phase: grouping ───────────────────────────────────────────────

View File

@@ -61,6 +61,7 @@
#scan-chip.complete { border-color: var(--success); color: var(--success); }
#scan-chip.error { border-color: var(--danger); color: var(--danger); }
#scan-chip.cancelled { border-color: var(--warning); color: var(--warning); }
#scan-chip.paused { border-color: var(--warning); color: var(--warning); }
#topbar-stats { margin-left: auto; display: flex; gap: 20px; font-size: 12px; color: var(--text-dim); }
#topbar-stats span b { color: var(--text); }
@@ -242,6 +243,20 @@
/* ── Rescan buttons ── */
#rescan-area { display: none; margin-top: 16px; }
#rescan-area.show { display: block; }
#paused-area { display: none; margin-top: 16px; }
#paused-area.show { display: block; }
.pause-banner {
display: flex; align-items: flex-start; gap: 12px;
background: rgba(226,164,58,.1);
border: 1px solid rgba(226,164,58,.35);
border-radius: var(--radius);
padding: 12px 14px;
margin-bottom: 10px;
}
.pause-icon { font-size: 22px; line-height: 1; }
.pause-title { font-weight: 600; color: var(--warning); margin-bottom: 4px; }
.pause-details { font-size: 12px; color: var(--text-dim); line-height: 1.6; }
.rescan-info { font-size: 12px; color: var(--text-dim); margin-bottom: 10px; }
.rescan-buttons {
display: flex;
@@ -765,7 +780,21 @@
<span class="phase-pill" data-phase="grouping">Grouping</span>
</div>
<div class="mt8">
<button class="btn-secondary btn-sm" onclick="cancelScan()">Cancel</button>
<button class="btn-secondary btn-sm" onclick="pauseScan()">&#9646;&#9646; Pause</button>
</div>
</div>
<div id="paused-area">
<div class="pause-banner">
<div class="pause-icon">&#9646;&#9646;</div>
<div class="pause-info">
<div class="pause-title">Scan paused</div>
<div id="pause-details" class="pause-details"></div>
</div>
</div>
<div style="display:flex;gap:8px;flex-wrap:wrap;">
<button class="btn-primary btn-sm" onclick="resumeScan()">&#9654; Resume</button>
<button class="btn-danger btn-sm" onclick="confirmFullReset()">Full reset &#9888;</button>
</div>
</div>
@@ -1058,9 +1087,11 @@ function updateScanUI(s) {
chip.classList.add(s.status);
const isRunning = s.status === 'running';
const isPaused = s.status === 'paused';
el('progress-area').classList.toggle('show', isRunning);
el('first-scan-ui').style.display = (s.scan_id || isRunning) ? 'none' : '';
el('rescan-area').classList.toggle('show', !isRunning && !!s.scan_id);
el('paused-area').classList.toggle('show', isPaused);
el('first-scan-ui').style.display = (s.scan_id || isRunning || isPaused) ? 'none' : '';
el('rescan-area').classList.toggle('show', !isRunning && !isPaused && !!s.scan_id);
if (isRunning) {
el('progress-msg').textContent = s.message || '';
@@ -1081,7 +1112,16 @@ function updateScanUI(s) {
});
}
if (s.scan_id && !isRunning) {
if (isPaused) {
const parts = [];
if (s.folder_path) parts.push(`Folder: ${s.folder_path}`);
if (s.files_indexed) parts.push(`${fmt(s.files_indexed)} files indexed`);
if (s.phashes_done) parts.push(`${fmt(s.phashes_done)} phashes computed`);
if (s.message) parts.push(s.message);
el('pause-details').textContent = parts.join(' · ') || 'Progress saved';
}
if (s.scan_id && !isRunning && !isPaused) {
// populate rescan folder from last scan
el('rescan-folder-input').value = el('folder-input').value || '/photos';
}
@@ -1114,11 +1154,20 @@ async function startScan(mode) {
}
}
async function cancelScan() {
async function pauseScan() {
try {
await api('POST', '/api/scan/cancel');
showToast('Cancelling scan...');
} catch(e) {}
await api('POST', '/api/scan/pause');
showToast('Pausing scan — finishing in-flight work...');
} catch(e) { showToast('Error: ' + e.message, 3000); }
}
/**
 * Ask the server to resume the paused scan, then mark the local state as
 * running and start polling. Any failure is reported in a toast.
 */
async function resumeScan() {
  try {
    await api('POST', '/api/scan/resume');
    state.scanStatus = 'running';
    showToast('Resuming scan...');
    startPoller();
  } catch (err) {
    showToast(`Error: ${err.message}`, 4000);
  }
}
function confirmFullReset() {
@@ -1548,6 +1597,7 @@ async function init() {
try {
const s = await api('GET', '/api/scan/status');
updateScanUI(s);
state.scanStatus = s.status;
if (s.status === 'running') startPoller();
} catch(e) {}
}