feat: replace Cancel with Pause/Resume — survives server restarts
- scanner.py: replace cancel_requested with pause_requested throughout; pause during walk drains in-flight futures gracefully then saves state; phash phase processes in 500-image chunks with pause check between each; _save_pause_state() persists files_indexed/phashes_done/last_phase to DB; init_db() already detects killed-mid-scan (running→paused) on startup
- main.py: add POST /api/scan/pause and POST /api/scan/resume endpoints; /api/scan/cancel kept as alias; scan_status now returns folder_path, files_indexed, phashes_done; scan_reset clears all new fields
- index.html: "Cancel" → "⏸ Pause" button; new #paused-area banner shows folder, files indexed, phashes done with "▶ Resume" and "Full reset" buttons; updateScanUI handles paused status; pauseScan()/resumeScan() JS functions added; chip gains .paused amber style

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
83
app/main.py
83
app/main.py
@@ -105,11 +105,14 @@ def scan_start(body: ScanStartBody):
|
||||
sc.scan_state.update(
|
||||
scan_id=scan_id,
|
||||
status="running",
|
||||
phase="discovery",
|
||||
phase="takeout",
|
||||
progress=0,
|
||||
total=0,
|
||||
message="Starting...",
|
||||
cancel_requested=False,
|
||||
pause_requested=False,
|
||||
files_indexed=0,
|
||||
phashes_done=0,
|
||||
folder_path=body.folder_path,
|
||||
stats={},
|
||||
)
|
||||
|
||||
@@ -146,24 +149,76 @@ def scan_status():
|
||||
con.close()
|
||||
|
||||
return {
|
||||
"scan_id": state["scan_id"],
|
||||
"status": state["status"],
|
||||
"phase": state["phase"],
|
||||
"progress": state["progress"],
|
||||
"total": state["total"],
|
||||
"message": state["message"],
|
||||
"stats": stats,
|
||||
"scan_id": state["scan_id"],
|
||||
"status": state["status"],
|
||||
"phase": state["phase"],
|
||||
"progress": state["progress"],
|
||||
"total": state["total"],
|
||||
"message": state["message"],
|
||||
"folder_path": state.get("folder_path"),
|
||||
"files_indexed": state.get("files_indexed", 0),
|
||||
"phashes_done": state.get("phashes_done", 0),
|
||||
"stats": stats,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/api/scan/cancel")
|
||||
def scan_cancel():
|
||||
@app.post("/api/scan/pause")
|
||||
def scan_pause():
|
||||
if sc.scan_state["status"] != "running":
|
||||
raise HTTPException(400, "No scan is currently running")
|
||||
sc.scan_state["cancel_requested"] = True
|
||||
sc.scan_state["pause_requested"] = True
|
||||
return {"success": True}
|
||||
|
||||
|
||||
# Legacy route: clients that still POST to /cancel get pause behaviour instead.
@app.post("/api/scan/cancel")
def scan_cancel():
    """Alias for the pause endpoint.

    Delegates to scan_pause() and returns its response unchanged; any
    HTTPException raised there propagates to the caller.
    """
    pause_response = scan_pause()
    return pause_response
|
||||
|
||||
|
||||
@app.post("/api/scan/resume")
def scan_resume():
    """Resume a paused scan by starting a new incremental pass over the saved folder.

    A new row is inserted into ``scans`` and ``sc.run_scan`` is launched in a
    daemon thread in ``"incremental"`` mode; the in-memory progress counters
    are reset to zero for the new pass.

    Returns:
        ``{"scan_id": <id of the newly inserted scans row>}``

    Raises:
        HTTPException(400): if the current status is not ``"paused"``, or if
            no folder path was saved with the paused scan.
    """
    if sc.scan_state["status"] != "paused":
        raise HTTPException(400, "No paused scan to resume")

    folder_path = sc.scan_state.get("folder_path")
    if not folder_path:
        raise HTTPException(400, "No folder path saved — please start a new scan")

    con = get_db()
    try:
        cur = con.cursor()
        # Retire the paused row(s) this resume supersedes. The startup restore
        # logic picks the newest row whose status is 'paused'; leaving the old
        # row untouched would make every later server restart fall back into
        # the paused state even after the resumed scan has finished.
        cur.execute("UPDATE scans SET status = 'resumed' WHERE status = 'paused'")
        cur.execute(
            "INSERT INTO scans (folder_path, status) VALUES (?, 'running')",
            (folder_path,),
        )
        scan_id = cur.lastrowid
        con.commit()
    finally:
        # Always release the connection, including when a statement fails.
        con.close()

    sc.scan_state.update(
        scan_id=scan_id,
        status="running",
        phase="takeout",
        progress=0,
        total=0,
        message="Resuming scan...",
        pause_requested=False,
        files_indexed=0,
        phashes_done=0,
        folder_path=folder_path,
        stats={},
    )

    # Daemon thread: the scan must not block interpreter shutdown.
    thread = threading.Thread(
        target=sc.run_scan,
        args=(folder_path, scan_id, "incremental"),
        daemon=True,
    )
    thread.start()

    return {"scan_id": scan_id}
|
||||
|
||||
|
||||
@app.delete("/api/scan/reset")
|
||||
def scan_reset(confirm: str = Query("")):
|
||||
if confirm != "RESET":
|
||||
@@ -178,7 +233,9 @@ def scan_reset(confirm: str = Query("")):
|
||||
con.close()
|
||||
sc.scan_state.update(
|
||||
scan_id=None, status="idle", phase="idle",
|
||||
progress=0, total=0, message="", stats={},
|
||||
progress=0, total=0, message="",
|
||||
pause_requested=False, files_indexed=0,
|
||||
phashes_done=0, folder_path=None, stats={},
|
||||
)
|
||||
return {"success": True}
|
||||
|
||||
|
||||
173
app/scanner.py
173
app/scanner.py
@@ -44,14 +44,17 @@ DB_PATH = str(_DATA_DIR / "dupfinder.db")
|
||||
|
||||
# Shared scan state (updated by background thread, read by status endpoint)
|
||||
scan_state = {
|
||||
"scan_id": None,
|
||||
"status": "idle", # idle | running | complete | error | cancelled
|
||||
"phase": "idle", # discovery | takeout | indexing | phash | grouping | done
|
||||
"progress": 0,
|
||||
"total": 0,
|
||||
"message": "",
|
||||
"cancel_requested": False,
|
||||
"stats": {},
|
||||
"scan_id": None,
|
||||
"status": "idle", # idle|running|paused|complete|error
|
||||
"phase": "idle", # takeout|indexing|phash|grouping|done
|
||||
"progress": 0,
|
||||
"total": 0,
|
||||
"message": "",
|
||||
"folder_path": None, # persists so resume knows where to continue
|
||||
"pause_requested": False,
|
||||
"files_indexed": 0, # cumulative across phases
|
||||
"phashes_done": 0,
|
||||
"stats": {},
|
||||
}
|
||||
|
||||
|
||||
@@ -92,12 +95,15 @@ def init_db():
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scans (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
folder_path TEXT NOT NULL,
|
||||
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
completed_at TIMESTAMP,
|
||||
total_files INTEGER DEFAULT 0,
|
||||
status TEXT DEFAULT 'running'
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
folder_path TEXT NOT NULL,
|
||||
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
completed_at TIMESTAMP,
|
||||
total_files INTEGER DEFAULT 0,
|
||||
files_indexed INTEGER DEFAULT 0,
|
||||
phashes_done INTEGER DEFAULT 0,
|
||||
last_phase TEXT DEFAULT 'indexing',
|
||||
status TEXT DEFAULT 'running'
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS duplicate_groups (
|
||||
@@ -122,7 +128,48 @@ def init_db():
|
||||
CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height);
|
||||
CREATE INDEX IF NOT EXISTS idx_status ON files(status);
|
||||
""")
|
||||
# Migration: add new columns to scans if upgrading from older schema
|
||||
for col, defn in [
|
||||
("files_indexed", "INTEGER DEFAULT 0"),
|
||||
("phashes_done", "INTEGER DEFAULT 0"),
|
||||
("last_phase", "TEXT DEFAULT 'indexing'"),
|
||||
]:
|
||||
try:
|
||||
cur.execute(f"ALTER TABLE scans ADD COLUMN {col} {defn}")
|
||||
except Exception:
|
||||
pass # column already exists
|
||||
con.commit()
|
||||
|
||||
# ── Detect interrupted scans from previous run ────────────────────────────
|
||||
# Any scan left as 'running' means the server was killed mid-scan.
|
||||
# Mark them 'paused' so the UI offers a resume button.
|
||||
cur.execute("""
|
||||
UPDATE scans SET status = 'paused'
|
||||
WHERE status = 'running'
|
||||
""")
|
||||
con.commit()
|
||||
|
||||
# Restore scan_state if there's a paused scan
|
||||
cur.execute("""
|
||||
SELECT id, folder_path, files_indexed, phashes_done, last_phase
|
||||
FROM scans WHERE status = 'paused'
|
||||
ORDER BY started_at DESC LIMIT 1
|
||||
""")
|
||||
row = cur.fetchone()
|
||||
if row:
|
||||
scan_state.update(
|
||||
scan_id=row["id"],
|
||||
status="paused",
|
||||
phase=row["last_phase"] or "indexing",
|
||||
folder_path=row["folder_path"],
|
||||
files_indexed=row["files_indexed"] or 0,
|
||||
phashes_done=row["phashes_done"] or 0,
|
||||
message=(
|
||||
f"Paused — {row['files_indexed']:,} files indexed, "
|
||||
f"{row['phashes_done']:,} phashes done"
|
||||
),
|
||||
)
|
||||
|
||||
con.close()
|
||||
|
||||
|
||||
@@ -473,11 +520,27 @@ def _run_filesize_pass(con: sqlite3.Connection, scan_id: int):
|
||||
)
|
||||
|
||||
|
||||
# ── Pause helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
def _save_pause_state(cur, scan_id: int, phase: str,
                      files_indexed: int, phashes_done: int):
    """Record a scan's pause checkpoint in the ``scans`` table.

    Sets the row's status to 'paused' and stores the phase and the two
    progress counters. The caller is responsible for committing.

    Args:
        cur: DB cursor used to run the UPDATE.
        scan_id: id of the ``scans`` row to update.
        phase: value written to ``last_phase``.
        files_indexed: value written to ``files_indexed``.
        phashes_done: value written to ``phashes_done``.
    """
    pause_sql = """
        UPDATE scans SET
            status        = 'paused',
            last_phase    = ?,
            files_indexed = ?,
            phashes_done  = ?
        WHERE id = ?
    """
    bindings = (phase, files_indexed, phashes_done, scan_id)
    cur.execute(pause_sql, bindings)
|
||||
|
||||
|
||||
# ── Main scan entry point ─────────────────────────────────────────────────────
|
||||
|
||||
def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
"""Main scan function — runs in background thread."""
|
||||
global scan_state
|
||||
scan_state["folder_path"] = folder_path # persist so resume knows where to continue
|
||||
con = get_db()
|
||||
cur = con.cursor()
|
||||
|
||||
@@ -498,10 +561,13 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
else:
|
||||
scan_state["message"] = "Not a Takeout folder — skipping"
|
||||
|
||||
if scan_state["cancel_requested"]:
|
||||
_mark_scan(cur, scan_id, "cancelled")
|
||||
if scan_state["pause_requested"]:
|
||||
_save_pause_state(cur, scan_id, "takeout", 0, 0)
|
||||
con.commit()
|
||||
scan_state["status"] = "cancelled"
|
||||
scan_state.update(
|
||||
status="paused", pause_requested=False,
|
||||
message="Paused during Takeout check",
|
||||
)
|
||||
return
|
||||
|
||||
# ── Phases: discovery + indexing (pipelined) ──────────────────────
|
||||
@@ -530,6 +596,7 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
_discovered = [0] # total files found by walker so far
|
||||
_done = [0] # files fully indexed (skipped + processed)
|
||||
_walk_done = [False]
|
||||
_pause_at_end = False # set True when pause requested mid-walk
|
||||
all_files: list[str] = []
|
||||
to_skip: list[str] = []
|
||||
changed_ids: list[int] = []
|
||||
@@ -608,12 +675,9 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
for root, dirs, files in os.walk(folder_path):
|
||||
dirs[:] = [d for d in dirs if not d.startswith(".")]
|
||||
|
||||
if scan_state["cancel_requested"]:
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
_mark_scan(cur, scan_id, "cancelled")
|
||||
con.commit()
|
||||
scan_state["status"] = "cancelled"
|
||||
return
|
||||
if scan_state["pause_requested"]:
|
||||
_pause_at_end = True
|
||||
break # stop walking; in-flight futures drain normally
|
||||
|
||||
for fname in files:
|
||||
if fname.endswith(".json"):
|
||||
@@ -671,12 +735,6 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
# ── Wait for remaining futures ────────────────────────────────
|
||||
scan_state["total"] = len(all_files)
|
||||
for future in as_completed(pending):
|
||||
if scan_state["cancel_requested"]:
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
_mark_scan(cur, scan_id, "cancelled")
|
||||
con.commit()
|
||||
scan_state["status"] = "cancelled"
|
||||
return
|
||||
path, existing = pending[future]
|
||||
_write_result(path, future.result(), existing)
|
||||
with _lock:
|
||||
@@ -691,6 +749,17 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
|
||||
con.commit()
|
||||
|
||||
# ── Pause checkpoint: after indexing ──────────────────────────────
|
||||
scan_state["files_indexed"] = _done[0]
|
||||
if _pause_at_end:
|
||||
_save_pause_state(cur, scan_id, "indexing", _done[0], 0)
|
||||
con.commit()
|
||||
scan_state.update(
|
||||
status="paused", pause_requested=False,
|
||||
message=f"Paused — {_done[0]:,} files indexed",
|
||||
)
|
||||
return
|
||||
|
||||
# ── Phase: phash ──────────────────────────────────────────────────
|
||||
phasher = get_phasher()
|
||||
hw_label = "GPU" if phasher.using_gpu else "CPU"
|
||||
@@ -709,29 +778,34 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
scan_state["total"] = len(photo_rows)
|
||||
|
||||
if photo_rows:
|
||||
# Build id lookup so we can write results back efficiently
|
||||
path_to_id = {row["path"]: row["id"] for row in photo_rows}
|
||||
all_paths = list(path_to_id.keys())
|
||||
|
||||
def _phash_progress(n_done: int):
|
||||
if scan_state["cancel_requested"]:
|
||||
return
|
||||
scan_state["progress"] = n_done
|
||||
scan_state["message"] = (
|
||||
f"Phash ({hw_label}): {n_done:,} / {len(all_paths):,}"
|
||||
)
|
||||
# Process in chunks so pause requests are honoured between batches
|
||||
PHASH_CHUNK = 500
|
||||
phashes_written = 0
|
||||
|
||||
results = phasher.hash_files(all_paths, progress_cb=_phash_progress)
|
||||
|
||||
# Bulk write to DB in chunks of 500
|
||||
items = list(results.items())
|
||||
for chunk_start in range(0, len(items), 500):
|
||||
if scan_state["cancel_requested"]:
|
||||
_mark_scan(cur, scan_id, "cancelled")
|
||||
for chunk_start in range(0, len(all_paths), PHASH_CHUNK):
|
||||
if scan_state["pause_requested"]:
|
||||
_save_pause_state(
|
||||
cur, scan_id, "phash",
|
||||
scan_state["files_indexed"], phashes_written,
|
||||
)
|
||||
con.commit()
|
||||
scan_state["status"] = "cancelled"
|
||||
scan_state.update(
|
||||
status="paused", pause_requested=False,
|
||||
phashes_done=phashes_written,
|
||||
message=(
|
||||
f"Paused — {phashes_written:,} / {len(all_paths):,} "
|
||||
"perceptual hashes computed"
|
||||
),
|
||||
)
|
||||
return
|
||||
for path, ph in items[chunk_start : chunk_start + 500]:
|
||||
|
||||
chunk = all_paths[chunk_start : chunk_start + PHASH_CHUNK]
|
||||
chunk_results = phasher.hash_files(chunk, progress_cb=None)
|
||||
|
||||
for path, ph in chunk_results.items():
|
||||
fid = path_to_id.get(path)
|
||||
if fid and ph:
|
||||
cur.execute(
|
||||
@@ -739,6 +813,13 @@ def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
|
||||
)
|
||||
con.commit()
|
||||
|
||||
phashes_written += len(chunk)
|
||||
scan_state["phashes_done"] = phashes_written
|
||||
scan_state["progress"] = phashes_written
|
||||
scan_state["message"] = (
|
||||
f"Phash ({hw_label}): {phashes_written:,} / {len(all_paths):,}"
|
||||
)
|
||||
|
||||
con.commit()
|
||||
|
||||
# ── Phase: grouping ───────────────────────────────────────────────
|
||||
|
||||
@@ -61,6 +61,7 @@
|
||||
#scan-chip.complete { border-color: var(--success); color: var(--success); }
|
||||
#scan-chip.error { border-color: var(--danger); color: var(--danger); }
|
||||
#scan-chip.cancelled { border-color: var(--warning); color: var(--warning); }
|
||||
#scan-chip.paused { border-color: var(--warning); color: var(--warning); }
|
||||
#topbar-stats { margin-left: auto; display: flex; gap: 20px; font-size: 12px; color: var(--text-dim); }
|
||||
#topbar-stats span b { color: var(--text); }
|
||||
|
||||
@@ -242,6 +243,20 @@
|
||||
/* ── Rescan buttons ── */
|
||||
#rescan-area { display: none; margin-top: 16px; }
|
||||
#rescan-area.show { display: block; }
|
||||
|
||||
#paused-area { display: none; margin-top: 16px; }
|
||||
#paused-area.show { display: block; }
|
||||
.pause-banner {
|
||||
display: flex; align-items: flex-start; gap: 12px;
|
||||
background: rgba(226,164,58,.1);
|
||||
border: 1px solid rgba(226,164,58,.35);
|
||||
border-radius: var(--radius);
|
||||
padding: 12px 14px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.pause-icon { font-size: 22px; line-height: 1; }
|
||||
.pause-title { font-weight: 600; color: var(--warning); margin-bottom: 4px; }
|
||||
.pause-details { font-size: 12px; color: var(--text-dim); line-height: 1.6; }
|
||||
.rescan-info { font-size: 12px; color: var(--text-dim); margin-bottom: 10px; }
|
||||
.rescan-buttons {
|
||||
display: flex;
|
||||
@@ -765,7 +780,21 @@
|
||||
<span class="phase-pill" data-phase="grouping">Grouping</span>
|
||||
</div>
|
||||
<div class="mt8">
|
||||
<button class="btn-secondary btn-sm" onclick="cancelScan()">Cancel</button>
|
||||
<button class="btn-secondary btn-sm" onclick="pauseScan()">▮▮ Pause</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="paused-area">
|
||||
<div class="pause-banner">
|
||||
<div class="pause-icon">▮▮</div>
|
||||
<div class="pause-info">
|
||||
<div class="pause-title">Scan paused</div>
|
||||
<div id="pause-details" class="pause-details"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div style="display:flex;gap:8px;flex-wrap:wrap;">
|
||||
<button class="btn-primary btn-sm" onclick="resumeScan()">▶ Resume</button>
|
||||
<button class="btn-danger btn-sm" onclick="confirmFullReset()">Full reset ⚠</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1058,9 +1087,11 @@ function updateScanUI(s) {
|
||||
chip.classList.add(s.status);
|
||||
|
||||
const isRunning = s.status === 'running';
|
||||
const isPaused = s.status === 'paused';
|
||||
el('progress-area').classList.toggle('show', isRunning);
|
||||
el('first-scan-ui').style.display = (s.scan_id || isRunning) ? 'none' : '';
|
||||
el('rescan-area').classList.toggle('show', !isRunning && !!s.scan_id);
|
||||
el('paused-area').classList.toggle('show', isPaused);
|
||||
el('first-scan-ui').style.display = (s.scan_id || isRunning || isPaused) ? 'none' : '';
|
||||
el('rescan-area').classList.toggle('show', !isRunning && !isPaused && !!s.scan_id);
|
||||
|
||||
if (isRunning) {
|
||||
el('progress-msg').textContent = s.message || '';
|
||||
@@ -1081,7 +1112,16 @@ function updateScanUI(s) {
|
||||
});
|
||||
}
|
||||
|
||||
if (s.scan_id && !isRunning) {
|
||||
if (isPaused) {
|
||||
const parts = [];
|
||||
if (s.folder_path) parts.push(`Folder: ${s.folder_path}`);
|
||||
if (s.files_indexed) parts.push(`${fmt(s.files_indexed)} files indexed`);
|
||||
if (s.phashes_done) parts.push(`${fmt(s.phashes_done)} phashes computed`);
|
||||
if (s.message) parts.push(s.message);
|
||||
el('pause-details').textContent = parts.join(' · ') || 'Progress saved';
|
||||
}
|
||||
|
||||
if (s.scan_id && !isRunning && !isPaused) {
|
||||
// populate rescan folder from last scan
|
||||
el('rescan-folder-input').value = el('folder-input').value || '/photos';
|
||||
}
|
||||
@@ -1114,11 +1154,20 @@ async function startScan(mode) {
|
||||
}
|
||||
}
|
||||
|
||||
async function cancelScan() {
|
||||
async function pauseScan() {
|
||||
try {
|
||||
await api('POST', '/api/scan/cancel');
|
||||
showToast('Cancelling scan...');
|
||||
} catch(e) {}
|
||||
await api('POST', '/api/scan/pause');
|
||||
showToast('Pausing scan — finishing in-flight work...');
|
||||
} catch(e) { showToast('Error: ' + e.message, 3000); }
|
||||
}
|
||||
|
||||
/**
 * Ask the server to resume the paused scan.
 * On success, mark the local state as running, show a toast and restart
 * status polling; on any failure, show the error message as a toast.
 */
async function resumeScan() {
  try {
    await api('POST', '/api/scan/resume');
    state.scanStatus = 'running';
    showToast('Resuming scan...');
    startPoller();
  } catch (err) {
    showToast(`Error: ${err.message}`, 4000);
  }
}
|
||||
|
||||
function confirmFullReset() {
|
||||
@@ -1548,6 +1597,7 @@ async function init() {
|
||||
try {
|
||||
const s = await api('GET', '/api/scan/status');
|
||||
updateScanUI(s);
|
||||
state.scanStatus = s.status;
|
||||
if (s.status === 'running') startPoller();
|
||||
} catch(e) {}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user