commit 868da9016d6f59f6a92040c59a2fc7be50eece91
Author: tocmo <tocmo@localhost>
Date:   Sat Apr 4 23:42:58 2026 -0400

    Initial implementation of duplicate finder
    
    Full project per spec: FastAPI backend, 4-method duplicate detection
    (SHA-256, phash, EXIF, filesize), Google Takeout pre-processor,
    4 scan modes, and dark-theme vanilla JS gallery frontend.
    
    Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b9f89f0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.12-slim
+
+RUN apt-get update && apt-get install -y \
+    libheif-dev libjpeg-dev libpng-dev libtiff-dev libwebp-dev exiftool \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY app/ /app/
+COPY templates/ /app/templates/
+RUN mkdir -p /data /photos /app/static
+
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..deb128d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,56 @@
+# Duplicate Finder
+
+A self-hosted Docker web app that scans a photo/video library, detects duplicates using four methods, and lets you review them in a gallery UI. **No files are ever moved, renamed, or deleted** — all decisions are recorded in SQLite only.
+
+## Quick start
+
+```bash
+# 1. Edit docker-compose.yml — set your photos volume path
+# 2. Build and run
+docker compose up -d --build
+# 3. Open http://localhost:8765
+# 4. Enter folder path in UI and click Scan
+```
+
+## Volume mounts
+
+| Container path | Purpose |
+|---|---|
+| `/photos` | Your photo library — mounted **read-only** |
+| `/data` | SQLite database persistence |
+
+Edit `docker-compose.yml` to point these at your NAS paths.
+
+## Detection methods
+
+| Method | Color | Description |
+|---|---|---|
+| SHA-256 | Blue | Byte-identical files |
+| Perceptual hash | Purple | Visually similar photos (hamming ≤ 10) |
+| EXIF timestamp + device | Amber | Same camera, same moment |
+| File size + dimensions | Gray | Same size and resolution (low confidence) |
+
+## Scan modes
+
+| Mode | Description |
+|---|---|
+| Incremental | Only re-hashes changed/new files. Prior decisions preserved. |
+| New files only | Indexes newly added files. Existing decisions untouched. |
+| Rebuild groups | Re-runs detection on existing index. No re-hashing. |
+| Full reset | Wipes everything and scans from scratch. |
+
+## Google Takeout
+
+The scanner automatically detects Google Takeout folder structures and reads `.json` sidecar files to restore correct capture timestamps and original filenames. Takeout files are flagged in the UI.
+
+## What "redundant" means
+
+Marking a file redundant **only writes to the database**. Nothing is moved, renamed, or deleted. This tool produces a decision record only. A separate tool handles file actions.
+
+## Tech stack
+
+- Python 3.12, FastAPI, Uvicorn
+- SQLite (stdlib `sqlite3`)
+- Pillow, imagehash, pillow-heif
+- Vanilla JS single-page frontend
+- Docker / docker-compose
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..9a74274
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,599 @@
+"""
+FastAPI application — all API routes for the duplicate finder.
+"""
+
+import csv
+import io
+import os
+import sqlite3
+import subprocess
+import threading
+from pathlib import Path
+from typing import Optional
+
+from fastapi import FastAPI, HTTPException, Query, Request
+from fastapi.responses import (
+    FileResponse, JSONResponse, Response, StreamingResponse
+)
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+
+import scanner as sc
+
+app = FastAPI(title="Duplicate Finder")
+templates = Jinja2Templates(directory="/app/templates")
+
+app.mount("/static", StaticFiles(directory="/app/static"), name="static")
+
+METHOD_META = {
+    "sha256":   {"color": "#378ADD", "label": "Exact copy"},
+    "phash":    {"color": "#9b7de8", "label": "Visual match"},
+    "exif":     {"color": "#e2a43a", "label": "Same moment"},
+    "filesize": {"color": "#888780", "label": "Possible match"},
+}
+
+# ── Startup ───────────────────────────────────────────────────────────────────
+
+@app.on_event("startup")
+def startup():
+    sc.init_db()
+
+
+# ── Frontend ──────────────────────────────────────────────────────────────────
+
+@app.get("/")
+def index(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
+
+
+# ── DB helper ─────────────────────────────────────────────────────────────────
+
+def get_db() -> sqlite3.Connection:
+    return sc.get_db()
+
+
+# ── Scan management ───────────────────────────────────────────────────────────
+
+class ScanStartBody(BaseModel):
+    folder_path: str
+    mode: str = "incremental"
+
+
+@app.post("/api/scan/start")
+def scan_start(body: ScanStartBody):
+    if sc.scan_state["status"] == "running":
+        raise HTTPException(400, "A scan is already running")
+
+    mode = body.mode
+    if mode not in ("incremental", "full_reset", "new_files", "regroup"):
+        raise HTTPException(400, f"Unknown scan mode: {mode}")
+
+    if mode == "full_reset":
+        con = get_db()
+        cur = con.cursor()
+        cur.execute("DELETE FROM duplicate_members")
+        cur.execute("DELETE FROM duplicate_groups")
+        cur.execute("DELETE FROM files")
+        cur.execute("DELETE FROM scans")
+        con.commit()
+        con.close()
+
+    con = get_db()
+    cur = con.cursor()
+    cur.execute(
+        "INSERT INTO scans (folder_path, status) VALUES (?, 'running')",
+        (body.folder_path,),
+    )
+    scan_id = cur.lastrowid
+    con.commit()
+    con.close()
+
+    sc.scan_state.update(
+        scan_id=scan_id,
+        status="running",
+        phase="discovery",
+        progress=0,
+        total=0,
+        message="Starting...",
+        cancel_requested=False,
+        stats={},
+    )
+
+    thread = threading.Thread(
+        target=sc.run_scan,
+        args=(body.folder_path, scan_id, mode),
+        daemon=True,
+    )
+    thread.start()
+
+    return {"scan_id": scan_id}
+
+
+@app.get("/api/scan/status")
+def scan_status():
+    state = sc.scan_state
+    con = get_db()
+    cur = con.cursor()
+
+    # Build group counts per method
+    stats = {}
+    for method in ("sha256", "phash", "exif", "filesize"):
+        cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE method=?", (method,))
+        stats[f"{method}_groups"] = cur.fetchone()[0]
+
+    cur.execute("SELECT COUNT(*) FROM duplicate_groups")
+    stats["total_groups"] = cur.fetchone()[0]
+    cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE reviewed=1")
+    stats["reviewed"] = cur.fetchone()[0]
+    cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE reviewed=0")
+    stats["pending"] = cur.fetchone()[0]
+    cur.execute("SELECT COUNT(*) FROM files WHERE status != 'error'")
+    stats["total_files"] = cur.fetchone()[0]
+    con.close()
+
+    return {
+        "scan_id": state["scan_id"],
+        "status": state["status"],
+        "phase": state["phase"],
+        "progress": state["progress"],
+        "total": state["total"],
+        "message": state["message"],
+        "stats": stats,
+    }
+
+
+@app.post("/api/scan/cancel")
+def scan_cancel():
+    if sc.scan_state["status"] != "running":
+        raise HTTPException(400, "No scan is currently running")
+    sc.scan_state["cancel_requested"] = True
+    return {"success": True}
+
+
+@app.delete("/api/scan/reset")
+def scan_reset(confirm: str = Query("")):
+    if confirm != "RESET":
+        raise HTTPException(400, "Pass ?confirm=RESET to confirm")
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("DELETE FROM duplicate_members")
+    cur.execute("DELETE FROM duplicate_groups")
+    cur.execute("DELETE FROM files")
+    cur.execute("DELETE FROM scans")
+    con.commit()
+    con.close()
+    sc.scan_state.update(
+        scan_id=None, status="idle", phase="idle",
+        progress=0, total=0, message="", stats={},
+    )
+    return {"success": True}
+
+
+# ── Duplicate groups ──────────────────────────────────────────────────────────
+
+@app.get("/api/groups")
+def list_groups(
+    method: str = "all",
+    reviewed: str = "false",
+    sort: str = "count",
+    offset: int = 0,
+    limit: int = 50,
+):
+    con = get_db()
+    cur = con.cursor()
+
+    where = []
+    params: list = []
+
+    if method != "all":
+        where.append("dg.method = ?")
+        params.append(method)
+
+    if reviewed == "false":
+        where.append("dg.reviewed = 0")
+    elif reviewed == "true":
+        where.append("dg.reviewed = 1")
+
+    where_clause = ("WHERE " + " AND ".join(where)) if where else ""
+
+    order = {
+        "count": "member_count DESC",
+        "method": "dg.method, member_count DESC",
+        "date": "dg.created_at DESC",
+    }.get(sort, "member_count DESC")
+
+    cur.execute(f"""
+        SELECT COUNT(*) FROM duplicate_groups dg {where_clause}
+    """, params)
+    total = cur.fetchone()[0]
+
+    cur.execute(f"""
+        SELECT dg.id, dg.method, dg.reviewed,
+               COUNT(dm.id) as member_count,
+               (SELECT dm2.file_id FROM duplicate_members dm2
+                WHERE dm2.group_id = dg.id AND dm2.suggested = 1
+                LIMIT 1) as suggested_file_id
+        FROM duplicate_groups dg
+        LEFT JOIN duplicate_members dm ON dm.group_id = dg.id
+        {where_clause}
+        GROUP BY dg.id
+        ORDER BY {order}
+        LIMIT ? OFFSET ?
+    """, params + [limit, offset])
+
+    groups = []
+    for row in cur.fetchall():
+        meta = METHOD_META.get(row["method"], {"color": "#888", "label": row["method"]})
+        suggested = None
+        if row["suggested_file_id"]:
+            cur.execute(
+                "SELECT id, filename, width, height FROM files WHERE id=?",
+                (row["suggested_file_id"],),
+            )
+            f = cur.fetchone()
+            if f:
+                suggested = {
+                    "file_id": f["id"],
+                    "filename": f["filename"],
+                    "width": f["width"],
+                    "height": f["height"],
+                    "thumb_url": f"/api/thumb/{f['id']}",
+                }
+        groups.append({
+            "id": row["id"],
+            "method": row["method"],
+            "method_color": meta["color"],
+            "method_label": meta["label"],
+            "member_count": row["member_count"],
+            "reviewed": bool(row["reviewed"]),
+            "suggested_keeper": suggested,
+        })
+
+    con.close()
+    return {"total": total, "groups": groups}
+
+
+@app.get("/api/groups/{group_id}")
+def get_group(group_id: int):
+    con = get_db()
+    cur = con.cursor()
+
+    cur.execute("SELECT * FROM duplicate_groups WHERE id=?", (group_id,))
+    grp = cur.fetchone()
+    if not grp:
+        raise HTTPException(404, "Group not found")
+
+    meta = METHOD_META.get(grp["method"], {"color": "#888", "label": grp["method"]})
+
+    cur.execute("""
+        SELECT f.id, f.filename, f.path, f.file_size, f.width, f.height,
+               f.mime_type, f.exif_datetime, f.exif_device,
+               f.is_takeout, f.is_edited,
+               dm.is_keeper, dm.suggested
+        FROM duplicate_members dm
+        JOIN files f ON f.id = dm.file_id
+        WHERE dm.group_id = ?
+        ORDER BY dm.suggested DESC, f.width * f.height DESC
+    """, (group_id,))
+
+    members = []
+    for r in cur.fetchall():
+        members.append({
+            "file_id": r["id"],
+            "filename": r["filename"],
+            "path": r["path"],
+            "file_size": r["file_size"],
+            "width": r["width"],
+            "height": r["height"],
+            "mime_type": r["mime_type"],
+            "exif_datetime": r["exif_datetime"],
+            "exif_device": r["exif_device"],
+            "is_takeout": bool(r["is_takeout"]),
+            "is_edited": bool(r["is_edited"]) if r["is_edited"] is not None else False,
+            "is_suggested": bool(r["suggested"]),
+            "is_keeper": bool(r["is_keeper"]),
+            "thumb_url": f"/api/thumb/{r['id']}",
+        })
+
+    con.close()
+    return {
+        "id": grp["id"],
+        "method": grp["method"],
+        "method_color": meta["color"],
+        "method_label": meta["label"],
+        "method_value": grp["method_value"],
+        "reviewed": bool(grp["reviewed"]),
+        "members": members,
+    }
+
+
+# ── Decisions ─────────────────────────────────────────────────────────────────
+
+class DecideBody(BaseModel):
+    keeper_file_id: int
+
+
+@app.post("/api/groups/{group_id}/decide")
+def decide(group_id: int, body: DecideBody):
+    con = get_db()
+    cur = con.cursor()
+
+    cur.execute("SELECT id FROM duplicate_groups WHERE id=?", (group_id,))
+    if not cur.fetchone():
+        raise HTTPException(404, "Group not found")
+
+    cur.execute("SELECT file_id FROM duplicate_members WHERE group_id=?", (group_id,))
+    all_members = [r["file_id"] for r in cur.fetchall()]
+
+    if body.keeper_file_id not in all_members:
+        raise HTTPException(400, "keeper_file_id is not a member of this group")
+
+    for fid in all_members:
+        is_k = 1 if fid == body.keeper_file_id else 0
+        cur.execute(
+            "UPDATE duplicate_members SET is_keeper=? WHERE group_id=? AND file_id=?",
+            (is_k, group_id, fid),
+        )
+        status = "keeper" if is_k else "redundant"
+        cur.execute("UPDATE files SET status=? WHERE id=?", (status, fid))
+
+    cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
+    con.commit()
+    con.close()
+    return {"success": True}
+
+
+@app.post("/api/groups/{group_id}/skip")
+def skip_group(group_id: int):
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("SELECT id FROM duplicate_groups WHERE id=?", (group_id,))
+    if not cur.fetchone():
+        raise HTTPException(404, "Group not found")
+    cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
+    con.commit()
+    con.close()
+    return {"success": True}
+
+
+@app.post("/api/groups/{group_id}/keep-all")
+def keep_all(group_id: int):
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("SELECT id FROM duplicate_groups WHERE id=?", (group_id,))
+    if not cur.fetchone():
+        raise HTTPException(404, "Group not found")
+    cur.execute("SELECT file_id FROM duplicate_members WHERE group_id=?", (group_id,))
+    for r in cur.fetchall():
+        cur.execute(
+            "UPDATE duplicate_members SET is_keeper=1 WHERE group_id=? AND file_id=?",
+            (group_id, r["file_id"]),
+        )
+        cur.execute("UPDATE files SET status='keeper' WHERE id=?", (r["file_id"],))
+    cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (group_id,))
+    con.commit()
+    con.close()
+    return {"success": True}
+
+
+@app.post("/api/groups/{group_id}/unreview")
+def unreview_group(group_id: int):
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("SELECT id FROM duplicate_groups WHERE id=?", (group_id,))
+    if not cur.fetchone():
+        raise HTTPException(404, "Group not found")
+    cur.execute("SELECT file_id FROM duplicate_members WHERE group_id=?", (group_id,))
+    for r in cur.fetchall():
+        cur.execute(
+            "UPDATE duplicate_members SET is_keeper=0 WHERE group_id=? AND file_id=?",
+            (group_id, r["file_id"]),
+        )
+        cur.execute("UPDATE files SET status='pending' WHERE id=?", (r["file_id"],))
+    cur.execute("UPDATE duplicate_groups SET reviewed=0 WHERE id=?", (group_id,))
+    con.commit()
+    con.close()
+    return {"success": True}
+
+
+@app.post("/api/groups/auto-resolve-exact")
+def auto_resolve_exact():
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("""
+        SELECT id FROM duplicate_groups
+        WHERE method='sha256' AND reviewed=0
+    """)
+    groups = [r["id"] for r in cur.fetchall()]
+    resolved = 0
+
+    for gid in groups:
+        cur.execute("""
+            SELECT f.id, f.width, f.height, f.file_size, f.exif_datetime
+            FROM duplicate_members dm
+            JOIN files f ON f.id = dm.file_id
+            WHERE dm.group_id = ?
+        """, (gid,))
+        members = [dict(r) for r in cur.fetchall()]
+        if not members:
+            continue
+
+        keeper_id = sc._suggested_keeper_by_resolution(members)
+        for m in members:
+            is_k = 1 if m["id"] == keeper_id else 0
+            cur.execute(
+                "UPDATE duplicate_members SET is_keeper=? WHERE group_id=? AND file_id=?",
+                (is_k, gid, m["id"]),
+            )
+            cur.execute(
+                "UPDATE files SET status=? WHERE id=?",
+                ("keeper" if is_k else "redundant", m["id"]),
+            )
+        cur.execute("UPDATE duplicate_groups SET reviewed=1 WHERE id=?", (gid,))
+        resolved += 1
+
+    con.commit()
+    con.close()
+    return {"resolved": resolved}
+
+
+# ── Files + thumbnails ────────────────────────────────────────────────────────
+
+VIDEO_PLACEHOLDER_SVG = """<svg xmlns="http://www.w3.org/2000/svg" width="200" height="200"
+  viewBox="0 0 200 200">
+  <rect width="200" height="200" fill="#1e1e2e"/>
+  <polygon points="75,55 75,145 145,100" fill="#9b7de8"/>
+</svg>"""
+
+VIDEO_EXT = {".mp4", ".mov", ".avi", ".mkv", ".m4v", ".3gp", ".wmv", ".mts", ".m2ts"}
+
+
+@app.get("/api/thumb/{file_id}")
+def get_thumb(file_id: int):
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("SELECT path, mime_type, extension FROM files WHERE id=?", (file_id,))
+    row = cur.fetchone()
+    con.close()
+
+    if not row:
+        raise HTTPException(404, "File not found")
+
+    path = row["path"]
+    ext = (row["extension"] or "").lower()
+
+    if not os.path.isfile(path):
+        raise HTTPException(404, "File not on disk")
+
+    if ext in VIDEO_EXT:
+        # Try ffmpeg for first frame
+        try:
+            result = subprocess.run(
+                [
+                    "ffmpeg", "-i", path,
+                    "-vframes", "1", "-f", "image2", "-vcodec", "mjpeg",
+                    "pipe:1",
+                ],
+                capture_output=True, timeout=10,
+            )
+            if result.returncode == 0 and result.stdout:
+                return Response(content=result.stdout, media_type="image/jpeg")
+        except Exception:
+            pass
+        return Response(content=VIDEO_PLACEHOLDER_SVG, media_type="image/svg+xml")
+
+    # Serve photo directly
+    mime = row["mime_type"] or "application/octet-stream"
+    return FileResponse(path, media_type=mime)
+
+
+@app.get("/api/files/{file_id}")
+def get_file_meta(file_id: int):
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("SELECT * FROM files WHERE id=?", (file_id,))
+    row = cur.fetchone()
+    con.close()
+    if not row:
+        raise HTTPException(404, "File not found")
+    return dict(row)
+
+
+# ── Stats ─────────────────────────────────────────────────────────────────────
+
+@app.get("/api/stats")
+def get_stats():
+    con = get_db()
+    cur = con.cursor()
+
+    cur.execute("SELECT COUNT(*), SUM(file_size) FROM files WHERE status != 'error'")
+    r = cur.fetchone()
+    total_files = r[0] or 0
+    total_size = r[1] or 0
+
+    cur.execute("""
+        SELECT COUNT(*), SUM(f.file_size)
+        FROM files f
+        JOIN duplicate_members dm ON dm.file_id = f.id
+        WHERE dm.is_keeper = 0
+    """)
+    r = cur.fetchone()
+    dup_files = r[0] or 0
+    dup_size = r[1] or 0
+
+    by_method = {}
+    for method in ("sha256", "phash", "exif", "filesize"):
+        cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE method=?", (method,))
+        groups = cur.fetchone()[0]
+        cur.execute("""
+            SELECT COUNT(*) FROM duplicate_members dm
+            JOIN duplicate_groups dg ON dg.id = dm.group_id
+            WHERE dg.method = ?
+        """, (method,))
+        files = cur.fetchone()[0]
+        by_method[method] = {"groups": groups, "files": files}
+
+    cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE reviewed=1")
+    reviewed = cur.fetchone()[0]
+    cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE reviewed=0")
+    pending = cur.fetchone()[0]
+
+    cur.execute("SELECT COUNT(*) FROM files WHERE is_takeout=1")
+    takeout_files = cur.fetchone()[0]
+
+    con.close()
+    return {
+        "total_files": total_files,
+        "total_size_bytes": total_size,
+        "duplicate_files": dup_files,
+        "duplicate_size_bytes": dup_size,
+        "groups_by_method": by_method,
+        "reviewed": reviewed,
+        "pending": pending,
+        "takeout_files": takeout_files,
+    }
+
+
+# ── Export ────────────────────────────────────────────────────────────────────
+
+@app.get("/api/export/csv")
+def export_csv():
+    con = get_db()
+    cur = con.cursor()
+    cur.execute("""
+        SELECT dg.id as group_id, dg.method, f.id as file_id,
+               f.path, f.filename, f.file_size,
+               f.width, f.height, f.exif_datetime, f.exif_device,
+               dm.is_keeper,
+               CASE WHEN dm.is_keeper=0 AND dg.reviewed=1 THEN 1 ELSE 0 END as is_redundant,
+               dg.reviewed
+        FROM duplicate_groups dg
+        JOIN duplicate_members dm ON dm.group_id = dg.id
+        JOIN files f ON f.id = dm.file_id
+        ORDER BY dg.id, dm.is_keeper DESC
+    """)
+    rows = cur.fetchall()
+    con.close()
+
+    output = io.StringIO()
+    writer = csv.writer(output)
+    writer.writerow([
+        "group_id", "method", "file_id", "path", "filename",
+        "size", "width", "height", "exif_date", "device",
+        "is_keeper", "is_redundant", "reviewed",
+    ])
+    for r in rows:
+        writer.writerow([
+            r["group_id"], r["method"], r["file_id"],
+            r["path"], r["filename"], r["file_size"],
+            r["width"], r["height"], r["exif_datetime"], r["exif_device"],
+            r["is_keeper"], r["is_redundant"], r["reviewed"],
+        ])
+
+    output.seek(0)
+    return StreamingResponse(
+        iter([output.getvalue()]),
+        media_type="text/csv",
+        headers={"Content-Disposition": "attachment; filename=dup-finder-export.csv"},
+    )
diff --git a/app/scanner.py b/app/scanner.py
new file mode 100644
index 0000000..108db0d
--- /dev/null
+++ b/app/scanner.py
@@ -0,0 +1,758 @@
+"""
+File scanner: discovery, per-file extraction, and all 4 duplicate detection passes.
+"""
+
+import hashlib
+import mimetypes
+import os
+import sqlite3
+import subprocess
+from datetime import datetime
+from pathlib import Path
+
+import imagehash
+from PIL import Image, ExifTags, UnidentifiedImageError
+
+try:
+    from pillow_heif import register_heif_opener
+    register_heif_opener()
+except ImportError:
+    pass
+
+from takeout import is_takeout_folder, process_takeout
+
+
+PHOTO_EXT = {
+    ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif",
+    ".webp", ".heic", ".heif", ".raw", ".cr2", ".nef", ".arw",
+    ".dng", ".orf", ".rw2", ".pef", ".srw", ".x3f",
+}
+
+VIDEO_EXT = {
+    ".mp4", ".mov", ".avi", ".mkv", ".m4v", ".3gp",
+    ".wmv", ".mts", ".m2ts",
+}
+
+SUPPORTED_EXT = PHOTO_EXT | VIDEO_EXT
+
+DB_PATH = "/data/dupfinder.db"
+
+# Shared scan state (updated by background thread, read by status endpoint)
+scan_state = {
+    "scan_id": None,
+    "status": "idle",       # idle | running | complete | error | cancelled
+    "phase": "idle",        # discovery | takeout | indexing | phash | grouping | done
+    "progress": 0,
+    "total": 0,
+    "message": "",
+    "cancel_requested": False,
+    "stats": {},
+}
+
+
+# ── DB helpers ────────────────────────────────────────────────────────────────
+
+def get_db() -> sqlite3.Connection:
+    con = sqlite3.connect(DB_PATH, timeout=30)
+    con.row_factory = sqlite3.Row
+    con.execute("PRAGMA journal_mode=WAL")
+    con.execute("PRAGMA foreign_keys=ON")
+    return con
+
+
+def init_db():
+    con = get_db()
+    cur = con.cursor()
+    cur.executescript("""
+        CREATE TABLE IF NOT EXISTS files (
+            id              INTEGER PRIMARY KEY AUTOINCREMENT,
+            path            TEXT UNIQUE NOT NULL,
+            filename        TEXT NOT NULL,
+            extension       TEXT,
+            file_size       INTEGER,
+            mime_type       TEXT,
+            sha256          TEXT,
+            phash           TEXT,
+            exif_datetime   TEXT,
+            exif_device     TEXT,
+            width           INTEGER,
+            height          INTEGER,
+            is_takeout      INTEGER DEFAULT 0,
+            is_edited       INTEGER DEFAULT 0,
+            takeout_json    TEXT,
+            scan_id         INTEGER,
+            status          TEXT DEFAULT 'pending',
+            created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        );
+
+        CREATE TABLE IF NOT EXISTS scans (
+            id           INTEGER PRIMARY KEY AUTOINCREMENT,
+            folder_path  TEXT NOT NULL,
+            started_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            completed_at TIMESTAMP,
+            total_files  INTEGER DEFAULT 0,
+            status       TEXT DEFAULT 'running'
+        );
+
+        CREATE TABLE IF NOT EXISTS duplicate_groups (
+            id           INTEGER PRIMARY KEY AUTOINCREMENT,
+            method       TEXT NOT NULL,
+            method_value TEXT,
+            reviewed     INTEGER DEFAULT 0,
+            created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        );
+
+        CREATE TABLE IF NOT EXISTS duplicate_members (
+            id        INTEGER PRIMARY KEY AUTOINCREMENT,
+            group_id  INTEGER REFERENCES duplicate_groups(id) ON DELETE CASCADE,
+            file_id   INTEGER REFERENCES files(id) ON DELETE CASCADE,
+            is_keeper INTEGER DEFAULT 0,
+            suggested INTEGER DEFAULT 0
+        );
+
+        CREATE INDEX IF NOT EXISTS idx_sha256   ON files(sha256);
+        CREATE INDEX IF NOT EXISTS idx_phash    ON files(phash);
+        CREATE INDEX IF NOT EXISTS idx_exif_dt  ON files(exif_datetime, exif_device);
+        CREATE INDEX IF NOT EXISTS idx_size_dim ON files(file_size, width, height);
+        CREATE INDEX IF NOT EXISTS idx_status   ON files(status);
+    """)
+    con.commit()
+    con.close()
+
+
+# ── Per-file extraction ───────────────────────────────────────────────────────
+
+def _sha256(path: str) -> str:
+    h = hashlib.sha256()
+    with open(path, "rb") as f:
+        while chunk := f.read(65536):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def _exif_data(path: str) -> tuple[str | None, str | None]:
+    """Returns (exif_datetime, exif_device) or (None, None)."""
+    try:
+        img = Image.open(path)
+        exif_raw = img._getexif()
+        if not exif_raw:
+            return None, None
+        exif = {ExifTags.TAGS.get(k, k): v for k, v in exif_raw.items()}
+        dt = exif.get("DateTimeOriginal") or exif.get("DateTime")
+        if dt:
+            try:
+                dt = datetime.strptime(dt, "%Y:%m:%d %H:%M:%S").strftime("%Y-%m-%dT%H:%M:%S")
+            except ValueError:
+                dt = None
+        make = str(exif.get("Make", "")).strip()
+        model = str(exif.get("Model", "")).strip()
+        device = (make + " " + model).strip() if (make or model) else None
+        return dt, device
+    except Exception:
+        return None, None
+
+
+def _image_dims(path: str) -> tuple[int | None, int | None]:
+    try:
+        with Image.open(path) as img:
+            return img.size  # (width, height)
+    except Exception:
+        return None, None
+
+
+def _phash(path: str) -> str | None:
+    try:
+        with Image.open(path) as img:
+            return str(imagehash.phash(img))
+    except Exception:
+        return None
+
+
+def _video_dims(path: str) -> tuple[int | None, int | None]:
+    try:
+        result = subprocess.run(
+            [
+                "ffprobe", "-v", "error",
+                "-select_streams", "v:0",
+                "-show_entries", "stream=width,height",
+                "-of", "csv=p=0",
+                path,
+            ],
+            capture_output=True, text=True, timeout=10,
+        )
+        parts = result.stdout.strip().split(",")
+        if len(parts) == 2:
+            return int(parts[0]), int(parts[1])
+    except Exception:
+        pass
+    return None, None
+
+
+def _mtime_str(path: str) -> str | None:
+    try:
+        ts = os.path.getmtime(path)
+        return datetime.fromtimestamp(ts).strftime("%Y-%m-%dT%H:%M:%S")
+    except Exception:
+        return None
+
+
+def extract_file(path: str) -> dict:
+    ext = Path(path).suffix.lower()
+    filename = Path(path).name
+    is_photo = ext in PHOTO_EXT
+    is_video = ext in VIDEO_EXT
+
+    record = {
+        "path": path,
+        "filename": filename,
+        "extension": ext,
+        "file_size": None,
+        "mime_type": None,
+        "sha256": None,
+        "phash": None,
+        "exif_datetime": None,
+        "exif_device": None,
+        "width": None,
+        "height": None,
+    }
+
+    try:
+        record["file_size"] = os.path.getsize(path)
+    except OSError:
+        pass
+
+    record["mime_type"] = mimetypes.guess_type(path)[0]
+
+    try:
+        record["sha256"] = _sha256(path)
+    except OSError:
+        pass
+
+    if is_photo:
+        w, h = _image_dims(path)
+        record["width"], record["height"] = w, h
+        dt, device = _exif_data(path)
+        record["exif_datetime"] = dt or _mtime_str(path)
+        record["exif_device"] = device
+        # phash computed in separate phase for progress reporting
+
+    elif is_video:
+        w, h = _video_dims(path)
+        record["width"], record["height"] = w, h
+        record["exif_datetime"] = _mtime_str(path)
+
+    return record
+
+
+# ── Union-Find for phash grouping ────────────────────────────────────────────
+
+class UnionFind:
+    def __init__(self):
+        self.parent: dict[int, int] = {}
+
+    def find(self, x: int) -> int:
+        if x not in self.parent:
+            self.parent[x] = x
+        if self.parent[x] != x:
+            self.parent[x] = self.find(self.parent[x])
+        return self.parent[x]
+
+    def union(self, x: int, y: int):
+        px, py = self.find(x), self.find(y)
+        if px != py:
+            self.parent[px] = py
+
+    def groups(self) -> dict[int, list[int]]:
+        from collections import defaultdict
+        result: dict[int, list[int]] = defaultdict(list)
+        for x in self.parent:
+            result[self.find(x)].append(x)
+        return {k: v for k, v in result.items() if len(v) >= 2}
+
+
+# ── Detection passes ──────────────────────────────────────────────────────────
+
+def _suggested_keeper_by_resolution(members: list[dict]) -> int:
+    """Return file_id of highest resolution member; tie-break by size then oldest date."""
+    def score(m):
+        w = m["width"] or 0
+        h = m["height"] or 0
+        size = m["file_size"] or 0
+        dt = m["exif_datetime"] or "9999"
+        return (w * h, size, dt)
+
+    best = max(members, key=lambda m: (
+        (m["width"] or 0) * (m["height"] or 0),
+        m["file_size"] or 0,
+        # older date = better; invert by negating epoch or use str comparison inverted
+    ))
+    return best["id"]
+
+
+def _suggested_keeper_oldest(members: list[dict]) -> int:
+    def key(m):
+        return m["exif_datetime"] or "9999"
+    return min(members, key=key)["id"]
+
+
+def _run_sha256_pass(con: sqlite3.Connection, scan_id: int):
+    cur = con.cursor()
+    cur.execute("""
+        SELECT sha256, COUNT(*) as cnt
+        FROM files
+        WHERE sha256 IS NOT NULL
+        GROUP BY sha256
+        HAVING cnt > 1
+    """)
+    rows = cur.fetchall()
+    for row in rows:
+        sha = row["sha256"]
+        cur.execute("""
+            SELECT id, width, height, file_size, exif_datetime
+            FROM files WHERE sha256 = ?
+        """, (sha,))
+        members = [dict(r) for r in cur.fetchall()]
+
+        keeper_id = _suggested_keeper_by_resolution(members)
+
+        cur.execute(
+            "INSERT INTO duplicate_groups (method, method_value) VALUES ('sha256', ?)",
+            (sha,),
+        )
+        group_id = cur.lastrowid
+        for m in members:
+            cur.execute(
+                "INSERT INTO duplicate_members (group_id, file_id, suggested) VALUES (?, ?, ?)",
+                (group_id, m["id"], 1 if m["id"] == keeper_id else 0),
+            )
+
+
+def _run_phash_pass(con: sqlite3.Connection, scan_id: int):
+    cur = con.cursor()
+    # Exclude files already in sha256 groups
+    cur.execute("""
+        SELECT f.id, f.phash, f.width, f.height, f.file_size, f.exif_datetime
+        FROM files f
+        WHERE f.phash IS NOT NULL
+          AND f.extension NOT IN (
+              '.mp4','.mov','.avi','.mkv','.m4v','.3gp','.wmv','.mts','.m2ts'
+          )
+          AND f.id NOT IN (
+              SELECT dm.file_id FROM duplicate_members dm
+              JOIN duplicate_groups dg ON dg.id = dm.group_id
+              WHERE dg.method = 'sha256'
+          )
+    """)
+    rows = [dict(r) for r in cur.fetchall()]
+
+    if len(rows) < 2:
+        return
+
+    # Bucket by first 2 hex chars to reduce O(n²) comparisons
+    buckets: dict[str, list[dict]] = {}
+    for r in rows:
+        key = r["phash"][:2]
+        buckets.setdefault(key, []).append(r)
+
+    uf = UnionFind()
+    # Ensure all IDs are registered
+    for r in rows:
+        uf.find(r["id"])
+
+    THRESHOLD = 10
+    for bucket in buckets.values():
+        for i in range(len(bucket)):
+            for j in range(i + 1, len(bucket)):
+                a, b = bucket[i], bucket[j]
+                try:
+                    dist = imagehash.hex_to_hash(a["phash"]) - imagehash.hex_to_hash(b["phash"])
+                    if dist <= THRESHOLD:
+                        uf.union(a["id"], b["id"])
+                except Exception:
+                    pass
+
+    id_map = {r["id"]: r for r in rows}
+    for _, member_ids in uf.groups().items():
+        members = [id_map[mid] for mid in member_ids if mid in id_map]
+        if len(members) < 2:
+            continue
+        keeper_id = _suggested_keeper_by_resolution(members)
+        keeper = id_map[keeper_id]
+        cur.execute(
+            "INSERT INTO duplicate_groups (method, method_value) VALUES ('phash', ?)",
+            (keeper["phash"],),
+        )
+        group_id = cur.lastrowid
+        for m in members:
+            cur.execute(
+                "INSERT INTO duplicate_members (group_id, file_id, suggested) VALUES (?, ?, ?)",
+                (group_id, m["id"], 1 if m["id"] == keeper_id else 0),
+            )
+
+
+def _run_exif_pass(con: sqlite3.Connection, scan_id: int):
+    cur = con.cursor()
+    cur.execute("""
+        SELECT exif_datetime, exif_device, COUNT(*) as cnt
+        FROM files
+        WHERE exif_datetime IS NOT NULL
+          AND exif_device IS NOT NULL
+          AND id NOT IN (
+              SELECT file_id FROM duplicate_members dm
+              JOIN duplicate_groups dg ON dg.id = dm.group_id
+              WHERE dg.method IN ('sha256', 'phash')
+          )
+        GROUP BY exif_datetime, exif_device
+        HAVING cnt > 1
+    """)
+    rows = cur.fetchall()
+    for row in rows:
+        dt, dev = row["exif_datetime"], row["exif_device"]
+        cur.execute("""
+            SELECT id, width, height, file_size, exif_datetime
+            FROM files
+            WHERE exif_datetime = ? AND exif_device = ?
+        """, (dt, dev))
+        members = [dict(r) for r in cur.fetchall()]
+        keeper_id = _suggested_keeper_by_resolution(members)
+        method_value = f"{dt}::{dev}"
+        cur.execute(
+            "INSERT INTO duplicate_groups (method, method_value) VALUES ('exif', ?)",
+            (method_value,),
+        )
+        group_id = cur.lastrowid
+        for m in members:
+            cur.execute(
+                "INSERT INTO duplicate_members (group_id, file_id, suggested) VALUES (?, ?, ?)",
+                (group_id, m["id"], 1 if m["id"] == keeper_id else 0),
+            )
+
+
+def _run_filesize_pass(con: sqlite3.Connection, scan_id: int):
+    cur = con.cursor()
+    cur.execute("""
+        SELECT file_size, width, height, COUNT(*) as cnt
+        FROM files
+        WHERE file_size IS NOT NULL
+          AND width IS NOT NULL
+          AND height IS NOT NULL
+          AND id NOT IN (
+              SELECT file_id FROM duplicate_members dm
+              JOIN duplicate_groups dg ON dg.id = dm.group_id
+              WHERE dg.method IN ('sha256', 'phash', 'exif')
+          )
+        GROUP BY file_size, width, height
+        HAVING cnt > 1
+    """)
+    rows = cur.fetchall()
+    for row in rows:
+        fs, w, h = row["file_size"], row["width"], row["height"]
+        cur.execute("""
+            SELECT id, width, height, file_size, exif_datetime
+            FROM files
+            WHERE file_size = ? AND width = ? AND height = ?
+        """, (fs, w, h))
+        members = [dict(r) for r in cur.fetchall()]
+        keeper_id = _suggested_keeper_oldest(members)
+        method_value = f"{fs}::{w}x{h}"
+        cur.execute(
+            "INSERT INTO duplicate_groups (method, method_value) VALUES ('filesize', ?)",
+            (method_value,),
+        )
+        group_id = cur.lastrowid
+        for m in members:
+            cur.execute(
+                "INSERT INTO duplicate_members (group_id, file_id, suggested) VALUES (?, ?, ?)",
+                (group_id, m["id"], 1 if m["id"] == keeper_id else 0),
+            )
+
+
+# ── Main scan entry point ─────────────────────────────────────────────────────
+
+def run_scan(folder_path: str, scan_id: int, mode: str = "incremental"):
+    """Main scan function — runs in background thread."""
+    global scan_state
+    con = get_db()
+    cur = con.cursor()
+
+    try:
+        # ── Phase: discovery ──────────────────────────────────────────────
+        scan_state.update(phase="discovery", progress=0, total=0,
+                          message="Discovering files...")
+
+        all_files = []
+        for root, dirs, files in os.walk(folder_path):
+            dirs[:] = [d for d in dirs if not d.startswith(".")]
+            for fname in files:
+                if fname.endswith(".json"):
+                    continue
+                ext = Path(fname).suffix.lower()
+                if ext in SUPPORTED_EXT:
+                    all_files.append(os.path.join(root, fname))
+
+        scan_state["total"] = len(all_files)
+        scan_state["message"] = f"Found {len(all_files):,} files"
+
+        if scan_state["cancel_requested"]:
+            _mark_scan(cur, scan_id, "cancelled")
+            con.commit()
+            scan_state["status"] = "cancelled"
+            return
+
+        # ── Mode: full reset ──────────────────────────────────────────────
+        if mode == "full_reset":
+            cur.execute("DELETE FROM duplicate_members")
+            cur.execute("DELETE FROM duplicate_groups")
+            cur.execute("DELETE FROM files")
+            con.commit()
+
+        # ── Phase: takeout pre-processing ─────────────────────────────────
+        scan_state.update(phase="takeout", message="Checking for Google Takeout structure...")
+        if is_takeout_folder(folder_path):
+            scan_state["message"] = "Processing Google Takeout sidecars..."
+            process_takeout(folder_path, DB_PATH)
+
+        if scan_state["cancel_requested"]:
+            _mark_scan(cur, scan_id, "cancelled")
+            con.commit()
+            scan_state["status"] = "cancelled"
+            return
+
+        # ── Phase: indexing ───────────────────────────────────────────────
+        scan_state.update(phase="indexing", progress=0,
+                          message="Indexing files (SHA-256 + EXIF + dimensions)...")
+
+        for i, path in enumerate(all_files):
+            if scan_state["cancel_requested"]:
+                _mark_scan(cur, scan_id, "cancelled")
+                con.commit()
+                scan_state["status"] = "cancelled"
+                return
+
+            scan_state["progress"] = i + 1
+            scan_state["message"] = f"Indexing: {Path(path).name}"
+
+            # Check existing record
+            cur.execute("SELECT id, file_size, updated_at FROM files WHERE path = ?", (path,))
+            existing = cur.fetchone()
+
+            try:
+                current_size = os.path.getsize(path)
+            except OSError:
+                continue
+
+            if existing and mode in ("incremental", "new_files"):
+                if mode == "new_files":
+                    # Skip entirely — don't re-hash existing files
+                    cur.execute("UPDATE files SET scan_id = ? WHERE path = ?", (scan_id, path))
+                    continue
+                # Incremental: skip if size unchanged (use size as proxy for change)
+                if existing["file_size"] == current_size:
+                    cur.execute("UPDATE files SET scan_id = ? WHERE path = ?", (scan_id, path))
+                    continue
+                # File changed — re-hash, clear group memberships
+                cur.execute(
+                    "DELETE FROM duplicate_members WHERE file_id = ?", (existing["id"],)
+                )
+
+            try:
+                record = extract_file(path)
+            except Exception as e:
+                cur.execute(
+                    "INSERT OR IGNORE INTO files (path, filename, extension, scan_id, status) "
+                    "VALUES (?, ?, ?, ?, 'error')",
+                    (path, Path(path).name, Path(path).suffix.lower(), scan_id),
+                )
+                cur.execute(
+                    "UPDATE files SET status='error', scan_id=?, updated_at=CURRENT_TIMESTAMP "
+                    "WHERE path=?",
+                    (scan_id, path),
+                )
+                con.commit()
+                continue
+
+            record["scan_id"] = scan_id
+            if existing:
+                cur.execute("""
+                    UPDATE files SET
+                        filename=:filename, extension=:extension, file_size=:file_size,
+                        mime_type=:mime_type, sha256=:sha256,
+                        exif_datetime=:exif_datetime, exif_device=:exif_device,
+                        width=:width, height=:height, scan_id=:scan_id,
+                        status='pending', updated_at=CURRENT_TIMESTAMP
+                    WHERE path=:path
+                """, record)
+            else:
+                cur.execute("""
+                    INSERT OR IGNORE INTO files
+                        (path, filename, extension, file_size, mime_type, sha256,
+                         exif_datetime, exif_device, width, height, scan_id, status)
+                    VALUES
+                        (:path, :filename, :extension, :file_size, :mime_type, :sha256,
+                         :exif_datetime, :exif_device, :width, :height, :scan_id, 'pending')
+                """, record)
+
+            if (i + 1) % 100 == 0:
+                con.commit()
+
+        con.commit()
+
+        # ── Phase: phash ──────────────────────────────────────────────────
+        scan_state.update(phase="phash", progress=0,
+                          message="Computing perceptual hashes...")
+
+        cur.execute("""
+            SELECT id, path FROM files
+            WHERE extension IN (
+                '.jpg','.jpeg','.png','.gif','.bmp','.tiff','.tif',
+                '.webp','.heic','.heif','.raw','.cr2','.nef','.arw',
+                '.dng','.orf','.rw2','.pef','.srw','.x3f'
+            ) AND phash IS NULL AND status != 'error'
+        """)
+        photo_rows = cur.fetchall()
+        scan_state["total"] = len(photo_rows)
+
+        for i, row in enumerate(photo_rows):
+            if scan_state["cancel_requested"]:
+                _mark_scan(cur, scan_id, "cancelled")
+                con.commit()
+                scan_state["status"] = "cancelled"
+                return
+
+            scan_state["progress"] = i + 1
+            scan_state["message"] = f"Phash: {Path(row['path']).name}"
+            ph = _phash(row["path"])
+            if ph:
+                cur.execute("UPDATE files SET phash=? WHERE id=?", (ph, row["id"]))
+            if (i + 1) % 200 == 0:
+                con.commit()
+
+        con.commit()
+
+        # ── Phase: grouping ───────────────────────────────────────────────
+        scan_state.update(phase="grouping", progress=0, total=4,
+                          message="Running duplicate detection...")
+
+        if mode in ("incremental", "full_reset", "regroup"):
+            cur.execute("DELETE FROM duplicate_members")
+            cur.execute("DELETE FROM duplicate_groups")
+            con.commit()
+        elif mode == "new_files":
+            # Only clear groups containing new files
+            cur.execute("""
+                DELETE FROM duplicate_groups WHERE id IN (
+                    SELECT DISTINCT dm.group_id FROM duplicate_members dm
+                    JOIN files f ON f.id = dm.file_id
+                    WHERE f.scan_id = ?
+                )
+            """, (scan_id,))
+            con.commit()
+
+        scan_state["message"] = "Pass 1/4: SHA-256 exact duplicates..."
+        _run_sha256_pass(con, scan_id)
+        scan_state["progress"] = 1
+        con.commit()
+
+        scan_state["message"] = "Pass 2/4: Perceptual hash similarity..."
+        _run_phash_pass(con, scan_id)
+        scan_state["progress"] = 2
+        con.commit()
+
+        scan_state["message"] = "Pass 3/4: EXIF timestamp + device..."
+        _run_exif_pass(con, scan_id)
+        scan_state["progress"] = 3
+        con.commit()
+
+        scan_state["message"] = "Pass 4/4: File size + dimensions..."
+        _run_filesize_pass(con, scan_id)
+        scan_state["progress"] = 4
+        con.commit()
+
+        # ── Restore keeper statuses for mode=incremental ──────────────────
+        if mode == "incremental":
+            # If a previously marked keeper no longer appears in any group, reset to pending
+            cur.execute("""
+                UPDATE files SET status='pending'
+                WHERE status='keeper'
+                  AND id NOT IN (
+                      SELECT file_id FROM duplicate_members WHERE is_keeper=1
+                  )
+            """)
+            con.commit()
+
+        # Update scan record
+        cur.execute(
+            "UPDATE scans SET completed_at=CURRENT_TIMESTAMP, total_files=?, status='complete' "
+            "WHERE id=?",
+            (len(all_files), scan_id),
+        )
+        con.commit()
+
+        scan_state.update(status="complete", phase="done",
+                          message="Scan complete.", progress=scan_state["total"])
+        _update_stats()
+
+    except Exception as e:
+        scan_state.update(status="error", message=str(e))
+        try:
+            _mark_scan(cur, scan_id, "error")
+            con.commit()
+        except Exception:
+            pass
+    finally:
+        con.close()
+
+
+def _mark_scan(cur, scan_id: int, status: str):
+    cur.execute(
+        "UPDATE scans SET completed_at=CURRENT_TIMESTAMP, status=? WHERE id=?",
+        (status, scan_id),
+    )
+
+
+def _update_stats():
+    """Refresh stats in scan_state."""
+    try:
+        con = get_db()
+        cur = con.cursor()
+        cur.execute("SELECT COUNT(*) FROM files WHERE status != 'error'")
+        total_files = cur.fetchone()[0]
+
+        cur.execute("SELECT COUNT(*), SUM(file_size) FROM files WHERE status='redundant'")
+        r = cur.fetchone()
+        dup_count = r[0] or 0
+        dup_size = r[1] or 0
+
+        for method in ("sha256", "phash", "exif", "filesize"):
+            cur.execute(
+                "SELECT COUNT(*) FROM duplicate_groups WHERE method=?", (method,)
+            )
+        cur.execute("""
+            SELECT method,
+                COUNT(*) as groups,
+                (SELECT COUNT(*) FROM duplicate_members dm2
+                 JOIN duplicate_groups dg2 ON dg2.id=dm2.group_id
+                 WHERE dg2.method=dg.method) as files
+            FROM duplicate_groups dg
+            GROUP BY method
+        """)
+        by_method = {r["method"]: {"groups": r["groups"], "files": r["files"]}
+                     for r in cur.fetchall()}
+
+        cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE reviewed=1")
+        reviewed = cur.fetchone()[0]
+        cur.execute("SELECT COUNT(*) FROM duplicate_groups WHERE reviewed=0")
+        pending = cur.fetchone()[0]
+
+        scan_state["stats"] = {
+            "total_files": total_files,
+            "duplicate_files": dup_count,
+            "duplicate_size_bytes": dup_size,
+            "groups_by_method": by_method,
+            "reviewed": reviewed,
+            "pending": pending,
+        }
+        con.close()
+    except Exception:
+        pass
diff --git a/app/takeout.py b/app/takeout.py
new file mode 100644
index 0000000..34c0c42
--- /dev/null
+++ b/app/takeout.py
@@ -0,0 +1,149 @@
+"""
+Google Takeout pre-processor.
+Detects Takeout folder structures, reads JSON sidecars, and enriches
+the files table with corrected timestamps, normalized filenames, and
+edit-version flags.
+"""
+
+import json
+import os
+import re
+import sqlite3
+from datetime import datetime, timezone
+from pathlib import Path
+
+
+# Google edit suffixes appended to filenames
+EDIT_SUFFIXES = ("-edited", "-effects", "-smile", "-mix")
+
+
+def _find_sidecar(media_path: str) -> str | None:
+    """Return path to the JSON sidecar for a media file, or None."""
+    p = Path(media_path)
+    # Try filename.ext.json first, then filename.json
+    candidates = [
+        str(p) + ".json",
+        str(p.with_suffix(".json")),
+    ]
+    for c in candidates:
+        if os.path.isfile(c):
+            return c
+    return None
+
+
+def _strip_collision_suffix(filename: str) -> str:
+    """Strip Google's (1), (2) collision suffixes from a filename."""
+    stem = Path(filename).stem
+    ext = Path(filename).suffix
+    cleaned = re.sub(r"\(\d+\)$", "", stem).rstrip()
+    return cleaned + ext
+
+
+def _is_edited(filename: str) -> bool:
+    stem = Path(filename).stem.lower()
+    return any(stem.endswith(s) for s in EDIT_SUFFIXES)
+
+
+def is_takeout_folder(folder_path: str) -> bool:
+    """
+    Heuristic: walk folder looking for .json files whose names match
+    adjacent media files. If we find at least 5 such pairs, call it Takeout.
+    """
+    count = 0
+    for root, dirs, files in os.walk(folder_path):
+        # Skip hidden dirs
+        dirs[:] = [d for d in dirs if not d.startswith(".")]
+        file_set = set(files)
+        for f in files:
+            if not f.endswith(".json"):
+                continue
+            # Check if a media file exists that this could be a sidecar for
+            base = f[:-5]  # strip .json
+            if base in file_set:
+                count += 1
+            if count >= 5:
+                return True
+    return False
+
+
+def process_takeout(folder_path: str, db_path: str) -> int:
+    """
+    Walk folder_path, find all media files with JSON sidecars,
+    and enrich their DB records. Returns count of files enriched.
+    """
+    con = sqlite3.connect(db_path)
+    con.row_factory = sqlite3.Row
+    cur = con.cursor()
+
+    enriched = 0
+
+    for root, dirs, files in os.walk(folder_path):
+        dirs[:] = [d for d in dirs if not d.startswith(".")]
+        for fname in files:
+            if fname.endswith(".json"):
+                continue
+            media_path = os.path.join(root, fname)
+            sidecar = _find_sidecar(media_path)
+            if not sidecar:
+                continue
+
+            try:
+                with open(sidecar, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+            except (json.JSONDecodeError, OSError):
+                continue
+
+            # Extract fields from sidecar
+            photo_taken_ts = None
+            try:
+                ts = int(data["photoTakenTime"]["timestamp"])
+                dt = datetime.fromtimestamp(ts, tz=timezone.utc)
+                photo_taken_ts = dt.strftime("%Y-%m-%dT%H:%M:%S")
+            except (KeyError, ValueError, TypeError):
+                pass
+
+            title = data.get("title", "")
+            takeout_json_str = json.dumps(data)
+
+            # Normalized filename: use title if present, else strip suffix from fname
+            if title:
+                normalized = _strip_collision_suffix(title)
+            else:
+                normalized = _strip_collision_suffix(fname)
+
+            edited = _is_edited(fname)
+
+            # Update the DB record for this file
+            updates = {
+                "is_takeout": 1,
+                "filename": normalized,
+                "takeout_json": takeout_json_str,
+            }
+            if photo_taken_ts:
+                updates["exif_datetime"] = photo_taken_ts
+
+            set_clause = ", ".join(f"{k} = ?" for k in updates)
+            values = list(updates.values()) + [media_path]
+
+            cur.execute(
+                f"UPDATE files SET {set_clause}, updated_at = CURRENT_TIMESTAMP "
+                f"WHERE path = ?",
+                values,
+            )
+
+            # Handle edited flag — add is_edited column if needed (migration-safe)
+            if edited:
+                try:
+                    cur.execute(
+                        "UPDATE files SET is_edited = 1 WHERE path = ?",
+                        (media_path,),
+                    )
+                except sqlite3.OperationalError:
+                    pass  # column doesn't exist yet, skip
+
+            if cur.rowcount > 0:
+                enriched += 1
+
+    con.commit()
+    con.close()
+    return enriched
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..3e59faa
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,17 @@
+services:
+  dup-finder:
+    build: .
+    container_name: dup-finder
+    restart: unless-stopped
+    ports:
+      - "8765:8000"
+    volumes:
+      # Mount the photo library — READ ONLY, never modified
+      - /volume1/photos:/photos:ro
+      # Database persistence
+      - /volume1/docker/dup-finder/data:/data
+    deploy:
+      resources:
+        limits:
+          cpus: "2.0"
+          memory: 2G
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c15bd5c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+fastapi==0.115.6
+uvicorn==0.32.1
+Pillow==11.0.0
+imagehash==4.3.1
+pillow-heif==0.21.0
+jinja2==3.1.4
+aiofiles==24.1.0
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..a085d6e
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,1392 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Duplicate Finder</title>
+<style>
+  :root {
+    --bg:        #0f0f17;
+    --surface:   #1a1a2e;
+    --surface2:  #16213e;
+    --border:    #2a2a4a;
+    --text:      #e0e0f0;
+    --text-dim:  #8888aa;
+    --accent:    #7c6cfc;
+    --accent2:   #9b7de8;
+    --danger:    #e05c5c;
+    --success:   #4caf7d;
+    --warning:   #e2a43a;
+    --c-sha256:  #378ADD;
+    --c-phash:   #9b7de8;
+    --c-exif:    #e2a43a;
+    --c-size:    #888780;
+    --radius:    8px;
+    --sidebar-w: 200px;
+  }
+  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+  body {
+    background: var(--bg);
+    color: var(--text);
+    font-family: 'Segoe UI', system-ui, sans-serif;
+    font-size: 14px;
+    height: 100vh;
+    display: flex;
+    flex-direction: column;
+    overflow: hidden;
+  }
+
+  /* ── Topbar ── */
+  #topbar {
+    display: flex;
+    align-items: center;
+    gap: 16px;
+    padding: 10px 20px;
+    background: var(--surface);
+    border-bottom: 1px solid var(--border);
+    flex-shrink: 0;
+    z-index: 10;
+  }
+  #topbar h1 { font-size: 16px; font-weight: 700; letter-spacing: .5px; }
+  #topbar h1 span { color: var(--accent); }
+  #scan-chip {
+    padding: 3px 10px;
+    border-radius: 99px;
+    font-size: 12px;
+    font-weight: 600;
+    background: var(--surface2);
+    border: 1px solid var(--border);
+  }
+  #scan-chip.running  { border-color: var(--accent); color: var(--accent); }
+  #scan-chip.complete { border-color: var(--success); color: var(--success); }
+  #scan-chip.error    { border-color: var(--danger);  color: var(--danger);  }
+  #scan-chip.cancelled { border-color: var(--warning); color: var(--warning); }
+  #topbar-stats { margin-left: auto; display: flex; gap: 20px; font-size: 12px; color: var(--text-dim); }
+  #topbar-stats span b { color: var(--text); }
+
+  /* ── Layout ── */
+  #layout {
+    display: flex;
+    flex: 1;
+    overflow: hidden;
+  }
+
+  /* ── Sidebar ── */
+  #sidebar {
+    width: var(--sidebar-w);
+    background: var(--surface);
+    border-right: 1px solid var(--border);
+    flex-shrink: 0;
+    display: flex;
+    flex-direction: column;
+    padding: 12px 0;
+    overflow-y: auto;
+  }
+  .nav-item {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    padding: 8px 16px;
+    cursor: pointer;
+    border-radius: 0;
+    font-size: 13px;
+    color: var(--text-dim);
+    transition: background .15s, color .15s;
+    position: relative;
+  }
+  .nav-item:hover { background: var(--surface2); color: var(--text); }
+  .nav-item.active { background: rgba(124,108,252,.15); color: var(--accent); }
+  .nav-item .dot {
+    width: 8px; height: 8px;
+    border-radius: 50%;
+    flex-shrink: 0;
+  }
+  .nav-sep { border-top: 1px solid var(--border); margin: 8px 12px; }
+  .nav-badge {
+    margin-left: auto;
+    background: var(--surface2);
+    border: 1px solid var(--border);
+    border-radius: 99px;
+    padding: 1px 7px;
+    font-size: 11px;
+    color: var(--text-dim);
+  }
+
+  /* ── Main content ── */
+  #main {
+    flex: 1;
+    overflow-y: auto;
+    padding: 24px;
+  }
+
+  /* ── View panels ── */
+  .view { display: none; }
+  .view.active { display: block; }
+
+  /* ── Cards ── */
+  .stat-cards {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
+    gap: 14px;
+    margin-bottom: 28px;
+  }
+  .stat-card {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 16px 18px;
+  }
+  .stat-card .label { font-size: 11px; color: var(--text-dim); text-transform: uppercase; letter-spacing: .5px; }
+  .stat-card .value { font-size: 26px; font-weight: 700; margin-top: 4px; }
+  .stat-card .sub   { font-size: 12px; color: var(--text-dim); margin-top: 2px; }
+
+  /* ── Section headers ── */
+  .section-title {
+    font-size: 13px;
+    font-weight: 600;
+    color: var(--text-dim);
+    text-transform: uppercase;
+    letter-spacing: .6px;
+    margin-bottom: 12px;
+  }
+
+  /* ── Scan form ── */
+  #scan-form-area {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 20px;
+    margin-bottom: 24px;
+  }
+  .input-row {
+    display: flex;
+    gap: 10px;
+    align-items: center;
+  }
+  input[type=text] {
+    flex: 1;
+    background: var(--bg);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    color: var(--text);
+    padding: 8px 12px;
+    font-size: 13px;
+    outline: none;
+    font-family: monospace;
+  }
+  input[type=text]:focus { border-color: var(--accent); }
+  button {
+    padding: 8px 16px;
+    border-radius: var(--radius);
+    border: none;
+    cursor: pointer;
+    font-size: 13px;
+    font-weight: 600;
+    transition: opacity .15s, background .15s;
+  }
+  button:hover { opacity: .85; }
+  button:disabled { opacity: .4; cursor: not-allowed; }
+  .btn-primary { background: var(--accent); color: #fff; }
+  .btn-secondary { background: var(--surface2); color: var(--text); border: 1px solid var(--border); }
+  .btn-danger { background: var(--danger); color: #fff; }
+  .btn-success { background: var(--success); color: #fff; }
+  .btn-warning { background: var(--warning); color: #000; }
+  .btn-sm { padding: 5px 12px; font-size: 12px; }
+
+  /* ── Progress ── */
+  #progress-area {
+    margin-top: 16px;
+    display: none;
+  }
+  #progress-area.show { display: block; }
+  .progress-bar-wrap {
+    background: var(--surface2);
+    border-radius: 99px;
+    height: 6px;
+    overflow: hidden;
+    margin: 8px 0;
+  }
+  .progress-bar-fill {
+    height: 100%;
+    border-radius: 99px;
+    background: var(--accent);
+    transition: width .3s;
+  }
+  .progress-msg { font-size: 12px; color: var(--text-dim); }
+  .phase-pills {
+    display: flex;
+    gap: 6px;
+    flex-wrap: wrap;
+    margin-top: 10px;
+  }
+  .phase-pill {
+    padding: 3px 10px;
+    border-radius: 99px;
+    font-size: 11px;
+    font-weight: 600;
+    background: var(--surface2);
+    border: 1px solid var(--border);
+    color: var(--text-dim);
+  }
+  .phase-pill.active { background: rgba(124,108,252,.2); border-color: var(--accent); color: var(--accent); }
+  .phase-pill.done   { background: rgba(76,175,125,.1);  border-color: var(--success); color: var(--success); }
+
+  /* ── Rescan buttons ── */
+  #rescan-area { display: none; margin-top: 16px; }
+  #rescan-area.show { display: block; }
+  .rescan-info { font-size: 12px; color: var(--text-dim); margin-bottom: 10px; }
+  .rescan-buttons {
+    display: flex;
+    gap: 8px;
+    flex-wrap: wrap;
+    align-items: flex-start;
+  }
+  .rescan-btn-group {
+    display: flex;
+    flex-direction: column;
+    gap: 4px;
+  }
+  .rescan-btn-group .desc { font-size: 11px; color: var(--text-dim); max-width: 160px; }
+
+  /* ── Gallery ── */
+  .filter-bar {
+    display: flex;
+    gap: 8px;
+    margin-bottom: 16px;
+    flex-wrap: wrap;
+    align-items: center;
+  }
+  .filter-pill {
+    padding: 5px 14px;
+    border-radius: 99px;
+    font-size: 12px;
+    font-weight: 600;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    color: var(--text-dim);
+    cursor: pointer;
+    transition: all .15s;
+  }
+  .filter-pill:hover { border-color: var(--text-dim); color: var(--text); }
+  .filter-pill.active { background: rgba(124,108,252,.2); border-color: var(--accent); color: var(--accent); }
+  .filter-pill .dot { display: inline-block; width: 7px; height: 7px; border-radius: 50%; margin-right: 5px; }
+
+  #auto-resolve-btn {
+    margin-left: auto;
+  }
+
+  .gallery-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
+    gap: 12px;
+  }
+  .gallery-cell {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    overflow: hidden;
+    cursor: pointer;
+    position: relative;
+    aspect-ratio: 1;
+    transition: border-color .15s, transform .1s;
+  }
+  .gallery-cell:hover { border-color: var(--accent); transform: scale(1.01); }
+  .gallery-cell.reviewed { opacity: .55; }
+  .gallery-cell.active { border-color: var(--accent); box-shadow: 0 0 0 2px var(--accent); }
+
+  .gallery-cell img, .gallery-cell .vid-placeholder {
+    width: 100%; height: 100%;
+    object-fit: cover;
+    display: block;
+  }
+  .vid-placeholder {
+    background: var(--surface2);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    color: var(--accent2);
+    font-size: 40px;
+  }
+
+  .cell-badge-method {
+    position: absolute;
+    top: 6px; left: 6px;
+    display: flex;
+    align-items: center;
+    gap: 4px;
+    background: rgba(0,0,0,.75);
+    border-radius: 99px;
+    padding: 2px 7px;
+    font-size: 10px;
+    font-weight: 700;
+    color: #fff;
+  }
+  .cell-badge-method .dot { width: 6px; height: 6px; border-radius: 50%; }
+
+  .cell-badge-count {
+    position: absolute;
+    top: 6px; right: 6px;
+    background: rgba(0,0,0,.8);
+    border-radius: 99px;
+    padding: 2px 8px;
+    font-size: 11px;
+    font-weight: 700;
+    color: #fff;
+  }
+
+  .cell-hover-info {
+    position: absolute;
+    bottom: 0; left: 0; right: 0;
+    background: linear-gradient(transparent, rgba(0,0,0,.85));
+    padding: 20px 8px 8px;
+    opacity: 0;
+    transition: opacity .15s;
+    font-size: 11px;
+  }
+  .gallery-cell:hover .cell-hover-info { opacity: 1; }
+  .cell-hover-info .fname { font-weight: 600; word-break: break-all; }
+  .cell-hover-info .res   { color: var(--text-dim); }
+
+  .cell-check {
+    position: absolute;
+    bottom: 6px; right: 6px;
+    background: var(--success);
+    color: #fff;
+    border-radius: 50%;
+    width: 20px; height: 20px;
+    display: none;
+    align-items: center;
+    justify-content: center;
+    font-size: 12px;
+  }
+  .gallery-cell.reviewed .cell-check { display: flex; }
+
+  /* ── Load more ── */
+  #load-more-wrap { text-align: center; margin-top: 20px; }
+
+  /* ── Detail panel ── */
+  #detail-panel {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    margin-top: 12px;
+    padding: 20px;
+    display: none;
+  }
+  #detail-panel.show { display: block; }
+  .detail-header {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    margin-bottom: 16px;
+  }
+  .detail-header h3 { font-size: 15px; font-weight: 700; flex: 1; }
+  .detail-header .method-badge {
+    padding: 3px 10px;
+    border-radius: 99px;
+    font-size: 11px;
+    font-weight: 700;
+    color: #fff;
+  }
+  #detail-close {
+    background: none;
+    border: 1px solid var(--border);
+    color: var(--text-dim);
+    padding: 4px 10px;
+    border-radius: var(--radius);
+    font-size: 13px;
+  }
+
+  .copy-cards {
+    display: flex;
+    gap: 14px;
+    overflow-x: auto;
+    padding-bottom: 8px;
+  }
+  .copy-card {
+    background: var(--surface2);
+    border: 2px solid var(--border);
+    border-radius: var(--radius);
+    min-width: 200px;
+    max-width: 220px;
+    flex-shrink: 0;
+    overflow: hidden;
+    transition: border-color .15s;
+  }
+  .copy-card.suggested { border-color: var(--success); }
+  .copy-card.selected  { border-color: var(--accent); }
+  .copy-card.dimmed    { opacity: .4; }
+
+  .copy-card-thumb {
+    width: 100%;
+    aspect-ratio: 1;
+    object-fit: contain;
+    background: #111;
+    display: block;
+  }
+  .copy-card-body { padding: 10px; }
+  .copy-card-badges { display: flex; gap: 4px; flex-wrap: wrap; margin-bottom: 6px; }
+  .badge {
+    padding: 2px 7px;
+    border-radius: 99px;
+    font-size: 10px;
+    font-weight: 700;
+  }
+  .badge-suggested { background: rgba(76,175,125,.2); color: var(--success); border: 1px solid var(--success); }
+  .badge-selected  { background: rgba(124,108,252,.2); color: var(--accent); border: 1px solid var(--accent); }
+  .badge-takeout   { background: rgba(226,164,58,.2);  color: var(--warning); border: 1px solid var(--warning); }
+  .badge-edited    { background: rgba(224,92,92,.2);   color: var(--danger); border: 1px solid var(--danger); }
+
+  .meta-row {
+    display: flex;
+    justify-content: space-between;
+    font-size: 11px;
+    margin-bottom: 3px;
+    gap: 6px;
+  }
+  .meta-row .mk { color: var(--text-dim); flex-shrink: 0; }
+  .meta-row .mv { font-family: monospace; text-align: right; word-break: break-all; }
+  .meta-path {
+    font-family: monospace;
+    font-size: 10px;
+    color: var(--text-dim);
+    word-break: break-all;
+    margin-top: 6px;
+    border-top: 1px solid var(--border);
+    padding-top: 6px;
+  }
+  .keep-btn {
+    width: 100%;
+    margin-top: 8px;
+    background: var(--surface);
+    color: var(--text);
+    border: 1px solid var(--border);
+    padding: 6px;
+    font-size: 12px;
+    border-radius: var(--radius);
+  }
+  .keep-btn:hover { background: var(--accent); border-color: var(--accent); color: #fff; }
+  .copy-card.selected .keep-btn { background: var(--accent); border-color: var(--accent); color: #fff; }
+
+  .detail-footer {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    margin-top: 16px;
+    flex-wrap: wrap;
+  }
+  .recoverable-info { margin-left: auto; font-size: 12px; color: var(--text-dim); }
+
+  /* ── Reviewed view ── */
+  .reviewed-group-row {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 12px 16px;
+    margin-bottom: 8px;
+    display: flex;
+    align-items: center;
+    gap: 12px;
+  }
+  .reviewed-group-row .thumb-sm {
+    width: 48px; height: 48px;
+    object-fit: cover;
+    border-radius: 4px;
+    flex-shrink: 0;
+  }
+  .reviewed-group-row .info { flex: 1; min-width: 0; }
+  .reviewed-group-row .info .fname { font-weight: 600; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
+  .reviewed-group-row .info .sub { font-size: 11px; color: var(--text-dim); }
+
+  /* ── Export view ── */
+  #export-view table {
+    width: 100%;
+    border-collapse: collapse;
+    font-size: 12px;
+  }
+  #export-view th, #export-view td {
+    border: 1px solid var(--border);
+    padding: 6px 10px;
+    text-align: left;
+  }
+  #export-view th { background: var(--surface2); font-weight: 600; color: var(--text-dim); }
+  #export-view tr:hover td { background: rgba(255,255,255,.02); }
+
+  /* ── Confirm dialog ── */
+  #confirm-overlay {
+    position: fixed; inset: 0;
+    background: rgba(0,0,0,.7);
+    display: none;
+    align-items: center;
+    justify-content: center;
+    z-index: 100;
+  }
+  #confirm-overlay.show { display: flex; }
+  #confirm-box {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 28px;
+    max-width: 420px;
+    width: 90%;
+  }
+  #confirm-box h3 { margin-bottom: 10px; }
+  #confirm-box p  { font-size: 13px; color: var(--text-dim); margin-bottom: 16px; }
+  #confirm-input { width: 100%; margin-bottom: 14px; }
+
+  /* ── Toast ── */
+  #toast {
+    position: fixed;
+    bottom: 24px; right: 24px;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 10px 18px;
+    font-size: 13px;
+    opacity: 0;
+    transition: opacity .2s;
+    z-index: 200;
+    pointer-events: none;
+  }
+  #toast.show { opacity: 1; }
+
+  /* ── Empty state ── */
+  .empty-state {
+    text-align: center;
+    padding: 60px 20px;
+    color: var(--text-dim);
+  }
+  .empty-state .icon { font-size: 48px; margin-bottom: 12px; }
+  .empty-state h3 { color: var(--text); margin-bottom: 6px; }
+
+  /* ── Utils ── */
+  .flex { display: flex; }
+  .gap8 { gap: 8px; }
+  .mt8  { margin-top: 8px; }
+  .mt16 { margin-top: 16px; }
+  .text-dim { color: var(--text-dim); }
+  .text-success { color: var(--success); }
+  .text-danger  { color: var(--danger); }
+  .sr-only { position: absolute; width: 1px; height: 1px; overflow: hidden; clip: rect(0,0,0,0); }
+
+  /* scrollbar */
+  ::-webkit-scrollbar { width: 6px; height: 6px; }
+  ::-webkit-scrollbar-track { background: transparent; }
+  ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
+</style>
+</head>
+<body>
+
+<!-- Topbar -->
+<div id="topbar">
+  <h1>Dup<span>Finder</span></h1>
+  <div id="scan-chip">idle</div>
+  <div id="topbar-stats">
+    <span><b id="ts-files">—</b> files</span>
+    <span><b id="ts-groups">—</b> groups</span>
+    <span><b id="ts-pending">—</b> pending</span>
+    <span><b id="ts-size">—</b> recoverable</span>
+  </div>
+</div>
+
+<!-- Layout -->
+<div id="layout">
+
+  <!-- Sidebar -->
+  <nav id="sidebar">
+    <div class="nav-item active" data-view="dashboard">
+      <span>&#9000;</span> Dashboard
+    </div>
+    <div class="nav-sep"></div>
+    <div class="nav-item" data-view="gallery" data-method="all">
+      <span class="dot" style="background:var(--accent)"></span> All Groups
+      <span class="nav-badge" id="nb-all">0</span>
+    </div>
+    <div class="nav-item" data-view="gallery" data-method="sha256">
+      <span class="dot" style="background:var(--c-sha256)"></span> Exact
+      <span class="nav-badge" id="nb-sha256">0</span>
+    </div>
+    <div class="nav-item" data-view="gallery" data-method="phash">
+      <span class="dot" style="background:var(--c-phash)"></span> Visual
+      <span class="nav-badge" id="nb-phash">0</span>
+    </div>
+    <div class="nav-item" data-view="gallery" data-method="exif">
+      <span class="dot" style="background:var(--c-exif)"></span> EXIF
+      <span class="nav-badge" id="nb-exif">0</span>
+    </div>
+    <div class="nav-item" data-view="gallery" data-method="filesize">
+      <span class="dot" style="background:var(--c-size)"></span> Size
+      <span class="nav-badge" id="nb-size">0</span>
+    </div>
+    <div class="nav-sep"></div>
+    <div class="nav-item" data-view="reviewed">
+      &#10003; Reviewed
+    </div>
+    <div class="nav-item" data-view="export">
+      &#8659; Export
+    </div>
+  </nav>
+
+  <!-- Main -->
+  <main id="main">
+
+    <!-- Dashboard -->
+    <div id="view-dashboard" class="view active">
+      <div class="stat-cards">
+        <div class="stat-card">
+          <div class="label">Total files</div>
+          <div class="value" id="s-total">—</div>
+          <div class="sub" id="s-total-size">—</div>
+        </div>
+        <div class="stat-card">
+          <div class="label">Duplicate groups</div>
+          <div class="value" id="s-groups">—</div>
+          <div class="sub" id="s-groups-sub">—</div>
+        </div>
+        <div class="stat-card">
+          <div class="label">Space recoverable</div>
+          <div class="value" id="s-recov">—</div>
+          <div class="sub" id="s-recov-sub">if redundant removed</div>
+        </div>
+        <div class="stat-card">
+          <div class="label">Reviewed</div>
+          <div class="value" id="s-reviewed">—</div>
+          <div class="sub" id="s-reviewed-sub">—</div>
+        </div>
+      </div>
+
+      <div id="scan-form-area">
+        <div class="section-title">Scan library</div>
+        <div id="first-scan-ui">
+          <div class="input-row">
+            <input type="text" id="folder-input" placeholder="/photos/MyLibrary" value="/photos">
+            <button class="btn-primary" id="start-scan-btn" onclick="startScan('incremental')">Start Scan</button>
+          </div>
+        </div>
+
+        <div id="progress-area">
+          <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
+            <span class="progress-msg" id="progress-msg">Scanning...</span>
+            <span class="progress-msg" id="progress-count"></span>
+          </div>
+          <div class="progress-bar-wrap">
+            <div class="progress-bar-fill" id="progress-fill" style="width:0%"></div>
+          </div>
+          <div class="phase-pills">
+            <span class="phase-pill" data-phase="discovery">Discovery</span>
+            <span class="phase-pill" data-phase="takeout">Takeout</span>
+            <span class="phase-pill" data-phase="indexing">Indexing</span>
+            <span class="phase-pill" data-phase="phash">Phash</span>
+            <span class="phase-pill" data-phase="grouping">Grouping</span>
+          </div>
+          <div class="mt8">
+            <button class="btn-secondary btn-sm" onclick="cancelScan()">Cancel</button>
+          </div>
+        </div>
+
+        <div id="rescan-area">
+          <div class="rescan-info" id="rescan-info-text"></div>
+          <div class="input-row" style="margin-bottom:10px;">
+            <input type="text" id="rescan-folder-input" placeholder="/photos">
+          </div>
+          <div class="rescan-buttons">
+            <div class="rescan-btn-group">
+              <button class="btn-primary btn-sm" onclick="startScan('incremental')">Scan new &amp; changed</button>
+              <span class="desc">Only processes files added or modified since last scan. Prior decisions preserved.</span>
+            </div>
+            <div class="rescan-btn-group">
+              <button class="btn-secondary btn-sm" onclick="startScan('new_files')">Add new files</button>
+              <span class="desc">Indexes newly added files only. Existing decisions untouched.</span>
+            </div>
+            <div class="rescan-btn-group">
+              <button class="btn-secondary btn-sm" onclick="startScan('regroup')">Rebuild groups</button>
+              <span class="desc">Re-runs duplicate detection on existing index. No re-hashing.</span>
+            </div>
+            <div class="rescan-btn-group" style="margin-left:auto;">
+              <button class="btn-danger btn-sm" onclick="confirmFullReset()">Full reset &#9888;</button>
+              <span class="desc">Wipes all data and re-scans everything.</span>
+            </div>
+          </div>
+        </div>
+      </div>
+
+      <!-- Method breakdown -->
+      <div class="section-title">Breakdown by method</div>
+      <div class="stat-cards" id="method-cards">
+        <div class="stat-card">
+          <div class="label" style="color:var(--c-sha256)">&#11044; Exact copies (SHA-256)</div>
+          <div class="value" id="mc-sha256">—</div>
+          <div class="sub" id="mc-sha256-files">— files</div>
+        </div>
+        <div class="stat-card">
+          <div class="label" style="color:var(--c-phash)">&#11044; Visual match (phash)</div>
+          <div class="value" id="mc-phash">—</div>
+          <div class="sub" id="mc-phash-files">— files</div>
+        </div>
+        <div class="stat-card">
+          <div class="label" style="color:var(--c-exif)">&#11044; Same moment (EXIF)</div>
+          <div class="value" id="mc-exif">—</div>
+          <div class="sub" id="mc-exif-files">— files</div>
+        </div>
+        <div class="stat-card">
+          <div class="label" style="color:var(--c-size)">&#11044; Possible (size+dims)</div>
+          <div class="value" id="mc-size">—</div>
+          <div class="sub" id="mc-size-files">— files</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Gallery -->
+    <div id="view-gallery" class="view">
+      <div class="filter-bar">
+        <span class="filter-pill active" data-method="all">All</span>
+        <span class="filter-pill" data-method="sha256"><span class="dot" style="background:var(--c-sha256)"></span>Exact</span>
+        <span class="filter-pill" data-method="phash"><span class="dot" style="background:var(--c-phash)"></span>Visual</span>
+        <span class="filter-pill" data-method="exif"><span class="dot" style="background:var(--c-exif)"></span>EXIF</span>
+        <span class="filter-pill" data-method="filesize"><span class="dot" style="background:var(--c-size)"></span>Size</span>
+        <button class="btn-primary btn-sm" id="auto-resolve-btn" onclick="autoResolveExact()" style="margin-left:auto">
+          Auto-resolve exact copies
+        </button>
+      </div>
+
+      <div class="gallery-grid" id="gallery-grid"></div>
+      <div id="detail-panel">
+        <div class="detail-header">
+          <h3 id="detail-title">—</h3>
+          <span class="method-badge" id="detail-method-badge">—</span>
+          <button id="detail-close" onclick="closeDetail()">&#10005;</button>
+        </div>
+        <div class="copy-cards" id="copy-cards"></div>
+        <div class="detail-footer">
+          <button class="btn-success btn-sm" id="confirm-btn" onclick="confirmDecision()" disabled>Confirm selection</button>
+          <button class="btn-secondary btn-sm" onclick="skipGroup()">Skip group</button>
+          <button class="btn-secondary btn-sm" onclick="keepAll()">Keep all</button>
+          <span class="recoverable-info" id="recoverable-info"></span>
+        </div>
+      </div>
+      <div id="load-more-wrap" style="display:none">
+        <button class="btn-secondary" onclick="loadMoreGroups()">Load more</button>
+      </div>
+    </div>
+
+    <!-- Reviewed -->
+    <div id="view-reviewed" class="view">
+      <div id="reviewed-list"></div>
+    </div>
+
+    <!-- Export -->
+    <div id="view-export" class="view">
+      <div style="margin-bottom:16px;display:flex;gap:10px;align-items:center;">
+        <button class="btn-primary" onclick="window.location='/api/export/csv'">&#8659; Download CSV</button>
+        <span class="text-dim" style="font-size:12px;">No files are moved or deleted. This tool only records decisions.</span>
+      </div>
+      <div id="export-table-wrap"></div>
+    </div>
+
+  </main>
+</div>
+
+<!-- Confirm dialog -->
+<div id="confirm-overlay">
+  <div id="confirm-box">
+    <h3 id="confirm-title">Confirm action</h3>
+    <p id="confirm-desc">Are you sure?</p>
+    <input type="text" id="confirm-input" class="btn-sm" placeholder='Type RESET to confirm' style="display:none">
+    <div style="display:flex;gap:8px;">
+      <button class="btn-danger" id="confirm-ok-btn" onclick="confirmOk()">Confirm</button>
+      <button class="btn-secondary" onclick="closeConfirm()">Cancel</button>
+    </div>
+  </div>
+</div>
+
+<!-- Toast -->
+<div id="toast"></div>
+
+<script>
+// ── State ─────────────────────────────────────────────────────────────────────
+const state = {
+  currentView: 'dashboard',
+  galleryMethod: 'all',
+  galleryOffset: 0,
+  galleryLimit: 50,
+  galleryTotal: 0,
+  galleryGroups: [],
+  activeGroupId: null,
+  activeGroupData: null,
+  selectedKeeperId: null,
+  scanStatus: 'idle',
+  lastScanInfo: null,
+  confirmCallback: null,
+  phaseOrder: ['discovery','takeout','indexing','phash','grouping'],
+};
+
+// ── Utils ─────────────────────────────────────────────────────────────────────
+function fmt(n) { return n == null ? '—' : n.toLocaleString(); }
+function fmtBytes(b) {
+  if (b == null) return '—';
+  if (b >= 1e9) return (b/1e9).toFixed(1) + ' GB';
+  if (b >= 1e6) return (b/1e6).toFixed(1) + ' MB';
+  if (b >= 1e3) return (b/1e3).toFixed(1) + ' KB';
+  return b + ' B';
+}
+function el(id) { return document.getElementById(id); }
+function showToast(msg, ms=2500) {
+  const t = el('toast');
+  t.textContent = msg;
+  t.classList.add('show');
+  setTimeout(() => t.classList.remove('show'), ms);
+}
+async function api(method, path, body) {
+  const opts = { method, headers: {} };
+  if (body) { opts.body = JSON.stringify(body); opts.headers['Content-Type'] = 'application/json'; }
+  const res = await fetch(path, opts);
+  if (!res.ok) {
+    const err = await res.json().catch(() => ({ detail: res.statusText }));
+    throw new Error(err.detail || res.statusText);
+  }
+  return res.json();
+}
+
+// ── Navigation ────────────────────────────────────────────────────────────────
+document.querySelectorAll('.nav-item').forEach(item => {
+  item.addEventListener('click', () => {
+    const view = item.dataset.view;
+    const method = item.dataset.method;
+    if (view === 'gallery' && method) {
+      state.galleryMethod = method;
+      el('view-gallery').querySelectorAll('.filter-pill').forEach(p => {
+        p.classList.toggle('active', p.dataset.method === method);
+      });
+    }
+    switchView(view);
+    document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active'));
+    item.classList.add('active');
+  });
+});
+
+document.querySelectorAll('.filter-pill').forEach(pill => {
+  pill.addEventListener('click', () => {
+    document.querySelectorAll('.filter-pill').forEach(p => p.classList.remove('active'));
+    pill.classList.add('active');
+    state.galleryMethod = pill.dataset.method;
+    loadGallery(true);
+  });
+});
+
+function switchView(view) {
+  state.currentView = view;
+  document.querySelectorAll('.view').forEach(v => v.classList.remove('active'));
+  el(`view-${view}`).classList.add('active');
+  closeDetail();
+  if (view === 'gallery') loadGallery(true);
+  if (view === 'reviewed') loadReviewed();
+  if (view === 'export') loadExport();
+}
+
+// ── Stats + topbar refresh ────────────────────────────────────────────────────
+async function refreshStats() {
+  try {
+    const s = await api('GET', '/api/stats');
+    el('s-total').textContent = fmt(s.total_files);
+    el('s-total-size').textContent = fmtBytes(s.total_size_bytes);
+    el('s-groups').textContent = fmt(s.reviewed + s.pending);
+    el('s-groups-sub').textContent = `${fmt(s.pending)} pending`;
+    el('s-reviewed').textContent = fmt(s.reviewed);
+    el('s-reviewed-sub').textContent = `of ${fmt(s.reviewed + s.pending)} total`;
+    el('s-recov').textContent = fmtBytes(s.duplicate_size_bytes);
+
+    const gm = s.groups_by_method || {};
+    const total = (gm.sha256?.groups||0)+(gm.phash?.groups||0)+(gm.exif?.groups||0)+(gm.filesize?.groups||0);
+    el('nb-all').textContent = total;
+    el('nb-sha256').textContent = gm.sha256?.groups || 0;
+    el('nb-phash').textContent = gm.phash?.groups || 0;
+    el('nb-exif').textContent = gm.exif?.groups || 0;
+    el('nb-size').textContent = gm.filesize?.groups || 0;
+
+    el('mc-sha256').textContent = fmt(gm.sha256?.groups);
+    el('mc-sha256-files').textContent = fmt(gm.sha256?.files) + ' files';
+    el('mc-phash').textContent = fmt(gm.phash?.groups);
+    el('mc-phash-files').textContent = fmt(gm.phash?.files) + ' files';
+    el('mc-exif').textContent = fmt(gm.exif?.groups);
+    el('mc-exif-files').textContent = fmt(gm.exif?.files) + ' files';
+    el('mc-size').textContent = fmt(gm.filesize?.groups);
+    el('mc-size-files').textContent = fmt(gm.filesize?.files) + ' files';
+
+    el('ts-files').textContent = fmt(s.total_files);
+    el('ts-groups').textContent = fmt(s.reviewed + s.pending);
+    el('ts-pending').textContent = fmt(s.pending);
+    el('ts-size').textContent = fmtBytes(s.duplicate_size_bytes);
+  } catch(e) {}
+}
+
+// ── Scan polling ──────────────────────────────────────────────────────────────
+let scanPoller = null;
+const PHASES = ['discovery','takeout','indexing','phash','grouping'];
+
+function startPoller() {
+  if (scanPoller) return;
+  scanPoller = setInterval(pollScan, 800);
+}
+function stopPoller() {
+  clearInterval(scanPoller);
+  scanPoller = null;
+}
+
+async function pollScan() {
+  try {
+    const s = await api('GET', '/api/scan/status');
+    state.scanStatus = s.status;
+    updateScanUI(s);
+    if (s.status !== 'running') {
+      stopPoller();
+      refreshStats();
+      if (s.status === 'complete') {
+        showToast('Scan complete!');
+        if (state.currentView === 'gallery') loadGallery(true);
+      }
+    }
+  } catch(e) {}
+}
+
+function updateScanUI(s) {
+  const chip = el('scan-chip');
+  chip.textContent = s.status;
+  chip.className = '';
+  chip.classList.add(s.status);
+
+  const isRunning = s.status === 'running';
+  el('progress-area').classList.toggle('show', isRunning);
+  el('first-scan-ui').style.display = (s.scan_id || isRunning) ? 'none' : '';
+  el('rescan-area').classList.toggle('show', !isRunning && !!s.scan_id);
+
+  if (isRunning) {
+    el('progress-msg').textContent = s.message || '';
+    const pct = s.total > 0 ? Math.round((s.progress / s.total) * 100) : 0;
+    el('progress-fill').style.width = pct + '%';
+    el('progress-count').textContent = s.total > 0 ? `${fmt(s.progress)} / ${fmt(s.total)}` : '';
+
+    const phaseIdx = PHASES.indexOf(s.phase);
+    document.querySelectorAll('.phase-pill').forEach((pill, i) => {
+      pill.classList.remove('active','done');
+      if (i < phaseIdx) pill.classList.add('done');
+      else if (i === phaseIdx) pill.classList.add('active');
+    });
+  }
+
+  if (s.scan_id && !isRunning) {
+    // populate rescan folder from last scan
+    el('rescan-folder-input').value = el('folder-input').value || '/photos';
+  }
+
+  // Update stats in topbar
+  if (s.stats) {
+    el('ts-files').textContent = fmt(s.stats.total_files);
+    el('ts-groups').textContent = fmt(s.stats.total_groups);
+    el('ts-pending').textContent = fmt(s.stats.pending);
+  }
+}
+
+function getFolderPath() {
+  const rescanField = el('rescan-folder-input');
+  const firstField = el('folder-input');
+  const rescanVisible = el('rescan-area').classList.contains('show');
+  return rescanVisible ? rescanField.value.trim() : firstField.value.trim();
+}
+
+async function startScan(mode) {
+  const folder = getFolderPath();
+  if (!folder) { showToast('Enter a folder path first.'); return; }
+  try {
+    const r = await api('POST', '/api/scan/start', { folder_path: folder, mode });
+    state.scanStatus = 'running';
+    showToast('Scan started');
+    startPoller();
+  } catch(e) {
+    showToast('Error: ' + e.message, 4000);
+  }
+}
+
+async function cancelScan() {
+  try {
+    await api('POST', '/api/scan/cancel');
+    showToast('Cancelling scan...');
+  } catch(e) {}
+}
+
+function confirmFullReset() {
+  el('confirm-title').textContent = 'Full reset';
+  el('confirm-desc').textContent = 'This will delete ALL scan results and decisions. Type RESET to confirm.';
+  el('confirm-input').style.display = '';
+  el('confirm-input').value = '';
+  state.confirmCallback = async () => {
+    if (el('confirm-input').value !== 'RESET') { showToast('Type RESET to confirm'); return false; }
+    await startScan('full_reset');
+    return true;
+  };
+  el('confirm-overlay').classList.add('show');
+}
+
+async function confirmOk() {
+  if (state.confirmCallback) {
+    const ok = await state.confirmCallback();
+    if (ok !== false) closeConfirm();
+  } else {
+    closeConfirm();
+  }
+}
+function closeConfirm() {
+  el('confirm-overlay').classList.remove('show');
+  state.confirmCallback = null;
+  el('confirm-input').style.display = 'none';
+}
+
+// ── Gallery ───────────────────────────────────────────────────────────────────
+const METHOD_COLOR = { sha256:'#378ADD', phash:'#9b7de8', exif:'#e2a43a', filesize:'#888780' };
+const METHOD_LABEL = { sha256:'Exact copy', phash:'Visual match', exif:'Same moment', filesize:'Possible match' };
+
+async function loadGallery(reset=false) {
+  if (reset) {
+    state.galleryOffset = 0;
+    state.galleryGroups = [];
+    el('gallery-grid').innerHTML = '';
+    closeDetail();
+  }
+  try {
+    const params = new URLSearchParams({
+      method: state.galleryMethod,
+      reviewed: 'false',
+      sort: 'count',
+      offset: state.galleryOffset,
+      limit: state.galleryLimit,
+    });
+    const data = await api('GET', `/api/groups?${params}`);
+    state.galleryTotal = data.total;
+    state.galleryGroups.push(...data.groups);
+    renderGallery(data.groups, reset);
+    state.galleryOffset += data.groups.length;
+    el('load-more-wrap').style.display =
+      state.galleryOffset < state.galleryTotal ? '' : 'none';
+  } catch(e) {}
+}
+
+function loadMoreGroups() { loadGallery(false); }
+
+function renderGallery(groups, reset) {
+  const grid = el('gallery-grid');
+  if (reset && groups.length === 0) {
+    grid.innerHTML = `<div class="empty-state" style="grid-column:1/-1">
+      <div class="icon">&#128247;</div>
+      <h3>No duplicate groups found</h3>
+      <p>Run a scan to detect duplicates, or all groups have been reviewed.</p>
+    </div>`;
+    return;
+  }
+
+  groups.forEach(g => {
+    const cell = document.createElement('div');
+    cell.className = 'gallery-cell' + (g.reviewed ? ' reviewed' : '');
+    cell.dataset.groupId = g.id;
+
+    const color = METHOD_COLOR[g.method] || '#888';
+    const label = METHOD_LABEL[g.method] || g.method;
+    const sk = g.suggested_keeper;
+    const thumbUrl = sk ? sk.thumb_url : '';
+    const fname = sk ? sk.filename : '';
+    const res = sk && sk.width ? `${sk.width}×${sk.height}` : '';
+
+    cell.innerHTML = `
+      ${thumbUrl
+        ? `<img src="${thumbUrl}" alt="${fname}" loading="lazy" onerror="this.parentElement.querySelector('.vid-placeholder')&&(this.style.display='none')">`
+        : `<div class="vid-placeholder">&#9654;</div>`
+      }
+      <div class="cell-badge-method">
+        <span class="dot" style="background:${color}"></span>${label}
+      </div>
+      <div class="cell-badge-count">${g.member_count} copies</div>
+      <div class="cell-hover-info">
+        <div class="fname">${fname}</div>
+        <div class="res">${res}</div>
+      </div>
+      <div class="cell-check">&#10003;</div>
+    `;
+    cell.addEventListener('click', () => openGroup(g.id, cell));
+    grid.appendChild(cell);
+  });
+}
+
+async function openGroup(groupId, cellEl) {
+  if (state.activeGroupId === groupId) {
+    closeDetail();
+    return;
+  }
+  document.querySelectorAll('.gallery-cell').forEach(c => c.classList.remove('active'));
+  if (cellEl) cellEl.classList.add('active');
+
+  state.activeGroupId = groupId;
+  state.selectedKeeperId = null;
+  el('confirm-btn').disabled = true;
+
+  try {
+    const g = await api('GET', `/api/groups/${groupId}`);
+    state.activeGroupData = g;
+    renderDetail(g);
+
+    // Insert detail panel after the row containing the clicked cell
+    const panel = el('detail-panel');
+    const grid = el('gallery-grid');
+    if (cellEl) {
+      // find row end
+      const cellRect = cellEl.getBoundingClientRect();
+      const gridRect = grid.getBoundingClientRect();
+      const cells = Array.from(grid.children).filter(c => c.classList.contains('gallery-cell'));
+      const cols = Math.round(grid.offsetWidth / (cellEl.offsetWidth + 12));
+      const cellIdx = cells.indexOf(cellEl);
+      const rowEnd = Math.min(Math.ceil((cellIdx + 1) / cols) * cols, cells.length);
+      const afterCell = cells[rowEnd - 1];
+      grid.parentNode.insertBefore(panel, afterCell.nextSibling || el('load-more-wrap'));
+    }
+    panel.classList.add('show');
+    panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
+  } catch(e) {
+    showToast('Failed to load group: ' + e.message);
+  }
+}
+
+function renderDetail(g) {
+  el('detail-title').textContent = g.members[0]?.filename || 'Group ' + g.id;
+  const badge = el('detail-method-badge');
+  badge.textContent = METHOD_LABEL[g.method] || g.method;
+  badge.style.background = METHOD_COLOR[g.method] || '#888';
+
+  const cards = el('copy-cards');
+  cards.innerHTML = '';
+
+  g.members.forEach(m => {
+    const card = document.createElement('div');
+    card.className = 'copy-card' + (m.is_suggested ? ' suggested' : '');
+    card.dataset.fileId = m.file_id;
+
+    const badges = [];
+    if (m.is_suggested) badges.push(`<span class="badge badge-suggested">Suggested</span>`);
+    if (m.is_keeper)    badges.push(`<span class="badge badge-selected">Selected &#10003;</span>`);
+    if (m.is_takeout)   badges.push(`<span class="badge badge-takeout">Takeout</span>`);
+    if (m.is_edited)    badges.push(`<span class="badge badge-edited">Edited</span>`);
+
+    const res = m.width ? `${m.width}×${m.height}` : '—';
+
+    card.innerHTML = `
+      <img class="copy-card-thumb" src="${m.thumb_url}" alt="${m.filename}" loading="lazy">
+      <div class="copy-card-body">
+        <div class="copy-card-badges">${badges.join('')}</div>
+        <div class="meta-row"><span class="mk">Resolution</span><span class="mv">${res}</span></div>
+        <div class="meta-row"><span class="mk">Size</span><span class="mv">${fmtBytes(m.file_size)}</span></div>
+        <div class="meta-row"><span class="mk">Format</span><span class="mv">${m.extension||m.mime_type||'—'}</span></div>
+        <div class="meta-row"><span class="mk">Device</span><span class="mv">${m.exif_device||'—'}</span></div>
+        <div class="meta-row"><span class="mk">Date</span><span class="mv">${m.exif_datetime||'—'}</span></div>
+        <div class="meta-path" title="${m.path}">${m.path}</div>
+        <button class="keep-btn" onclick="selectKeeper(${m.file_id})">Keep this</button>
+      </div>
+    `;
+
+    if (m.is_keeper) {
+      state.selectedKeeperId = m.file_id;
+      card.classList.add('selected');
+      el('confirm-btn').disabled = false;
+    }
+
+    cards.appendChild(card);
+  });
+
+  updateRecoverableInfo();
+}
+
+function selectKeeper(fileId) {
+  state.selectedKeeperId = fileId;
+  el('confirm-btn').disabled = false;
+
+  document.querySelectorAll('.copy-card').forEach(card => {
+    const fid = parseInt(card.dataset.fileId);
+    card.classList.remove('selected','dimmed');
+    if (fid === fileId) card.classList.add('selected');
+    else card.classList.add('dimmed');
+  });
+
+  updateRecoverableInfo();
+}
+
+function updateRecoverableInfo() {
+  const g = state.activeGroupData;
+  if (!g) return;
+  const total = g.members.reduce((s,m) => s + (m.file_size||0), 0);
+  const keeperSize = g.members.find(m => m.file_id === state.selectedKeeperId)?.file_size || 0;
+  const recoverable = total - keeperSize;
+  const sel = state.selectedKeeperId ? `Keeping 1 of ${g.members.length}` : `${g.members.length} copies`;
+  el('recoverable-info').textContent = `${sel} · ${fmtBytes(recoverable)} recoverable`;
+}
+
+function closeDetail() {
+  el('detail-panel').classList.remove('show');
+  document.querySelectorAll('.gallery-cell').forEach(c => c.classList.remove('active'));
+  state.activeGroupId = null;
+  state.activeGroupData = null;
+  state.selectedKeeperId = null;
+}
+
+async function confirmDecision() {
+  if (!state.activeGroupId || !state.selectedKeeperId) return;
+  try {
+    await api('POST', `/api/groups/${state.activeGroupId}/decide`, { keeper_file_id: state.selectedKeeperId });
+    showToast('Decision saved');
+    markGroupReviewed(state.activeGroupId);
+    closeDetail();
+    refreshStats();
+  } catch(e) {
+    showToast('Error: ' + e.message);
+  }
+}
+
+async function skipGroup() {
+  if (!state.activeGroupId) return;
+  try {
+    await api('POST', `/api/groups/${state.activeGroupId}/skip`);
+    showToast('Group skipped');
+    markGroupReviewed(state.activeGroupId);
+    closeDetail();
+    refreshStats();
+  } catch(e) {}
+}
+
+async function keepAll() {
+  if (!state.activeGroupId) return;
+  try {
+    await api('POST', `/api/groups/${state.activeGroupId}/keep-all`);
+    showToast('All marked as keepers');
+    markGroupReviewed(state.activeGroupId);
+    closeDetail();
+    refreshStats();
+  } catch(e) {}
+}
+
+function markGroupReviewed(groupId) {
+  const cell = document.querySelector(`.gallery-cell[data-group-id="${groupId}"]`);
+  if (cell) cell.classList.add('reviewed');
+  // Remove from unreviewed list
+  state.galleryGroups = state.galleryGroups.filter(g => g.id !== groupId);
+}
+
+async function autoResolveExact() {
+  try {
+    const r = await api('POST', '/api/groups/auto-resolve-exact');
+    showToast(`Auto-resolved ${r.resolved} exact duplicate groups`);
+    loadGallery(true);
+    refreshStats();
+  } catch(e) {
+    showToast('Error: ' + e.message);
+  }
+}
+
+// ── Reviewed view ─────────────────────────────────────────────────────────────
+async function loadReviewed() {
+  const list = el('reviewed-list');
+  list.innerHTML = '<div class="text-dim" style="padding:20px">Loading...</div>';
+  try {
+    const data = await api('GET', '/api/groups?reviewed=true&limit=100&offset=0');
+    if (data.groups.length === 0) {
+      list.innerHTML = `<div class="empty-state"><div class="icon">&#10003;</div><h3>No reviewed groups yet</h3></div>`;
+      return;
+    }
+    list.innerHTML = '';
+    for (const g of data.groups) {
+      const row = document.createElement('div');
+      row.className = 'reviewed-group-row';
+      const sk = g.suggested_keeper;
+      const label = METHOD_LABEL[g.method] || g.method;
+      const color = METHOD_COLOR[g.method] || '#888';
+      row.innerHTML = `
+        ${sk ? `<img class="thumb-sm" src="${sk.thumb_url}" alt="">` : ''}
+        <div class="info">
+          <div class="fname">${sk?.filename || 'Group ' + g.id}</div>
+          <div class="sub">
+            <span style="color:${color}">${label}</span> &middot;
+            ${g.member_count} copies
+          </div>
+        </div>
+        <button class="btn-secondary btn-sm" onclick="unreviewGroup(${g.id}, this)">Undo</button>
+      `;
+      list.appendChild(row);
+    }
+  } catch(e) {
+    list.innerHTML = '<div class="text-dim">Failed to load.</div>';
+  }
+}
+
+async function unreviewGroup(groupId, btn) {
+  try {
+    await api('POST', `/api/groups/${groupId}/unreview`);
+    btn.closest('.reviewed-group-row').remove();
+    showToast('Group un-reviewed');
+    refreshStats();
+  } catch(e) {}
+}
+
+// ── Export view ───────────────────────────────────────────────────────────────
+async function loadExport() {
+  const wrap = el('export-table-wrap');
+  wrap.innerHTML = '<div class="text-dim" style="padding:20px">Loading...</div>';
+  try {
+    const data = await api('GET', '/api/groups?limit=100&offset=0&reviewed=all');
+    if (!data.groups.length) {
+      wrap.innerHTML = '<div class="empty-state"><div class="icon">&#128193;</div><h3>No groups yet</h3></div>';
+      return;
+    }
+    const rows = data.groups.map(g => `
+      <tr>
+        <td>${g.id}</td>
+        <td><span style="color:${g.method_color}">${g.method_label}</span></td>
+        <td>${g.member_count}</td>
+        <td>${g.reviewed ? '<span class="text-success">&#10003; Reviewed</span>' : '<span class="text-dim">Pending</span>'}</td>
+        <td>${g.suggested_keeper?.filename || '—'}</td>
+      </tr>
+    `).join('');
+    wrap.innerHTML = `
+      <table>
+        <thead><tr><th>ID</th><th>Method</th><th>Copies</th><th>Status</th><th>Suggested keeper</th></tr></thead>
+        <tbody>${rows}</tbody>
+      </table>
+      <p class="text-dim mt16" style="font-size:12px;">
+        Showing ${data.groups.length} of ${data.total} groups.
+        Download the CSV for full details including file paths and decisions.
+      </p>
+    `;
+  } catch(e) {
+    wrap.innerHTML = '<div class="text-dim">Failed to load.</div>';
+  }
+}
+
+// ── Keyboard shortcuts ────────────────────────────────────────────────────────
+document.addEventListener('keydown', e => {
+  if (e.key === 'Escape') {
+    if (el('confirm-overlay').classList.contains('show')) closeConfirm();
+    else closeDetail();
+  }
+});
+
+// ── Init ──────────────────────────────────────────────────────────────────────
+async function init() {
+  await refreshStats();
+  // Check if there's an existing scan
+  try {
+    const s = await api('GET', '/api/scan/status');
+    updateScanUI(s);
+    if (s.status === 'running') startPoller();
+  } catch(e) {}
+}
+
+init();
+// Refresh stats every 30s when idle
+setInterval(() => {
+  if (state.scanStatus !== 'running') refreshStats();
+}, 30000);
+</script>
+</body>
+</html>