From b0df7fd5b00b55e6448191968b2424748aac91ce Mon Sep 17 00:00:00 2001 From: tocmo0nlord Date: Sat, 27 Jun 2026 17:36:20 +0000 Subject: [PATCH] Fix missed quiet "yes" after phone confirmation: more sensitive VAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the phone confirmation a caller's "yes" wasn't picked up (silence) until they repeated it louder. Logs: line was live and the half-duplex gate had reopened, but VAD never fired for ~14s — the quick/quiet "yes" was below threshold (min_volume 0.3, start_secs 0.2). Now that HalfDuplexGate gates out the agent's echo while it speaks, VAD can be sensitive without echo false-triggers (it only listens hard on the caller's turn). Lowered min_volume 0.3->0.15, start_secs 0.2->0.1, and trimmed the echo tail 0.5->0.25 so an answer right after the agent stops isn't dropped. Co-Authored-By: Claude Opus 4.8 --- .env.example | 7 ++++++- CLAUDE.md | 8 +++++--- bot.py | 9 ++++++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.env.example b/.env.example index 879528a..f21bb9f 100644 --- a/.env.example +++ b/.env.example @@ -62,4 +62,9 @@ HANGUP_DELAY_SECS=4.0 # Half-duplex: ignore caller audio while the agent speaks (+ tail) so its own echo on the # phone line can't trigger a false barge-in that cancels its reply. false = allow barge-in. HALF_DUPLEX=true -ECHO_TAIL_SECS=0.5 +ECHO_TAIL_SECS=0.25 +# VAD kept sensitive (half-duplex gates echo, so this only affects the caller's turn). +VAD_CONFIDENCE=0.5 +VAD_MIN_VOLUME=0.15 +VAD_START_SECS=0.1 +VAD_STOP_SECS=0.5 diff --git a/CLAUDE.md b/CLAUDE.md index 6f109f8..859538d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -52,9 +52,11 @@ info-only calls (no booking keyword) are never asked for a number. `PIPELINE_SAMPLE_RATE = 16000`, `WIRE_SAMPLE_RATE = 8000` are already set correctly. No custom audio module needed. -**VAD tuned for telephony** — `confidence=0.5`, `min_volume=0.3` already loosened from -desktop defaults. These settings directly address the repeat-yourself problem on the -VAD side. +**VAD tuned for telephony** — `confidence=0.5`, `min_volume=0.15`, `start_secs=0.1` — kept +sensitive so a quick/quiet "yes" isn't missed (a caller had to repeat it after the phone +confirmation). This is safe **because `HalfDuplexGate` gates out the agent's echo while it +speaks**, so sensitive VAD only listens hard during the caller's own turn and doesn't cause +echo false-triggers. Addresses the repeat-yourself / missed-short-answer problem. **Capacity gating** — `MAX_CONCURRENT_CALLS=2` with atomic slot reservation in `server.py` prevents GPU thrashing. Keep it. diff --git a/bot.py b/bot.py index cb59730..7cec333 100644 --- a/bot.py +++ b/bot.py @@ -98,15 +98,18 @@ PIPELINE_SAMPLE_RATE = 16000 # internal rate Whisper/VAD actually need # VAD tuning. Defaults (confidence 0.7 / min_volume 0.6) are desktop-mic values that can # miss short/quiet 8 kHz telephony utterances like "yes" — loosen them for the phone. +# VAD is kept sensitive so a quick/quiet "yes" isn't missed (a caller had to repeat it). This +# is safe because HalfDuplexGate gates out the agent's echo while it speaks, so sensitive VAD +# doesn't cause echo false-triggers — it only listens hard during the caller's own turn. VAD_CONFIDENCE = float(os.environ.get("VAD_CONFIDENCE", "0.5")) -VAD_MIN_VOLUME = float(os.environ.get("VAD_MIN_VOLUME", "0.3")) -VAD_START_SECS = float(os.environ.get("VAD_START_SECS", "0.2")) +VAD_MIN_VOLUME = float(os.environ.get("VAD_MIN_VOLUME", "0.15")) +VAD_START_SECS = float(os.environ.get("VAD_START_SECS", "0.1")) VAD_STOP_SECS = float(os.environ.get("VAD_STOP_SECS", "0.5")) # Half-duplex: ignore inbound audio while the agent is speaking (+ this tail in seconds) # so the agent's own voice echoing back the phone line can't trigger a false barge-in that # cancels its reply (= caller hears silence). Set HALF_DUPLEX=false to allow barge-in. HALF_DUPLEX = os.environ.get("HALF_DUPLEX", "true").lower() not in ("false", "0", "no") -ECHO_TAIL_SECS = float(os.environ.get("ECHO_TAIL_SECS", "0.5")) +ECHO_TAIL_SECS = float(os.environ.get("ECHO_TAIL_SECS", "0.25")) # Agent persona name — purely for warmth; change/remove freely. AGENT_NAME = os.environ.get("AGENT_NAME", "Sofia")