diff --git a/.env.example b/.env.example index 879528a..f21bb9f 100644 --- a/.env.example +++ b/.env.example @@ -62,4 +62,9 @@ HANGUP_DELAY_SECS=4.0 # Half-duplex: ignore caller audio while the agent speaks (+ tail) so its own echo on the # phone line can't trigger a false barge-in that cancels its reply. false = allow barge-in. HALF_DUPLEX=true -ECHO_TAIL_SECS=0.5 +ECHO_TAIL_SECS=0.25 +# VAD kept sensitive — relies on HALF_DUPLEX=true (half-duplex gates echo, so this only affects the caller's turn). +VAD_CONFIDENCE=0.5 +VAD_MIN_VOLUME=0.15 +VAD_START_SECS=0.1 +VAD_STOP_SECS=0.5 diff --git a/CLAUDE.md b/CLAUDE.md index 6f109f8..859538d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -52,9 +52,11 @@ info-only calls (no booking keyword) are never asked for a number. `PIPELINE_SAMPLE_RATE = 16000`, `WIRE_SAMPLE_RATE = 8000` are already set correctly. No custom audio module needed. -**VAD tuned for telephony** — `confidence=0.5`, `min_volume=0.3` already loosened from -desktop defaults. These settings directly address the repeat-yourself problem on the -VAD side. +**VAD tuned for telephony** — `confidence=0.5`, `min_volume=0.15`, `start_secs=0.1` — kept +sensitive so a quick/quiet "yes" isn't missed (a caller had to repeat it after the phone +confirmation). This is safe only while `HALF_DUPLEX` is on, **because `HalfDuplexGate` gates out the agent's echo while it +speaks**, so sensitive VAD only listens hard during the caller's own turn and doesn't cause +echo false-triggers. Addresses the repeat-yourself / missed-short-answer problem. **Capacity gating** — `MAX_CONCURRENT_CALLS=2` with atomic slot reservation in `server.py` prevents GPU thrashing. Keep it. diff --git a/bot.py b/bot.py index cb59730..7cec333 100644 --- a/bot.py +++ b/bot.py @@ -98,15 +98,18 @@ PIPELINE_SAMPLE_RATE = 16000 # internal rate Whisper/VAD actually need # VAD tuning. Defaults (confidence 0.7 / min_volume 0.6) are desktop-mic values that can # miss short/quiet 8 kHz telephony utterances like "yes" — loosen them for the phone. 
+# VAD is kept sensitive so a quick/quiet "yes" isn't missed (a caller had to repeat it). Safe +# only with HALF_DUPLEX on: HalfDuplexGate gates out the agent's echo while it speaks, so VAD +# doesn't false-trigger on echo — it only listens hard during the caller's own turn. VAD_CONFIDENCE = float(os.environ.get("VAD_CONFIDENCE", "0.5")) -VAD_MIN_VOLUME = float(os.environ.get("VAD_MIN_VOLUME", "0.3")) -VAD_START_SECS = float(os.environ.get("VAD_START_SECS", "0.2")) +VAD_MIN_VOLUME = float(os.environ.get("VAD_MIN_VOLUME", "0.15")) +VAD_START_SECS = float(os.environ.get("VAD_START_SECS", "0.1")) VAD_STOP_SECS = float(os.environ.get("VAD_STOP_SECS", "0.5")) # Half-duplex: ignore inbound audio while the agent is speaking (+ this tail in seconds) # so the agent's own voice echoing back the phone line can't trigger a false barge-in that # cancels its reply (= caller hears silence). Set HALF_DUPLEX=false to allow barge-in. HALF_DUPLEX = os.environ.get("HALF_DUPLEX", "true").lower() not in ("false", "0", "no") -ECHO_TAIL_SECS = float(os.environ.get("ECHO_TAIL_SECS", "0.5")) +ECHO_TAIL_SECS = float(os.environ.get("ECHO_TAIL_SECS", "0.25")) # Agent persona name — purely for warmth; change/remove freely. AGENT_NAME = os.environ.get("AGENT_NAME", "Sofia")