diff --git a/CLAUDE.md b/CLAUDE.md index 67e9f8c..506ab4d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -83,6 +83,19 @@ audio while the bot is speaking (+`ECHO_TAIL_SECS`, default 0.5s) so echo never Trade-off: half-duplex — the caller can't barge in mid-utterance (fine for short replies). `HALF_DUPLEX=false` restores barge-in. Keep it on for telephony. +**Interruption broadcasts OFF under half-duplex (2026-07-04).** The gate left one window +open: between "LLM starts generating" and "first audio on the wire" the bot isn't speaking +yet, so caller-side audio still reaches the VAD — and a false VAD blip (breath/background +noise, no transcript ever produced) made the user aggregator broadcast an interruption (`broadcast_interruption`), +silently discarding the queued reply. A live call showed 20–35s of dead air, the caller saying +"Hello?" and repeating themselves; SilenceWatchdog never fired because the cancelled reply +never produced a `BotStoppedSpeakingFrame` to arm it. Fix: when `HALF_DUPLEX` is on, the +aggregator is built with `VADUserTurnStartStrategy(enable_interruptions=False)` + +`TranscriptionUserTurnStartStrategy(enable_interruptions=False)` — strict turn-taking, no +interruption broadcasts at all (there's nothing legitimate for them to do in a no-barge-in +bot). `UserStartedSpeakingFrame` is still emitted, so the watchdog reset keeps working. If +the caller talks over generation, both replies play in order instead of one being dropped. 
+ **`CallStateGroomer` (`callstate.py`) — deterministic slot memory (2026-07-03).** Fixes the 8B re-asking for things the caller already gave (name, reason, phone — seen repeatedly in the historical call logs: "Didn't you say you had my phone number?", "I already gave you my full diff --git a/bot.py b/bot.py index 9f2d613..d36e45b 100644 --- a/bot.py +++ b/bot.py @@ -39,7 +39,15 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.turns.user_start import ( + TranscriptionUserTurnStartStrategy, + VADUserTurnStartStrategy, +) +from pipecat.turns.user_turn_strategies import UserTurnStrategies from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor from pipecat.processors.audio.vad_processor import VADProcessor from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -675,7 +683,27 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru if ENABLE_TOOLS: context_kwargs["tools"] = _build_tools() context = LLMContext(**context_kwargs) - agg = LLMContextAggregatorPair(context) + # STRICT TURN-TAKING — no interruption broadcasts (live-call diagnosis 2026-07-04): + # interruptions are VAD-driven and fire on ANY turn start. HalfDuplexGate already blocks + # barge-in while the bot SPEAKS, but between "LLM starts generating" and "first audio on + # the wire" the gate is open — a false VAD blip (breath/background noise, no transcript) in that + # window broadcast an interruption that silently discarded the queued reply: caller heard + # 20-35s of dead air and said "Hello?". 
With HALF_DUPLEX there is nothing legitimate for + # an interruption to do, so don't broadcast them at all. UserStartedSpeakingFrame is still + # emitted (SilenceWatchdog reset keeps working); if the caller talks over generation, both + # replies simply play in order instead of one being thrown away. + if HALF_DUPLEX: + user_params = LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + start=[ + VADUserTurnStartStrategy(enable_interruptions=False), + TranscriptionUserTurnStartStrategy(enable_interruptions=False), + ], + ), + ) + agg = LLMContextAggregatorPair(context, user_params=user_params) + else: + agg = LLMContextAggregatorPair(context) # Deterministic slot memory: merges fragmented user turns + injects the live # collected/needed checklist into the system message before each generation. groomer = CallStateGroomer(