Fix dead-air: stop VAD interruption broadcasts under half-duplex
Live call diagnosis (recording + log): replies were generated in <1s, but a false VAD trigger (background noise, no transcript) fired 0.7s later, and the aggregator's broadcast_interruption silently discarded the queued TTS audio. The caller heard 20-35s of silence, said "Hello?", and repeated themselves. The HalfDuplexGate only closes while the bot is audibly speaking, so the window between generation start and first wire audio was unprotected. SilenceWatchdog never fired because the cancelled reply never emitted BotStoppedSpeaking. With HALF_DUPLEX on, build the user aggregator with enable_interruptions=False on both turn-start strategies: this enforces strict turn-taking, so nothing is ever cancelled. UserStartedSpeakingFrame still flows, so watchdog resets keep working. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
32
bot.py
32
bot.py
@@ -39,7 +39,15 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.turns.user_start import (
|
||||
TranscriptionUserTurnStartStrategy,
|
||||
VADUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.processors.audio.vad_processor import VADProcessor
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
@@ -675,7 +683,27 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru
|
||||
if ENABLE_TOOLS:
|
||||
context_kwargs["tools"] = _build_tools()
|
||||
context = LLMContext(**context_kwargs)
|
||||
agg = LLMContextAggregatorPair(context)
|
||||
# STRICT TURN-TAKING — no interruption broadcasts (live-call diagnosis 2026-07-04):
|
||||
# interruptions are VAD-driven and fire on ANY turn start. HalfDuplexGate already blocks
|
||||
# barge-in while the bot SPEAKS, but between "LLM starts generating" and "first audio on
|
||||
# the wire" the gate is open — a false VAD blip (breath/background noise, no transcript) in that
|
||||
# window broadcast an interruption that silently discarded the queued reply: caller heard
|
||||
# 20-35s of dead air and said "Hello?". With HALF_DUPLEX there is nothing legitimate for
|
||||
# an interruption to do, so don't broadcast them at all. UserStartedSpeakingFrame is still
|
||||
# emitted (SilenceWatchdog reset keeps working); if the caller talks over generation, both
|
||||
# replies simply play in order instead of one being thrown away.
|
||||
if HALF_DUPLEX:
|
||||
user_params = LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
start=[
|
||||
VADUserTurnStartStrategy(enable_interruptions=False),
|
||||
TranscriptionUserTurnStartStrategy(enable_interruptions=False),
|
||||
],
|
||||
),
|
||||
)
|
||||
agg = LLMContextAggregatorPair(context, user_params=user_params)
|
||||
else:
|
||||
agg = LLMContextAggregatorPair(context)
|
||||
# Deterministic slot memory: merges fragmented user turns + injects the live
|
||||
# collected/needed checklist into the system message before each generation.
|
||||
groomer = CallStateGroomer(
|
||||
|
||||
Reference in New Issue
Block a user