Cut smart-turn INCOMPLETE wait 3s -> 1.5s (SMART_TURN_STOP_SECS)
Follow-up test call: no more cancelled replies, but there were 3-5s response gaps on
turns the smart-turn model judged INCOMPLETE (e.g. "I'm due to my annual exam."),
because it waited the library-default 3s of silence before triggering the LLM. Build
the stop strategy explicitly with SmartTurnParams(stop_secs=1.5), tunable via the
SMART_TURN_STOP_SECS environment variable. If the caller really does resume speaking,
that simply produces a follow-up turn, which is safe now that interruption broadcasts are off.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
38
bot.py
38
bot.py
@@ -43,10 +43,13 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.turns.user_start import (
|
||||
TranscriptionUserTurnStartStrategy,
|
||||
VADUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.processors.audio.vad_processor import VADProcessor
|
||||
@@ -129,6 +132,12 @@ ECHO_TAIL_SECS = float(os.environ.get("ECHO_TAIL_SECS", "0.25"))
|
||||
SILENCE_WATCHDOG = os.environ.get("SILENCE_WATCHDOG", "true").lower() not in ("false", "0", "no")
|
||||
SILENCE_REPROMPT_SECS = float(os.environ.get("SILENCE_REPROMPT_SECS", "7.0"))
|
||||
MAX_REPROMPTS = int(os.environ.get("MAX_REPROMPTS", "2"))
|
||||
# When the smart-turn model judges an utterance INCOMPLETE (trailing intonation), it waits for
|
||||
# this much extra silence before ending the turn anyway. The library default of 3s produced
|
||||
# 3.5s of dead air on turns like "I'm due to my annual exam." (live call 2026-07-04). 1.5s
|
||||
# keeps some room for the caller to finish a thought without the reply feeling stalled; with
|
||||
# interruptions off, a caller who does continue simply gets a second reply in order.
|
||||
SMART_TURN_STOP_SECS = float(os.environ.get("SMART_TURN_STOP_SECS", "1.5"))
|
||||
# Deterministic slot-state tracking (callstate.py): after each agent turn, extract what the
|
||||
# caller already provided and inject an explicit ALREADY-COLLECTED / STILL-NEEDED checklist
|
||||
# into the system message, plus merge VAD-fragmented user turns. Fixes the 8B re-asking for
|
||||
@@ -692,18 +701,25 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru
|
||||
# an interruption to do, so don't broadcast them at all. UserStartedSpeakingFrame is still
|
||||
# emitted (SilenceWatchdog reset keeps working); if the caller talks over generation, both
|
||||
# replies simply play in order instead of one being thrown away.
|
||||
if HALF_DUPLEX:
|
||||
user_params = LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
start=[
|
||||
VADUserTurnStartStrategy(enable_interruptions=False),
|
||||
TranscriptionUserTurnStartStrategy(enable_interruptions=False),
|
||||
],
|
||||
),
|
||||
# Turn-stop: same smart-turn analyzer as the default, but with the INCOMPLETE-verdict
|
||||
# silence wait tuned down from 3s (see SMART_TURN_STOP_SECS above).
|
||||
stop_strategies = [
|
||||
TurnAnalyzerUserTurnStopStrategy(
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(
|
||||
params=SmartTurnParams(stop_secs=SMART_TURN_STOP_SECS)
|
||||
)
|
||||
)
|
||||
agg = LLMContextAggregatorPair(context, user_params=user_params)
|
||||
else:
|
||||
agg = LLMContextAggregatorPair(context)
|
||||
]
|
||||
user_params = LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
start=[
|
||||
VADUserTurnStartStrategy(enable_interruptions=False),
|
||||
TranscriptionUserTurnStartStrategy(enable_interruptions=False),
|
||||
] if HALF_DUPLEX else None, # None -> library defaults (interruptions on)
|
||||
stop=stop_strategies,
|
||||
),
|
||||
)
|
||||
agg = LLMContextAggregatorPair(context, user_params=user_params)
|
||||
# Deterministic slot memory: merges fragmented user turns + injects the live
|
||||
# collected/needed checklist into the system message before each generation.
|
||||
groomer = CallStateGroomer(
|
||||
|
||||
Reference in New Issue
Block a user