From 94e2ca1902a6d5817933a1ebbe5ba75d0bd420e1 Mon Sep 17 00:00:00 2001
From: tocmo0nlord <mr.garcia09@gmail.com>
Date: Sat, 4 Jul 2026 03:23:03 +0000
Subject: [PATCH] Cut smart-turn INCOMPLETE wait 3s -> 1.5s
 (SMART_TURN_STOP_SECS)

Follow-up test call: no more cancelled replies, but 3-5s response gaps remained
on turns the smart-turn model judged INCOMPLETE ("I'm due to my annual exam."):
it waited the library-default 3s of silence before triggering the LLM. Build
the stop strategy explicitly with SmartTurnParams(stop_secs=1.5), tunable via
the SMART_TURN_STOP_SECS env var. A caller who really does resume just yields
a follow-up turn, which is safe now that interruption broadcasts are off.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .env.example |  3 +++
 CLAUDE.md    |  9 +++++++++
 bot.py       | 38 +++++++++++++++++++++++++++-----------
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/.env.example b/.env.example
index 3199d4c..f648600 100644
--- a/.env.example
+++ b/.env.example
@@ -68,6 +68,9 @@ VAD_CONFIDENCE=0.5
 VAD_MIN_VOLUME=0.15
 VAD_START_SECS=0.1
 VAD_STOP_SECS=0.5
+# Seconds of extra silence the smart-turn analyzer waits after judging an utterance INCOMPLETE
+# before ending the turn anyway (library default 3s caused ~3.5s dead air on some turns).
+#SMART_TURN_STOP_SECS=1.5
 # Deterministic slot memory (callstate.py): injects an ALREADY-COLLECTED / STILL-NEEDED
 # checklist into the system prompt each turn + merges VAD-fragmented user turns, so the
 # local 8B stops re-asking for name/reason/phone. Default: on for ollama, off for anthropic.
diff --git a/CLAUDE.md b/CLAUDE.md
index 506ab4d..4fc24fb 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -96,6 +96,15 @@ interruption broadcasts at all (there's nothing legitimate for them to do in a n
 bot). `UserStartedSpeakingFrame` is still emitted, so the watchdog reset keeps working. If
 the caller talks over generation, both replies play in order instead of one being dropped.
 
+**Smart-turn INCOMPLETE wait tuned 3s → 1.5s (2026-07-04, `SMART_TURN_STOP_SECS`).** The
+follow-up test call had no cancellations but still 3–5s gaps on some turns: the smart-turn
+model judged utterances like "I'm due to my annual exam." INCOMPLETE and then waited the
+library-default 3s of silence ("End of Turn complete due to stop_secs. Silence in ms: 3032")
+before triggering the LLM. The stop strategy is now built explicitly with
+`LocalSmartTurnAnalyzerV3(params=SmartTurnParams(stop_secs=1.5))`. Worst-case perceived
+response gap drops from ~3.5s+synthesis to ~2s+synthesis; a caller who really does resume
+just produces a follow-up turn (safe now that interruptions are off).
+
 **`CallStateGroomer` (`callstate.py`) — deterministic slot memory (2026-07-03).** Fixes the
 8B re-asking for things the caller already gave (name, reason, phone — seen repeatedly in the
 historical call logs: "Didn't you say you had my phone number?", "I already gave you my full
diff --git a/bot.py b/bot.py
index d36e45b..8a56a51 100644
--- a/bot.py
+++ b/bot.py
@@ -43,10 +43,13 @@ from pipecat.processors.aggregators.llm_response_universal import (
     LLMContextAggregatorPair,
     LLMUserAggregatorParams,
 )
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.turns.user_start import (
     TranscriptionUserTurnStartStrategy,
     VADUserTurnStartStrategy,
 )
+from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
 from pipecat.turns.user_turn_strategies import UserTurnStrategies
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
 from pipecat.processors.audio.vad_processor import VADProcessor
@@ -129,6 +132,12 @@ ECHO_TAIL_SECS = float(os.environ.get("ECHO_TAIL_SECS", "0.25"))
 SILENCE_WATCHDOG = os.environ.get("SILENCE_WATCHDOG", "true").lower() not in ("false", "0", "no")
 SILENCE_REPROMPT_SECS = float(os.environ.get("SILENCE_REPROMPT_SECS", "7.0"))
 MAX_REPROMPTS = int(os.environ.get("MAX_REPROMPTS", "2"))
+# When the smart-turn model judges an utterance INCOMPLETE (trailing intonation), it waits
+# this much extra silence before ending the turn anyway. The library default of 3s produced
+# ~3.5s of dead air on turns like "I'm due to my annual exam." (live call 2026-07-04). 1.5s
+# keeps some room for the caller to finish a thought without the reply feeling stalled; with
+# interruptions off, a caller who does continue simply gets a second reply in order.
+SMART_TURN_STOP_SECS = float(os.environ.get("SMART_TURN_STOP_SECS", "1.5"))
 # Deterministic slot-state tracking (callstate.py): after each agent turn, extract what the
 # caller already provided and inject an explicit ALREADY-COLLECTED / STILL-NEEDED checklist
 # into the system message, plus merge VAD-fragmented user turns. Fixes the 8B re-asking for
@@ -692,18 +701,25 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru
     # an interruption to do, so don't broadcast them at all. UserStartedSpeakingFrame is still
     # emitted (SilenceWatchdog reset keeps working); if the caller talks over generation, both
     # replies simply play in order instead of one being thrown away.
-    if HALF_DUPLEX:
-        user_params = LLMUserAggregatorParams(
-            user_turn_strategies=UserTurnStrategies(
-                start=[
-                    VADUserTurnStartStrategy(enable_interruptions=False),
-                    TranscriptionUserTurnStartStrategy(enable_interruptions=False),
-                ],
-            ),
+    # Turn-stop: same smart-turn analyzer as the default, but with the INCOMPLETE-verdict
+    # silence wait tuned down from 3s (see SMART_TURN_STOP_SECS above).
+    stop_strategies = [
+        TurnAnalyzerUserTurnStopStrategy(
+            turn_analyzer=LocalSmartTurnAnalyzerV3(
+                params=SmartTurnParams(stop_secs=SMART_TURN_STOP_SECS)
+            )
         )
-        agg = LLMContextAggregatorPair(context, user_params=user_params)
-    else:
-        agg = LLMContextAggregatorPair(context)
+    ]
+    user_params = LLMUserAggregatorParams(
+        user_turn_strategies=UserTurnStrategies(
+            start=[
+                VADUserTurnStartStrategy(enable_interruptions=False),
+                TranscriptionUserTurnStartStrategy(enable_interruptions=False),
+            ] if HALF_DUPLEX else None,  # None -> library defaults (interruptions on)
+            stop=stop_strategies,
+        ),
+    )
+    agg = LLMContextAggregatorPair(context, user_params=user_params)
     # Deterministic slot memory: merges fragmented user turns + injects the live
     # collected/needed checklist into the system message before each generation.
     groomer = CallStateGroomer(