diff --git a/CLAUDE.md b/CLAUDE.md
index 67e9f8c..506ab4d 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -83,6 +83,19 @@ audio while the bot is speaking (+`ECHO_TAIL_SECS`, default 0.5s) so echo never
 Trade-off: half-duplex — the caller can't barge in mid-utterance (fine for short replies).
 `HALF_DUPLEX=false` restores barge-in. Keep it on for telephony.
 
+**Interruption broadcasts OFF under half-duplex (2026-07-04).** The gate left one window
+open: between "LLM starts generating" and "first audio on the wire" the bot isn't speaking
+yet, so caller-side audio still reaches the VAD — and a false VAD blip (breath/background
+noise, no transcript ever produced) made the user aggregator call `broadcast_interruption`,
+silently discarding the queued reply. A live call showed 20–35s of dead air, the caller saying
+"Hello?" and repeating themselves; SilenceWatchdog never fired because the cancelled reply
+never produced a `BotStoppedSpeakingFrame` to arm it. Fix: when `HALF_DUPLEX` is on, the
+aggregator is built with `VADUserTurnStartStrategy(enable_interruptions=False)` +
+`TranscriptionUserTurnStartStrategy(enable_interruptions=False)` — strict turn-taking, no
+interruption broadcasts at all (there's nothing legitimate for them to do in a no-barge-in
+bot). `UserStartedSpeakingFrame` is still emitted, so the watchdog reset keeps working. If
+the caller talks over generation, both replies play in order instead of one being dropped.
+
 **`CallStateGroomer` (`callstate.py`) — deterministic slot memory (2026-07-03).** Fixes the
 8B re-asking for things the caller already gave (name, reason, phone — seen repeatedly in the
 historical call logs: "Didn't you say you had my phone number?", "I already gave you my full
diff --git a/bot.py b/bot.py
index 9f2d613..d36e45b 100644
--- a/bot.py
+++ b/bot.py
@@ -39,7 +39,15 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.turns.user_start import (
+    TranscriptionUserTurnStartStrategy,
+    VADUserTurnStartStrategy,
+)
+from pipecat.turns.user_turn_strategies import UserTurnStrategies
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
 from pipecat.processors.audio.vad_processor import VADProcessor
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -675,7 +683,27 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru
     if ENABLE_TOOLS:
         context_kwargs["tools"] = _build_tools()
     context = LLMContext(**context_kwargs)
-    agg = LLMContextAggregatorPair(context)
+    # STRICT TURN-TAKING — no interruption broadcasts (live-call diagnosis 2026-07-04):
+    # interruptions are VAD-driven and fire on ANY turn start. HalfDuplexGate already blocks
+    # barge-in while the bot SPEAKS, but between "LLM starts generating" and "first audio on
+    # the wire" the gate is open — a false VAD blip (breath/background noise, no transcript) in that
+    # window broadcast an interruption that silently discarded the queued reply: caller heard
+    # 20-35s of dead air and said "Hello?". With HALF_DUPLEX there is nothing legitimate for
+    # an interruption to do, so don't broadcast them at all. UserStartedSpeakingFrame is still
+    # emitted (SilenceWatchdog reset keeps working); if the caller talks over generation, both
+    # replies simply play in order instead of one being thrown away.
+    if HALF_DUPLEX:
+        user_params = LLMUserAggregatorParams(
+            user_turn_strategies=UserTurnStrategies(
+                start=[
+                    VADUserTurnStartStrategy(enable_interruptions=False),
+                    TranscriptionUserTurnStartStrategy(enable_interruptions=False),
+                ],
+            ),
+        )
+        agg = LLMContextAggregatorPair(context, user_params=user_params)
+    else:
+        agg = LLMContextAggregatorPair(context)
     # Deterministic slot memory: merges fragmented user turns + injects the live
     # collected/needed checklist into the system message before each generation.
     groomer = CallStateGroomer(