Deterministic phone confirmation safety net + docs

EndCallProcessor now guarantees the callback number is confirmed on booking
calls. The 8B model reads it back only ~half the time, so if a closing is reached
on a booking call (a booking keyword was seen) without the agent having spoken the
number (phone_marker absent from its replies), the hang-up is suppressed and a
scripted confirmation line (caller ID spelled out) is injected as a TTSSpeakFrame
first.
The agent's own readback satisfies the gate (no double-ask); info-only calls are
never asked for a number. Runtime-tested all four paths (inject / no-inject /
info-only / inject-then-end).

CLAUDE.md: document the safety net, the "never claim a booking" rule, the direct
phone-confirm phrasing, and the insurance "never say we accept" rule.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
tocmo0nlord
2026-06-27 15:52:22 +00:00
parent 1e0472e864
commit d7bfe2dbe8
2 changed files with 79 additions and 24 deletions

72
bot.py
View File

@@ -203,20 +203,35 @@ def _build_tools() -> ToolsSchema:
class EndCallProcessor(FrameProcessor):
"""Lets the agent hang up. MUST sit between the LLM and the TTS: there it sees her reply
text (LLMTextFrame, flowing downstream) AND the upstream copy of BotStoppedSpeakingFrame
the output transport emits. It accumulates each reply; if the finished reply contains a
closing ('goodbye'/'adiós'), it waits until she's done speaking, pauses HANGUP_DELAY_SECS
so the caller isn't clipped, then pushes EndTaskFrame upstream — the task ends and
TwilioFrameSerializer (auto_hang_up) drops the call."""
"""Lets the agent hang up AND guarantees the callback number is confirmed once.
Sits between the LLM and the TTS: it sees reply text (LLMTextFrame, downstream) and the
upstream BotStoppedSpeakingFrame. On a closing ('goodbye'/'adiós') it waits for TTS to
finish, pauses HANGUP_DELAY_SECS so the caller isn't clipped, then pushes EndTaskFrame
(TwilioFrameSerializer auto_hang_up drops the call).
Deterministic phone confirmation: the prompt asks the agent to read the callback number
back, but the 8B skips it ~half the time. So if a closing is reached and the agent never
spoke the number this call (`phone_marker` not seen in its replies), we suppress the
hang-up and inject a scripted confirmation turn first — guaranteeing it happens exactly
once (the agent's own readback satisfies the gate, so no double-ask in the common case)."""
_CLOSINGS = ("goodbye", "good-bye", "good bye", "adiós", "adios", "hasta luego")
# Only force phone confirmation when a booking was actually underway (not info-only calls).
_BOOKING_KWS = ("appointment", "schedule", "book", "insurance", "what day", "what time",
"come in", "preferred")
def __init__(self):
def __init__(self, phone_confirm_line: str | None = None, phone_marker: str | None = None):
super().__init__()
self._buf = ""
self._should_end = False
self._end_task = None
self._phone_confirm_line = phone_confirm_line
self._phone_marker = (phone_marker or "").lower()
# Nothing to confirm (no caller ID) → treat as already handled.
self._phone_confirmed = not phone_confirm_line
self._assistant_seen = ""
self._pending_phone_inject = False
@classmethod
def _is_closing(cls, text: str) -> bool:
@@ -235,17 +250,31 @@ class EndCallProcessor(FrameProcessor):
await super().process_frame(frame, direction)
if isinstance(frame, LLMTextFrame):
self._buf += frame.text
self._assistant_seen += frame.text.lower()
if self._phone_marker and self._phone_marker in self._assistant_seen:
self._phone_confirmed = True # the agent read the number back itself
elif isinstance(frame, LLMFullResponseEndFrame):
if self._is_closing(self._buf):
self._should_end = True
logger.info(f"{AGENT_NAME} signalled closing -- will hang up "
f"{HANGUP_DELAY_SECS:.0f}s after she finishes speaking")
booking = any(k in self._assistant_seen for k in self._BOOKING_KWS)
if self._phone_confirmed or not booking:
self._should_end = True
logger.info(f"{AGENT_NAME} signalled closing -- will hang up "
f"{HANGUP_DELAY_SECS:.0f}s after she finishes speaking")
else:
# Booking call closing without the number confirmed — do it deterministically.
self._pending_phone_inject = True
logger.info(f"{AGENT_NAME} reached closing w/o phone confirmation -- injecting it")
self._buf = ""
elif isinstance(frame, BotStoppedSpeakingFrame) and self._should_end:
self._should_end = False
# Schedule the teardown so we don't block the pipeline during the grace pause.
if self._end_task is None:
self._end_task = asyncio.create_task(self._hang_up_after_delay())
elif isinstance(frame, BotStoppedSpeakingFrame):
if self._pending_phone_inject:
self._pending_phone_inject = False
self._phone_confirmed = True
await self.push_frame(TTSSpeakFrame(self._phone_confirm_line), FrameDirection.DOWNSTREAM)
elif self._should_end:
self._should_end = False
# Schedule the teardown so we don't block the pipeline during the grace pause.
if self._end_task is None:
self._end_task = asyncio.create_task(self._hang_up_after_delay())
await self.push_frame(frame, direction)
@@ -455,7 +484,18 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru
context_kwargs["tools"] = _build_tools()
context = LLMContext(**context_kwargs)
agg = LLMContextAggregatorPair(context)
endcall = EndCallProcessor()
# Deterministic phone-confirmation safety net: if the agent reaches a closing without
# having read the caller-ID back, EndCallProcessor speaks this scripted line first.
if caller_number:
_spoken = _spoken_phone(caller_number)
phone_confirm_line = (
f"Before you go, let me make sure I have the best number to reach you: "
f"{_spoken}. Is that correct?"
)
phone_marker = _spoken.split(",")[0].strip() # e.g. "nine seven three"
else:
phone_confirm_line = phone_marker = None
endcall = EndCallProcessor(phone_confirm_line=phone_confirm_line, phone_marker=phone_marker)
pipeline = Pipeline(
[