Fix phone readback, lead-with-number flow, and AVA pronunciation

- Phone: inject the caller-ID into the prompt already spelled digit-by-digit so
  the model repeats clean words instead of mangling raw digits (it had emitted
  "197-three five seven three..." -> Kokoro read "one hundred ninety-seven").
- Flow: stop leading with the phone number. Prompt now flows naturally and
  saves the callback-number confirmation for the END; the caller-ID line says
  not to recite it early. Verified 3/3 openings no longer recite the number.
- Name: Kokoro spelled all-caps "AVA" as "A-V-A". Respell to AGENT_NAME_SPOKEN
  (default "Ava") in TTS only; logs/Odoo keep AGENT_NAME. Override e.g.
  AGENT_NAME_SPOKEN=Eva for an "EE-vuh" sound.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
tocmo0nlord
2026-06-26 02:08:52 +00:00
parent 24d4efd7ed
commit 703c902d0f

59
bot.py
View File

@@ -104,6 +104,10 @@ VAD_STOP_SECS = float(os.environ.get("VAD_STOP_SECS", "0.5"))
# Agent persona name — purely for warmth; change/remove freely.
AGENT_NAME = os.environ.get("AGENT_NAME", "Sofia")
# How the name should be SPOKEN. Kokoro reads all-caps "AVA" as letters ("A-V-A"), so we
# respell it as a word for TTS only (logs/Odoo keep AGENT_NAME). The default is derived
# from AGENT_NAME: an all-caps name is title-cased ("AVA" -> "Ava"), anything else is
# spoken exactly as written. Deriving it (instead of hard-coding "Ava") means that leaving
# AGENT_NAME_SPOKEN unset can never swap a different persona into the speech — with the
# old fixed default, the default AGENT_NAME "Sofia" was spoken as "Ava".
# Override to taste, e.g. AGENT_NAME_SPOKEN=Eva for an "EE-vuh" sound.
AGENT_NAME_SPOKEN = os.environ.get(
    "AGENT_NAME_SPOKEN",
    AGENT_NAME.title() if AGENT_NAME.isupper() else AGENT_NAME,
)
# Grace period after the agent finishes the goodbye before we drop the carrier leg, so
# the caller isn't cut off mid-word. The hang-up itself (EndTaskFrame -> auto_hang_up)
# is unchanged — this only delays it.
@@ -115,21 +119,17 @@ SYSTEM_PROMPT = (
"talk like a helpful human being: natural, relaxed, and genuinely conversational. Keep every "
"reply to ONE short sentence — two at the very most, never a paragraph. Speak in English. Say "
"numbers, dates, and times as words a person would say.\n\n"
"Your job is to answer callers' questions and to take appointment requests. For a "
"booking, gather these SIX things naturally as the conversation flows — don't "
"interrogate, and never ask for something the caller already told you:\n"
" 1. Their FULL name (first and last). If they give only a first name, warmly ask for "
"their last name too.\n"
" 2. The phone number to reach them. Their caller-ID number is given to you below — read "
"it back and ask if that is the best number. If they say no, ask for the right number and "
"use that instead.\n"
" 3. Which office or city is most convenient.\n"
" 4. The reason for the visit.\n"
" 5. Their insurance — ask what insurance they have and simply note it (see the insurance "
"rule below).\n"
" 6. The day and time they prefer (take it in their own words — see the date rule below).\n"
"When you have the details, repeat them back in one warm sentence to confirm, and let them "
"know a staff member will call to finalize the time.\n\n"
"Your job is to answer callers' questions and take appointment requests. Have a natural, "
"flowing conversation: follow the caller's lead, help with what they ask, and gather the "
"booking details as they naturally come up — never interrogate, and never re-ask for "
"something they already told you. To book a visit you need their FULL name (first and "
"last — if they give only a first name, warmly ask their last name), which office or city, "
"the reason for the visit, their insurance (log only — see below), and their preferred day "
"and time.\n"
"Do NOT open the call or early turns by reciting their phone number. Leave the phone number "
"for the END: once the other details are gathered, confirm the callback number in one short "
"line (it's given to you below), then repeat the details back in one warm sentence and let "
"them know a staff member will call to finalize.\n\n"
"Stay truthful and within your limits:\n"
"- Use ONLY the facts below for addresses, phone numbers, insurance, and services. Never "
"make any of these up.\n"
@@ -301,8 +301,10 @@ def _say_digits(s: str) -> str:
return " ".join(_DIGIT_WORDS[c] for c in s if c in _DIGIT_WORDS)
def _phone_to_words(m: re.Match) -> str:
d = re.sub(r"\D", "", m.group(0))
def _spoken_phone(number: str) -> str:
"""Phone number as grouped, digit-by-digit words: '+19735731671' ->
'nine seven three, five seven three, one six seven one'."""
d = re.sub(r"\D", "", number or "")
if len(d) == 11 and d[0] == "1": # drop US country code
d = d[1:]
if len(d) == 10: # group as area / prefix / line for natural cadence
@@ -310,8 +312,15 @@ def _phone_to_words(m: re.Match) -> str:
return _say_digits(d)
def _phone_to_words(m: re.Match) -> str:
    """Regex-substitution callback: return the matched phone number as spoken words.

    Takes the full text of match ``m`` and hands it to ``_spoken_phone``, returning
    whatever that produces.
    """
    matched_text = m.group(0)
    return _spoken_phone(matched_text)
def tts_normalize(text: str) -> str:
    """Make phone numbers, street numbers, and zips speak naturally (digit by digit), and
    respell the agent name so it's said as a word, not letter-by-letter.

    Steps, in order:
      1. Replace whole-word occurrences of AGENT_NAME with AGENT_NAME_SPOKEN (only when
         a name is configured and the two differ).
      2. Rewrite every _PHONE_RE match via _phone_to_words.
      3. Rewrite every _LONGNUM_RE match via _say_digits.

    Returns the normalized text.
    """
    # Skip the respell when there is no name to look for: an empty AGENT_NAME would reduce
    # the pattern to a bare word-boundary assertion, which matches at EVERY word boundary
    # and would splice the spoken name throughout the text.
    if AGENT_NAME and AGENT_NAME != AGENT_NAME_SPOKEN:
        # Use a callable replacement so the spoken name is inserted literally; a plain
        # string replacement is treated as a template and re.sub would interpret any
        # backslash / group reference that came in through the environment variable.
        text = re.sub(
            rf"\b{re.escape(AGENT_NAME)}\b",
            lambda _m: AGENT_NAME_SPOKEN,
            text,
        )
    text = _PHONE_RE.sub(_phone_to_words, text)
    text = _LONGNUM_RE.sub(lambda m: _say_digits(m.group(0)), text)
    return text
@@ -408,16 +417,20 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru
)))
heartbeat = AudioHeartbeat()
# Per-call system message = static prompt + the caller-ID number to confirm.
# Per-call system message = static prompt + the caller-ID number to confirm. Inject it
# ALREADY spelled out digit-by-digit so the model repeats clean words instead of mangling
# the raw digits (e.g. reading 197 as "one hundred ninety-seven").
if caller_number:
caller_line = (
f"\n\nCALLER ID: the caller's number on file is {caller_number}. Read it back and "
"ask if it's the best number to reach them; if they say no, use the number they give."
f"\n\nCALLER ID: the caller's number on file, written so you read it digit by digit, "
f"is: {_spoken_phone(caller_number)}. When it's time to confirm it (near the end), say "
"it back exactly like that and ask if it's the best number; if they say no, use the "
"number they give. Do not say it any earlier in the call."
)
else:
caller_line = (
"\n\nCALLER ID: no number is available — ask the caller for the best phone number "
"to reach them."
"\n\nCALLER ID: no number is available — near the end, ask the caller for the best "
"phone number to reach them."
)
system_content = SYSTEM_PROMPT + caller_line
context_kwargs = {"messages": [{"role": "system", "content": system_content}]}