diff --git a/bot.py b/bot.py index c777d1d..96d5587 100644 --- a/bot.py +++ b/bot.py @@ -104,6 +104,10 @@ VAD_STOP_SECS = float(os.environ.get("VAD_STOP_SECS", "0.5")) # Agent persona name — purely for warmth; change/remove freely. AGENT_NAME = os.environ.get("AGENT_NAME", "Sofia") +# How the name should be SPOKEN. Kokoro reads all-caps "AVA" as letters ("A-V-A"), so we +# respell it as a word for TTS only (logs/Odoo keep AGENT_NAME). Override to taste, e.g. +# AGENT_NAME_SPOKEN=Eva for an "EE-vuh" sound. Default: AGENT_NAME (title-cased if all-caps). +AGENT_NAME_SPOKEN = os.environ.get("AGENT_NAME_SPOKEN", AGENT_NAME.title() if AGENT_NAME.isupper() else AGENT_NAME) # Grace period after the agent finishes the goodbye before we drop the carrier leg, so # the caller isn't cut off mid-word. The hang-up itself (EndTaskFrame -> auto_hang_up) # is unchanged — this only delays it. @@ -115,21 +119,17 @@ SYSTEM_PROMPT = ( "talk like a helpful human being: natural, relaxed, and genuinely conversational. Keep every " "reply to ONE short sentence — two at the very most, never a paragraph. Speak in English. Say " "numbers, dates, and times as words a person would say.\n\n" - "Your job is to answer callers' questions and to take appointment requests. For a " - "booking, gather these SIX things naturally as the conversation flows — don't " - "interrogate, and never ask for something the caller already told you:\n" - " 1. Their FULL name (first and last). If they give only a first name, warmly ask for " - "their last name too.\n" - " 2. The phone number to reach them. Their caller-ID number is given to you below — read " - "it back and ask if that is the best number. If they say no, ask for the right number and " - "use that instead.\n" - " 3. Which office or city is most convenient.\n" - " 4. The reason for the visit.\n" - " 5. Their insurance — ask what insurance they have and simply note it (see the insurance " - "rule below).\n" - " 6. 
The day and time they prefer (take it in their own words — see the date rule below).\n" - "When you have the details, repeat them back in one warm sentence to confirm, and let them " - "know a staff member will call to finalize the time.\n\n" + "Your job is to answer callers' questions and take appointment requests. Have a natural, " + "flowing conversation: follow the caller's lead, help with what they ask, and gather the " + "booking details as they naturally come up — never interrogate, and never re-ask for " + "something they already told you. To book a visit you need their FULL name (first and " + "last — if they give only a first name, warmly ask their last name), which office or city, " + "the reason for the visit, their insurance (log only — see below), and their preferred day " + "and time.\n" + "Do NOT open the call or early turns by reciting their phone number. Leave the phone number " + "for the END: once the other details are gathered, confirm the callback number in one short " + "line (it's given to you below), then repeat the details back in one warm sentence and let " + "them know a staff member will call to finalize.\n\n" "Stay truthful and within your limits:\n" "- Use ONLY the facts below for addresses, phone numbers, insurance, and services. 
Never " "make any of these up.\n" @@ -301,8 +301,10 @@ def _say_digits(s: str) -> str: return " ".join(_DIGIT_WORDS[c] for c in s if c in _DIGIT_WORDS) -def _phone_to_words(m: re.Match) -> str: - d = re.sub(r"\D", "", m.group(0)) +def _spoken_phone(number: str) -> str: + """Phone number as grouped, digit-by-digit words: '+19735731671' -> + 'nine seven three, five seven three, one six seven one'.""" + d = re.sub(r"\D", "", number or "") if len(d) == 11 and d[0] == "1": # drop US country code d = d[1:] if len(d) == 10: # group as area / prefix / line for natural cadence @@ -310,8 +312,15 @@ def _phone_to_words(m: re.Match) -> str: return _say_digits(d) +def _phone_to_words(m: re.Match) -> str: + return _spoken_phone(m.group(0)) + + def tts_normalize(text: str) -> str: - """Make phone numbers, street numbers, and zips speak naturally (digit by digit).""" + """Make phone numbers, street numbers, and zips speak naturally (digit by digit), and + respell the all-caps agent name so it's said as a word, not letter-by-letter.""" + if AGENT_NAME and AGENT_NAME != AGENT_NAME_SPOKEN: # empty name would match at every word boundary + text = re.sub(rf"\b{re.escape(AGENT_NAME)}\b", AGENT_NAME_SPOKEN, text) text = _PHONE_RE.sub(_phone_to_words, text) text = _LONGNUM_RE.sub(lambda m: _say_digits(m.group(0)), text) return text @@ -408,16 +417,20 @@ async def run_agent(transport, caller_number=None, call_sid=None, do_capture=Tru ))) heartbeat = AudioHeartbeat() - # Per-call system message = static prompt + the caller-ID number to confirm. + # Per-call system message = static prompt + the caller-ID number to confirm. Inject it + # ALREADY spelled out digit-by-digit so the model repeats clean words instead of mangling + # the raw digits (e.g. reading 197 as "one hundred ninety-seven"). if caller_number: caller_line = ( - f"\n\nCALLER ID: the caller's number on file is {caller_number}. Read it back and " - "ask if it's the best number to reach them; if they say no, use the number they give." 
+ f"\n\nCALLER ID: the caller's number on file, written so you read it digit by digit, " + f"is: {_spoken_phone(caller_number)}. When it's time to confirm it (near the end), say " + "it back exactly like that and ask if it's the best number; if they say no, use the " + "number they give. Do not say it any earlier in the call." ) else: caller_line = ( - "\n\nCALLER ID: no number is available — ask the caller for the best phone number " - "to reach them." + "\n\nCALLER ID: no number is available — near the end, ask the caller for the best " + "phone number to reach them." ) system_content = SYSTEM_PROMPT + caller_line context_kwargs = {"messages": [{"role": "system", "content": system_content}]}