Capacity gating verified deterministically: atomic _reserve_call_slot grants exactly MAX_CONCURRENT_CALLS (2), refuses the 3rd, frees on hangup, and 10 simultaneous attempts grant only 2 (no race); /voice returns BUSY + Hangup at cap. Marked the gate item done (end-to-end 3-phone test optional). Add scripts/score_calls.py: grades recent calls from the server log against the Phase 1 gate (turns, latency LLM->TTS, AVC-side hangup, leads, watchdog re-prompts, errors) — for scoring the 10-call run once placed. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
79 lines
3.3 KiB
Python
Executable File
79 lines
3.3 KiB
Python
Executable File
#!/usr/bin/env python3
"""Score recent calls against the Phase 1 gate from the server log.

Usage: python scripts/score_calls.py [path/to/avc_run.log]

Reads the loguru server log, groups events per call, and reports for each call +
an aggregate: caller turns, silent non-responses, true model latency (LLM-start ->
first TTS), AVC-side termination, post-call lead written, watchdog re-prompts, and
errors. Pairs with the stereo WAVs in recordings/ for audio review.
"""
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
# Log file to score: the first command-line argument when given, otherwise
# "avc_run.log" in the current directory.
LOG = sys.argv[1] if len(sys.argv) > 1 else "avc_run.log"
# Timestamp expected at the very start of a log line,
# e.g. "2024-05-01 12:34:56.789".
TS = re.compile(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)")


def t(line):
    """Return the timestamp at the start of *line* as a datetime, or None.

    None is returned when the line does not begin with text matching ``TS``,
    and also when the matched text is not a valid timestamp (for example
    month 13, or more than six fractional-second digits, which ``%f`` does
    not accept). Returning None instead of raising keeps one malformed log
    line from aborting the scan.
    """
    stamp = TS.match(line)
    if stamp is None:
        return None
    try:
        return datetime.strptime(stamp.group(1), "%Y-%m-%d %H:%M:%S.%f")
    except ValueError:
        # Shape matched the regex, but the fields are out of range / too long.
        return None
|
|
|
|
|
|
# Patterns used while scanning, compiled once rather than on every log line.
_CALL_SID = re.compile(r"call=(CA\w+)")
_ERROR_LINE = re.compile(r"Traceback|ERROR|run_tts\(\) takes|Exception")
_TRANSCRIPT = re.compile(r"\[(.*)\]")

# calls: one dict per "Media stream start" line, in log order.
# cur:   the call that following lines are attributed to (None until the
#        first call starts; lines before that are ignored entirely).
# err:   number of error-looking lines seen once a call has started.
calls, cur = [], None
err = 0
# The context manager closes the log even if parsing raises; undecodable bytes
# are replaced so a single bad byte cannot abort the whole scorecard.
with open(LOG, encoding="utf-8", errors="replace") as log_file:
    for line in log_file:
        if "Media stream start" in line:
            sid_match = _CALL_SID.search(line)
            cur = {
                "sid": sid_match.group(1) if sid_match else "?",
                "t0": t(line),
                "stt": [],           # (timestamp, transcribed text) per STT result
                "llm_start": None,   # timestamp of the pending LLM request, if any
                "lat": [],           # LLM-start -> first-TTS latencies, in seconds
                "closing": False,
                "hangup": False,
                "lead": None,
                "reprompts": 0,
            }
            calls.append(cur)
        if cur is None:
            continue
        if _ERROR_LINE.search(line):
            err += 1
        if "run_stt:380 - Transcription:" in line:
            text_match = _TRANSCRIPT.search(line)
            # A transcription line without "[...]" is recorded as an empty
            # utterance instead of crashing on a missing match.
            text = text_match.group(1).strip() if text_match else ""
            cur["stt"].append((t(line), text))
        elif "get_chat_completions:296" in line:
            cur["llm_start"] = t(line)
        elif "run_tts:213" in line and "Generating TTS" in line and cur["llm_start"]:
            tts_at = t(line)
            # Skip the sample when the TTS line carries no parseable timestamp.
            if tts_at is not None:
                d = (tts_at - cur["llm_start"]).total_seconds()
                # Negative or >= 60 s gaps are excluded from the latency samples.
                if 0 <= d < 60:
                    cur["lat"].append(d)
            # Only the first TTS after an LLM start is measured, so clear the
            # start marker until the next LLM request is seen.
            cur["llm_start"] = None
        elif "signalled closing" in line:
            cur["closing"] = True
        elif "ending task / hanging up" in line:
            cur["hangup"] = True
        elif "re-prompt #" in line:
            cur["reprompts"] += 1
        elif "Post-call appointment saved" in line:
            # Keep whatever follows the last " - " separator as the lead summary.
            cur["lead"] = line.split(" - ")[-1].strip()
|
|
|
|
def _p95(sorted_vals):
    """Return the entry at index int(0.95 * n) of an ascending list (clamped to the last), or 0 if empty."""
    if not sorted_vals:
        return 0
    idx = min(len(sorted_vals) - 1, int(0.95 * len(sorted_vals)))
    return sorted_vals[idx]


def _median(sorted_vals):
    """Return the entry at index n // 2 of an ascending list, or 0 if empty."""
    return sorted_vals[len(sorted_vals) // 2] if sorted_vals else 0


# Latency samples pooled across every call, for the aggregate P95 below.
all_lat = []
# NOTE(review): never updated or printed anywhere in this script; silent
# non-responses are not scored yet.
nonresp_total = 0
print(f"\n=== Call scorecard ({LOG}) — {len(calls)} call(s) ===\n")
for i, c in enumerate(calls, 1):
    # A caller turn is a transcription with non-empty text.
    turns = sum(1 for _, text in c["stt"] if text)
    lat = sorted(c["lat"])
    all_lat.extend(c["lat"])
    print(f"Call {i} {c['sid']}: turns={turns} | "
          f"latency med={_median(lat):.2f}s p95={_p95(lat):.2f}s | "
          f"closing={'Y' if c['closing'] else '-'} hangup={'Y' if c['hangup'] else '-'} | "
          f"reprompts={c['reprompts']} | lead={'Y' if c['lead'] else 'no'}")
    if c["lead"]:
        print(f" {c['lead']}")

al = sorted(all_lat)
p95 = _p95(al)
# With no latency samples the P95 is a placeholder 0, which must not be
# reported as passing the <3s gate.
if not al:
    latency_verdict = "CHECK (no samples)"
elif p95 < 3:
    latency_verdict = "PASS <3s"
else:
    latency_verdict = "CHECK"

print("\n=== Phase 1 gate ===")
print(f" calls placed : {len(calls)} (gate wants 10 consecutive clean)")
print(f" errors / tracebacks : {err} ({'PASS' if err == 0 else 'FAIL'})")
print(f" AVC-side hangups : {sum(c['hangup'] for c in calls)}/{len(calls)}")
print(f" leads written : {sum(1 for c in calls if c['lead'])}/{len(calls)}")
print(f" watchdog re-prompts (silence): {sum(c['reprompts'] for c in calls)}")
print(f" latency P95 (LLM->TTS) : {p95:.2f}s ({latency_verdict})")
print("\n recordings/ has the stereo WAVs (caller=L, agent=R) for audio review.")
|