#!/usr/bin/env bash
#
# Launches Claude Code with AssemblyAI (u3-rt-pro) as the voice transcription backend.
#
# Claude Code streams voice audio over a WebSocket to an Anthropic endpoint. This script
# runs a lightweight local bridge that accepts that connection and relays audio to
# AssemblyAI's streaming API, translating the protocol in both directions.
#
# Usage:
#   ASSEMBLYAI_API_KEY=your-key ./assemblyai-claude-voice.sh [claude args...]
#
# Debug:
#   ASSEMBLYAI_DEBUG=1 ASSEMBLYAI_API_KEY=your-key ./assemblyai-claude-voice.sh
#   tail -f /tmp/assemblyai-bridge.log
#
# Environment:
#   ASSEMBLYAI_API_KEY     (required) AssemblyAI API key, read by the bridge.
#   ASSEMBLYAI_DEBUG       (optional) "1" enables verbose bridge logging on stderr.
#   ASSEMBLYAI_BRIDGE_LOG  (optional) bridge log path; default /tmp/assemblyai-bridge.log.
#
# Requires: node, npm and claude on PATH, plus network access to install "ws".
# Exit status: that of claude, or 1 if the bridge could not be set up.

set -euo pipefail

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

: "${ASSEMBLYAI_API_KEY:?Set ASSEMBLYAI_API_KEY to your AssemblyAI API key}"

command -v node >/dev/null 2>&1 || die "Error: node is required."
command -v npm >/dev/null 2>&1 || die "Error: npm is required."
command -v claude >/dev/null 2>&1 || die "Error: claude is required."

BRIDGE_DIR=""
BRIDGE_PID=""
BRIDGE_PORT=""
BRIDGE_LOG="${ASSEMBLYAI_BRIDGE_LOG:-/tmp/assemblyai-bridge.log}"

# Stop the bridge (if it was started) and remove the scratch directory.
# Runs on every exit path of this script via the EXIT trap.
cleanup() {
  if [[ -n "${BRIDGE_PID:-}" ]]; then
    kill "$BRIDGE_PID" 2>/dev/null || true
    # Reap the bridge so it does not linger as a zombie.
    wait "$BRIDGE_PID" 2>/dev/null || true
  fi
  if [[ -n "${BRIDGE_DIR:-}" ]]; then
    rm -rf -- "$BRIDGE_DIR"
  fi
}
trap cleanup EXIT

# --- Bootstrap a temp directory with the ws dependency ---

BRIDGE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/assemblyai-bridge.XXXXXX") \
  || die "Error: could not create a temporary directory."

# npm output is captured rather than discarded so a failed install can be diagnosed.
if ! (cd "$BRIDGE_DIR" && npm init -y --silent && npm install ws --silent) \
  >"$BRIDGE_DIR/npm.log" 2>&1; then
  echo "Error: failed to install the 'ws' package with npm:" >&2
  cat "$BRIDGE_DIR/npm.log" >&2 || true
  exit 1
fi

# --- Write the bridge server ---

cat > "$BRIDGE_DIR/bridge.js" << 'EOF'
const { WebSocketServer, WebSocket } = require("ws");
const { createServer } = require("http");
const querystring = require("querystring");

// -- Config ------------------------------------------------------------------

const AAI_KEY = process.env.ASSEMBLYAI_API_KEY;
const AAI_ENDPOINT = "wss://streaming.assemblyai.com/v3/ws?" + querystring.stringify({
  sample_rate: 16000,
  speech_model: "u3-rt-pro"
});
const DEBUG = process.env.ASSEMBLYAI_DEBUG === "1";

// AssemblyAI accepts audio frames between 50 and 1000 ms.
// Claude Code emits ~10 ms chunks, so we accumulate to 100 ms before sending.
// 16 kHz x 16-bit x mono = 32 bytes/ms -> 100 ms = 3200 bytes, 50 ms = 1600 bytes.
const SEND_THRESHOLD = 3200;
const MIN_FRAME = 1600;

// -- Helpers -----------------------------------------------------------------

// Debug-only logging; goes to stderr so stdout stays reserved for BRIDGE_PORT.
function log(...a) {
  if (DEBUG) console.error("[bridge]", ...a);
}

// Send only when the socket is OPEN; silently drops the message otherwise.
function safeSend(ws, data) {
  if (ws.readyState === WebSocket.OPEN) ws.send(data);
}

// -- Server ------------------------------------------------------------------

const httpServer = createServer((_, r) => { r.writeHead(404); r.end(); });
const wss = new WebSocketServer({ server: httpServer });

wss.on("connection", (claude) => {
  log("Claude Code connected");

  const aai = new WebSocket(AAI_ENDPOINT, { headers: { Authorization: AAI_KEY } });
  let aaiOpen = false;
  let closeRequested = false;   // CloseStream arrived before AAI finished opening
  let pending = [];             // audio buffers queued before AAI opens
  let accum = Buffer.alloc(0);

  // -- Audio buffering -------------------------------------------------------

  function pushAudio(buf) {
    accum = Buffer.concat([accum, buf]);
    drainAudio();
  }

  // Forward every complete SEND_THRESHOLD-sized frame; keep the remainder.
  function drainAudio() {
    while (accum.length >= SEND_THRESHOLD) {
      safeSend(aai, accum.subarray(0, SEND_THRESHOLD));
      accum = accum.subarray(SEND_THRESHOLD);
    }
  }

  function drainRemaining() {
    if (accum.length === 0) return;
    // Zero-pad to the 50 ms minimum if the tail is shorter
    if (accum.length < MIN_FRAME) {
      accum = Buffer.concat([accum, Buffer.alloc(MIN_FRAME - accum.length)]);
    }
    safeSend(aai, accum);
    accum = Buffer.alloc(0);
  }

  // Flush the audio tail and ask AssemblyAI to end the session.
  function finishStream() {
    drainRemaining();
    safeSend(aai, JSON.stringify({ type: "Terminate" }));
  }

  // -- AssemblyAI events -----------------------------------------------------

  aai.on("open", () => {
    log("AssemblyAI connected");
    aaiOpen = true;
    for (const c of pending) pushAudio(c);
    pending = [];
    // Honour a CloseStream that raced ahead of the upstream handshake;
    // otherwise the queued tail is never flushed and the session never ends.
    if (closeRequested) finishStream();
  });

  aai.on("message", (raw) => {
    try {
      const msg = JSON.parse(raw);
      if (msg.type === "Begin") {
        log("Session", msg.id);
      } else if (msg.type === "Turn") {
        const text = msg.transcript || "";
        if (!text) return;
        log(msg.end_of_turn ? "Final:" : "Interim:", text);
        safeSend(claude, JSON.stringify({ type: "TranscriptText", data: text }));
        if (msg.end_of_turn) {
          safeSend(claude, JSON.stringify({ type: "TranscriptEndpoint" }));
        }
      } else if (msg.type === "Termination") {
        log("Terminated:", msg.audio_duration_seconds + "s");
        if (claude.readyState === WebSocket.OPEN) claude.close(1000);
      }
    } catch (e) {
      // Best effort: a bad upstream message must not take the bridge down.
      log("Ignoring AAI message:", e.message);
    }
  });

  aai.on("error", (e) => {
    log("AAI error:", e.message);
    safeSend(claude, JSON.stringify({
      type: "TranscriptError",
      description: e.message,
      error_code: "assemblyai_error",
    }));
  });

  aai.on("close", (code, reason) => {
    log("AAI closed:", code, reason?.toString());
    if (claude.readyState === WebSocket.OPEN) claude.close(1000);
  });

  aai.on("unexpected-response", (_, res) => {
    let body = "";
    res.on("data", (c) => body += c);
    res.on("end", () => {
      // Logged unconditionally: a rejected handshake usually means a bad API key.
      console.error(`[bridge] AAI rejected: HTTP ${res.statusCode} – ${body}`);
      safeSend(claude, JSON.stringify({
        type: "TranscriptError",
        description: body,
        error_code: "assemblyai_rejected",
      }));
      if (claude.readyState === WebSocket.OPEN) claude.close(1000);
    });
  });

  // -- Claude Code events ----------------------------------------------------

  claude.on("message", (data, isBinary) => {
    if (isBinary) {
      const buf = Buffer.from(data);
      aaiOpen ? pushAudio(buf) : pending.push(buf);
      return;
    }
    try {
      const msg = JSON.parse(data.toString());
      if (msg.type === "CloseStream") {
        if (aaiOpen) {
          finishStream();
        } else {
          closeRequested = true;
        }
      }
    } catch (e) {
      // Best effort: ignore control messages that are not valid JSON.
      log("Ignoring Claude message:", e.message);
    }
  });

  claude.on("close", () => {
    log("Claude Code disconnected");
    safeSend(aai, JSON.stringify({ type: "Terminate" }));
    // Give AssemblyAI a moment to answer Terminate, then make sure the upstream
    // socket is gone, including one that is still mid-handshake.
    setTimeout(() => {
      if (aai.readyState === WebSocket.OPEN) {
        aai.close();
      } else if (aai.readyState === WebSocket.CONNECTING) {
        aai.terminate();
      }
    }, 2000);
  });
});

// -- Start -------------------------------------------------------------------

// Port 0 lets the OS pick a free port; the launcher reads it from stdout.
httpServer.listen(0, "127.0.0.1", () => {
  console.log("BRIDGE_PORT=" + httpServer.address().port);
});

for (const sig of ["SIGTERM", "SIGINT"]) {
  process.on(sig, () => { wss.close(); httpServer.close(); process.exit(0); });
}
EOF

# --- Launch the bridge and wait for the port ---------------------------------

# `exec` makes $! the PID of node itself rather than of a wrapper subshell,
# so cleanup's kill reaches the process that actually holds the port.
(cd "$BRIDGE_DIR" && exec node bridge.js) >"$BRIDGE_DIR/port" 2>"$BRIDGE_LOG" &
BRIDGE_PID=$!

# Poll for up to ~5 s (50 x 0.1 s) for the bridge to announce its port.
for ((attempt = 0; attempt < 50; attempt++)); do
  # awk exits 0 even without a match, so `set -e -o pipefail` cannot abort the
  # script here before the diagnostics below are printed.
  BRIDGE_PORT=$(awk -F= '/^BRIDGE_PORT=/ { print $2; exit }' "$BRIDGE_DIR/port" 2>/dev/null) || true
  if [[ -n "$BRIDGE_PORT" ]]; then
    break
  fi
  # Stop waiting as soon as the bridge process has died.
  if ! kill -0 "$BRIDGE_PID" 2>/dev/null; then
    break
  fi
  sleep 0.1
done

if [[ -z "$BRIDGE_PORT" ]]; then
  echo "Bridge failed to start:" >&2
  cat "$BRIDGE_LOG" >&2 || true
  exit 1
fi

echo "AssemblyAI voice bridge on :$BRIDGE_PORT (log: $BRIDGE_LOG)"

# --- Hand off to Claude Code -------------------------------------------------

# Deliberately not `exec`: replacing the shell would skip the EXIT trap, leaving
# the bridge running and the temp directory behind after claude exits.
claude_status=0
VOICE_STREAM_BASE_URL="ws://127.0.0.1:${BRIDGE_PORT}" claude "$@" || claude_status=$?
exit "$claude_status"