Streaming STT Quickstart
This tutorial shows how to connect to the Truebar streaming API for speech-to-text (STT) without relying on any additional frameworks. You will authenticate, open a WebSocket pipeline, stream audio, and print interim and final transcripts.
All commands assume the following environment variables are set:
export TRUEBAR_USERNAME="alice@example.com"
export TRUEBAR_PASSWORD="super-secret"
export TRUEBAR_CLIENT_ID="truebar-client"
export TRUEBAR_AUTH_URL="https://auth.true-bar.si/realms/truebar/protocol/openid-connect/token"
export TRUEBAR_STT_WS_URL="wss://api.true-bar.si/api/pipelines/stream"

Swap the hostnames if you are targeting a playground or bespoke environment.
1. Prepare audio#
Truebar expects mono 16 kHz PCM. Convert any existing WAV/MP3 file before running the samples:
ffmpeg -i sample.wav -ac 1 -ar 16000 -f s16le sample.pcm

You can also capture audio from the microphone—the browser-focused guides cover that flow in detail.
Voice tag
Before you run the samples, export TRUEBAR_ASR_TAG with the online ASR stage you want to use (see GET /api/pipelines/stages or copy the tag from your existing .env.truebar). The KALDI:en-US:*:* tag used within examples works only if it is available for your account.
2. Run the sample#
- JavaScript (Node.js)
- Python
Install dependencies and run the script:
npm install ws axios
node stt.js

import axios from "axios";
import WebSocket from "ws";
import { readFileSync } from "node:fs";
// Join recognition tokens into a readable string. A separator space is
// inserted between adjacent tokens unless the previous token binds right
// or the current one binds left (e.g. punctuation such as ",").
const tokensToText = (tokens: any[]) => {
  let joined = "";
  let lastBoundRight = false;
  tokens?.forEach((token: any, i: number) => {
    const piece = token?.text ?? "";
    if (!piece) return;
    const boundLeft = Boolean(token?.isLeftHanded);
    if (i > 0 && !lastBoundRight && !boundLeft) {
      joined += " ";
    }
    joined += piece;
    lastBoundRight = Boolean(token?.isRightHanded);
  });
  return joined;
};
// Request an OAuth2 access token from Keycloak using the password grant.
// Reads credentials from the TRUEBAR_* environment variables.
async function fetchToken() {
  const form = new URLSearchParams({
    grant_type: "password",
    username: process.env.TRUEBAR_USERNAME!,
    password: process.env.TRUEBAR_PASSWORD!,
    client_id: process.env.TRUEBAR_CLIENT_ID ?? "truebar-client",
  });

  const { data } = await axios.post(process.env.TRUEBAR_AUTH_URL!, form, {
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
  });

  return data.access_token as string;
}
// Connect to the streaming pipeline, configure it, stream the PCM file,
// and print interim/final transcripts until the server reports FINISHED.
const token = await fetchToken();
const ws = new WebSocket(process.env.TRUEBAR_STT_WS_URL!, {
  headers: { Authorization: `Bearer ${token}` },
});
const pcm = readFileSync("sample.pcm");
const chunkSize = 3200 * 2; // 3200 int16 samples = 200 ms @ 16 kHz
let streamed = false;

ws.on("message", (payload, isBinary) => {
  if (isBinary) return;
  const msg = JSON.parse(payload.toString());

  if (msg.type === "STATUS") console.log("STATUS:", msg.status);
  if (msg.type === "TEXT_SEGMENT") {
    const text = tokensToText(msg.textSegment.tokens);
    console.log(msg.textSegment.isFinal ? "FINAL" : "INTERIM", "-", text);
  }

  // Once the pipeline is CONFIGURED, stream all audio and signal end-of-stream.
  if (msg.type === "STATUS" && msg.status === "CONFIGURED" && !streamed) {
    streamed = true;
    for (let offset = 0; offset < pcm.length; offset += chunkSize) {
      ws.send(pcm.subarray(offset, offset + chunkSize));
    }
    ws.send(JSON.stringify({ type: "EOS", lockSession: false }));
  }

  if (msg.type === "STATUS" && msg.status === "FINISHED") {
    ws.close();
  }
});

// Send the pipeline CONFIG as soon as the first (status) message arrives.
ws.once("message", () => {
  ws.send(
    JSON.stringify({
      type: "CONFIG",
      pipeline: [
        {
          task: "ASR",
          exceptionHandlingPolicy: "THROW",
          config: {
            tag: process.env.TRUEBAR_ASR_TAG ?? "KALDI:en-US:*:*",
            parameters: { enableInterims: true },
          },
        },
      ],
    }),
  );
});
ws.on("open", () => console.log("STT stream connected"));
ws.on("close", () => console.log("STT stream closed"));
ws.on("error", (err) => console.error("STT error", err));

Install dependencies and run the script:
pip install websockets aiohttp soundfile numpy
python stt.py

import asyncio
import json
import os
import aiohttp
import soundfile as sf
import websockets
async def fetch_token(session: aiohttp.ClientSession) -> str:
    """Obtain an OAuth2 access token via the password grant.

    Credentials and the token endpoint come from the TRUEBAR_* environment
    variables; raises for HTTP errors instead of returning a bad token.
    """
    payload = {
        "grant_type": "password",
        "username": os.environ["TRUEBAR_USERNAME"],
        "password": os.environ["TRUEBAR_PASSWORD"],
        "client_id": os.getenv("TRUEBAR_CLIENT_ID", "truebar-client"),
    }
    async with session.post(os.environ["TRUEBAR_AUTH_URL"], data=payload) as resp:
        resp.raise_for_status()
        data = await resp.json()
        return data["access_token"]
def tokens_to_text(tokens: list[dict[str, object]]) -> str:
    """Join recognition tokens into a readable string.

    A space is inserted between adjacent tokens unless the previous token is
    right-handed or the current token is left-handed (punctuation such as ","
    binds to its neighbour without a space).

    Args:
        tokens: Token dicts with optional ``text``, ``isLeftHanded`` and
            ``isRightHanded`` keys, as delivered in ``TEXT_SEGMENT`` messages.

    Returns:
        The concatenated transcript text.
    """
    pieces: list[str] = []
    prev_right = False

    for token in tokens:
        text = str(token.get("text", "") or "")
        if not text:
            # Empty tokens are skipped entirely; they must not count as a
            # predecessor for spacing purposes.
            continue

        left = bool(token.get("isLeftHanded", False))
        # Gate on `pieces` rather than the token index: this avoids emitting
        # a leading space when the first token(s) carry no text.
        if pieces and not prev_right and not left:
            pieces.append(" ")

        pieces.append(text)
        prev_right = bool(token.get("isRightHanded", False))

    return "".join(pieces)
async def main() -> None:
    """Authenticate, stream sample audio to the STT pipeline, print transcripts."""
    async with aiohttp.ClientSession() as session:
        token = await fetch_token(session)

    ws_url = f"{os.environ['TRUEBAR_STT_WS_URL']}?access_token={token}"
    pcm, sample_rate = sf.read("sample.wav", dtype="int16")
    assert sample_rate == 16000, "Convert audio to 16 kHz mono before streaming"

    async with websockets.connect(ws_url) as ws:
        configured = asyncio.Event()

        async def sender():
            # Wait for the pipeline to report INITIALIZED, then configure it,
            # stream the audio in chunks, and signal end-of-stream.
            await configured.wait()
            await ws.send(json.dumps({
                "type": "CONFIG",
                "pipeline": [
                    {
                        "task": "ASR",
                        "exceptionHandlingPolicy": "THROW",
                        "config": {
                            "tag": os.getenv("TRUEBAR_ASR_TAG", "KALDI:en-US:*:*"),
                            "parameters": {"enableInterims": True},
                        },
                    }
                ],
            }))

            chunk_samples = 3200  # 3200 int16 samples = 200 ms @ 16 kHz
            for start in range(0, len(pcm), chunk_samples):
                await ws.send(pcm[start:start + chunk_samples].tobytes())
            await ws.send(json.dumps({"type": "EOS", "lockSession": False}))

        async def receiver():
            # Consume server messages until FINISHED, printing status lines
            # and interim/final transcripts as they arrive.
            async for message in ws:
                if isinstance(message, bytes):
                    continue
                msg = json.loads(message)
                if msg["type"] == "STATUS":
                    status = msg["status"]
                    print("STATUS:", status)
                    if status == "INITIALIZED":
                        configured.set()
                    if status == "FINISHED":
                        break
                if msg["type"] == "ERROR":
                    # Unblock the sender so gather() can surface the error.
                    configured.set()
                    raise RuntimeError(f"Streaming pipeline error: {msg}")
                if msg["type"] == "TEXT_SEGMENT":
                    segment = msg["textSegment"]
                    text = tokens_to_text(segment["tokens"])
                    label = "FINAL" if segment["isFinal"] else "INTERIM"
                    print(f"{label} - {text}")

        await asyncio.gather(sender(), receiver())
if __name__ == "__main__":
    asyncio.run(main())

Session cleanup
Always close the stream with {"type": "EOS", "lockSession": false} and wait for {"type": "STATUS", "status": "FINISHED"}, which indicates the API has processed and flushed all data that was sent.
Switch lockSession to true only when you intend to resume or edit the same session later.
3. Next steps#
- Need microphone capture, diarisation, or browser-specific logic? Continue with the Streaming STT guide.
- To record transcriptions, query the History API after closing the session.
- Ready for synthesis? Jump to the Streaming TTS quickstart.