Streaming STT Quickstart
This tutorial shows how to connect to the Truebar streaming API for speech-to-text (STT) without relying on any additional frameworks. You will authenticate, open a WebSocket pipeline, stream audio, and print interim and final transcripts.
All commands assume the following environment variables are set:
export TRUEBAR_USERNAME="alice@example.com"
export TRUEBAR_PASSWORD="super-secret"
export TRUEBAR_CLIENT_ID="truebar-client"
export TRUEBAR_AUTH_URL="https://auth.true-bar.si/realms/truebar/protocol/openid-connect/token"
export TRUEBAR_STT_WS_URL="wss://api.true-bar.si/api/pipelines/stream"
Swap the hostnames if you are targeting a playground or bespoke environment.
#
1. Prepare audio
Truebar expects mono 16 kHz PCM. Convert any existing WAV/MP3 file before running the samples:
ffmpeg -i sample.wav -ac 1 -ar 16000 -f s16le sample.pcm
You can also capture audio from the microphone—the browser-focused guides cover that flow in detail.
Voice tag
Before you run the samples, export TRUEBAR_ASR_TAG
with the online ASR stage you want to use (see GET /api/pipelines/stages
or copy the tag from your existing .env.truebar
). The default KALDI:en-US:*:*
works only if that stage exists in your tenant.
#
2. Run the sample
- JavaScript (Node.js)
- Python
Install dependencies and run the script:
npm install ws axios
node stt.js
// stt.js — stream a local PCM file to the Truebar STT pipeline and print
// interim/final transcripts. Plain JavaScript (no TypeScript syntax), so it
// runs directly with `node stt.js`. Requires Node.js 18+ for top-level await
// in an ES module.
import axios from "axios";
import WebSocket from "ws";
import { readFileSync } from "node:fs";

// Join recognized tokens into display text. A space separates two tokens
// unless the previous one is right-handed (binds forward, e.g. "(") or the
// current one is left-handed (binds backward, e.g. ",").
const tokensToText = (tokens) => {
  let output = "";
  let prevRight = false;
  (tokens ?? []).forEach((token, index) => {
    const text = token?.text ?? "";
    if (!text) return;
    const left = Boolean(token?.isLeftHanded);
    if (index > 0 && !prevRight && !left) {
      output += " ";
    }
    output += text;
    prevRight = Boolean(token?.isRightHanded);
  });
  return output;
};

// Exchange username/password for an OAuth access token (password grant).
async function fetchToken() {
  const form = new URLSearchParams({
    grant_type: "password",
    username: process.env.TRUEBAR_USERNAME,
    password: process.env.TRUEBAR_PASSWORD,
    client_id: process.env.TRUEBAR_CLIENT_ID ?? "truebar-client",
  });

  const { data } = await axios.post(process.env.TRUEBAR_AUTH_URL, form, {
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
  });

  return data.access_token;
}

const token = await fetchToken();
const ws = new WebSocket(process.env.TRUEBAR_STT_WS_URL, {
  headers: { Authorization: `Bearer ${token}` },
});
const pcm = readFileSync("sample.pcm");
const chunkSize = 3200 * 2; // 100 ms @ 16 kHz (3200 samples x 2 bytes per 16-bit sample)
let streamed = false;

ws.on("message", (payload, isBinary) => {
  if (isBinary) return;
  const msg = JSON.parse(payload.toString());

  if (msg.type === "STATUS") console.log("STATUS:", msg.status);

  if (msg.type === "TEXT_SEGMENT") {
    const text = tokensToText(msg.textSegment.tokens);
    console.log(msg.textSegment.isFinal ? "FINAL" : "INTERIM", "-", text);
  }

  // Stream audio exactly once, and only after the server has acknowledged
  // the pipeline configuration.
  if (msg.type === "STATUS" && msg.status === "CONFIGURED" && !streamed) {
    streamed = true;
    for (let offset = 0; offset < pcm.length; offset += chunkSize) {
      ws.send(pcm.subarray(offset, offset + chunkSize));
    }
    ws.send(JSON.stringify({ type: "EOS", lockSession: false }));
  }

  if (msg.type === "STATUS" && msg.status === "FINISHED") {
    ws.close();
  }
});

// The first message from the server signals the session is ready to be
// configured; reply with the pipeline CONFIG.
ws.once("message", () => {
  ws.send(
    JSON.stringify({
      type: "CONFIG",
      pipeline: [
        {
          task: "ASR",
          exceptionHandlingPolicy: "THROW",
          config: {
            tag: process.env.TRUEBAR_ASR_TAG ?? "KALDI:en-US:*:*",
            parameters: { enableInterims: true },
          },
        },
      ],
    }),
  );
});

ws.on("open", () => console.log("STT stream connected"));
ws.on("close", () => console.log("STT stream closed"));
ws.on("error", (err) => console.error("STT error", err));
Install dependencies and run the script:
pip install websockets aiohttp soundfile numpy
python stt.py
import asyncio
import json
import os

import aiohttp
import soundfile as sf
import websockets
async def fetch_token(session: aiohttp.ClientSession) -> str:
    """Request an OAuth2 access token from the Truebar auth endpoint.

    Uses the resource-owner password grant with credentials read from the
    TRUEBAR_* environment variables; raises on missing variables (KeyError)
    or a non-2xx auth response (aiohttp.ClientResponseError).
    """
    form = {
        "grant_type": "password",
        "username": os.environ["TRUEBAR_USERNAME"],
        "password": os.environ["TRUEBAR_PASSWORD"],
        "client_id": os.getenv("TRUEBAR_CLIENT_ID", "truebar-client"),
    }
    async with session.post(os.environ["TRUEBAR_AUTH_URL"], data=form) as resp:
        resp.raise_for_status()
        body = await resp.json()
        return body["access_token"]
def tokens_to_text(tokens: list[dict[str, object]]) -> str:
    """Concatenate recognized tokens into a display string.

    A space is inserted between two tokens unless the previous token is
    right-handed (binds to the following text, e.g. an opening bracket) or
    the current one is left-handed (binds to the preceding text, e.g. a
    comma). Tokens with empty text are skipped.
    """
    rendered: list[str] = []
    previous_glues_right = False
    for position, token in enumerate(tokens):
        piece = str(token.get("text", "") or "")
        if not piece:
            continue
        glues_left = bool(token.get("isLeftHanded", False))
        needs_space = position > 0 and not previous_glues_right and not glues_left
        if needs_space:
            rendered.append(" ")
        rendered.append(piece)
        previous_glues_right = bool(token.get("isRightHanded", False))
    return "".join(rendered)
async def main() -> None:
    """Authenticate, open the streaming pipeline, and transcribe sample.wav.

    Runs a sender task (CONFIG, then 100 ms audio chunks, then EOS) and a
    receiver task (status, error, and transcript messages) concurrently over
    a single WebSocket connection.
    """
    async with aiohttp.ClientSession() as session:
        token = await fetch_token(session)

    ws_url = f"{os.environ['TRUEBAR_STT_WS_URL']}?access_token={token}"
    # soundfile decodes the WAV container directly, so the raw .pcm
    # conversion step from the prep section is not required here.
    pcm, sample_rate = sf.read("sample.wav", dtype="int16")
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if sample_rate != 16000:
        raise ValueError("Convert audio to 16 kHz mono before streaming")

    async with websockets.connect(ws_url) as ws:
        # Set when the server is ready to receive the CONFIG message.
        config_ready = asyncio.Event()
        # Set when the server acknowledges the pipeline configuration.
        configured = asyncio.Event()

        async def sender() -> None:
            # Configure the pipeline once the server asks for it.
            await config_ready.wait()
            await ws.send(json.dumps({
                "type": "CONFIG",
                "pipeline": [
                    {
                        "task": "ASR",
                        "exceptionHandlingPolicy": "THROW",
                        "config": {
                            "tag": os.getenv("TRUEBAR_ASR_TAG", "KALDI:en-US:*:*"),
                            "parameters": {"enableInterims": True},
                        },
                    }
                ],
            }))

            # Wait for the CONFIGURED acknowledgement before streaming audio
            # (matching the Node.js sample); sending audio earlier races the
            # server-side pipeline setup.
            await configured.wait()
            chunk_samples = 3200  # 100 ms at 16 kHz
            for start in range(0, len(pcm), chunk_samples):
                await ws.send(pcm[start:start + chunk_samples].tobytes())
            await ws.send(json.dumps({"type": "EOS", "lockSession": False}))

        async def receiver() -> None:
            async for message in ws:
                if isinstance(message, bytes):
                    continue
                msg = json.loads(message)
                if msg["type"] == "STATUS":
                    status = msg["status"]
                    print("STATUS:", status)
                    if status in {"INITIALIZED", "CONFIG_REQUIRED"}:
                        config_ready.set()
                    if status == "CONFIGURED":
                        configured.set()
                    if status == "FINISHED":
                        break
                if msg["type"] == "ERROR":
                    # Unblock the sender so gather() can propagate the error
                    # instead of deadlocking on the events.
                    config_ready.set()
                    configured.set()
                    raise RuntimeError(f"Streaming pipeline error: {msg}")
                if msg["type"] == "TEXT_SEGMENT":
                    tokens = msg["textSegment"]["tokens"]
                    text = tokens_to_text(tokens)
                    label = "FINAL" if msg["textSegment"]["isFinal"] else "INTERIM"
                    print(f"{label} - {text}")

        await asyncio.gather(sender(), receiver())
# Script entry point: start a fresh asyncio event loop and run the workflow.
if __name__ == "__main__":
    asyncio.run(main())
Session cleanup
Always close the stream with {"type": "EOS", "lockSession": false}
. Switch lockSession
to true
only when you intend to resume the same session later; otherwise keep it false
so Truebar frees the pipeline immediately.
#
3. Next steps
- Need microphone capture, diarisation, or browser-specific logic? Continue with the Streaming STT guide.
- To record transcriptions, query the History API after closing the session.
- Ready for synthesis? Jump to the Streaming TTS quickstart.