Streaming STT Quickstart
This tutorial shows how to connect to the Truebar streaming API for speech-to-text (STT) without relying on any additional frameworks. You will authenticate, open a WebSocket pipeline, stream audio, and print interim and final transcripts.
All commands assume the following environment variables are set:
export TRUEBAR_USERNAME="alice@example.com"
export TRUEBAR_PASSWORD="super-secret"
export TRUEBAR_CLIENT_ID="truebar-client"
export TRUEBAR_AUTH_URL="https://auth.true-bar.si/realms/truebar/protocol/openid-connect/token"
export TRUEBAR_STT_WS_URL="wss://api.true-bar.si/api/pipelines/stream"

Swap the hostnames if you are targeting a playground or bespoke environment.
1. Prepare audio#
Truebar expects mono 16 kHz PCM. Convert any existing WAV/MP3 file before running the samples:
ffmpeg -i sample.wav -ac 1 -ar 16000 -f s16le sample.pcm

You can also capture audio from the microphone—the browser-focused guides cover that flow in detail.
Voice tag
Before you run the samples, export TRUEBAR_ASR_TAG with the online ASR stage you want to use (see GET /api/pipelines/stages or copy the tag from your existing .env.truebar). The KALDI:en-US:*:* tag used within examples works only if it is available for your account.
2. Run the sample#
- JavaScript (Node.js)
- Python
Install dependencies and run the script:
npm install ws axios
node stt.js

import axios from "axios";
import WebSocket from "ws";
import { readFileSync } from "node:fs";
// Join recognition tokens into a readable string. A separator space is
// inserted between adjacent tokens unless the previous token binds right
// or the current one binds left (e.g. punctuation such as ",").
const tokensToText = (tokens: any[]) => {
  let joined = "";
  let lastBoundRight = false;
  tokens?.forEach((token: any, i: number) => {
    const piece = token?.text ?? "";
    if (!piece) return;
    const boundLeft = Boolean(token?.isLeftHanded);
    if (i > 0 && !lastBoundRight && !boundLeft) {
      joined += " ";
    }
    joined += piece;
    lastBoundRight = Boolean(token?.isRightHanded);
  });
  return joined;
};
// Request an OAuth2 access token from Keycloak using the password grant.
// Reads credentials from the TRUEBAR_* environment variables.
async function fetchToken() {
  const form = new URLSearchParams({
    grant_type: "password",
    username: process.env.TRUEBAR_USERNAME!,
    password: process.env.TRUEBAR_PASSWORD!,
    client_id: process.env.TRUEBAR_CLIENT_ID ?? "truebar-client",
  });

  const { data } = await axios.post(process.env.TRUEBAR_AUTH_URL!, form, {
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
  });

  return data.access_token as string;
}
// Connect to the streaming pipeline, configure it, stream the PCM file,
// and print interim/final transcripts until the server reports FINISHED.
const token = await fetchToken();
const ws = new WebSocket(process.env.TRUEBAR_STT_WS_URL!, {
  headers: { Authorization: `Bearer ${token}` },
});
const pcm = readFileSync("sample.pcm");
const chunkSize = 3200 * 2; // 3200 int16 samples = 200 ms @ 16 kHz
let streamed = false;

ws.on("message", (payload, isBinary) => {
  if (isBinary) return;
  const msg = JSON.parse(payload.toString());

  if (msg.type === "STATUS") console.log("STATUS:", msg.status);
  if (msg.type === "TEXT_SEGMENT") {
    const text = tokensToText(msg.textSegment.tokens);
    console.log(msg.textSegment.isFinal ? "FINAL" : "INTERIM", "-", text);
  }

  // Once the pipeline is CONFIGURED, stream all audio and signal end-of-stream.
  if (msg.type === "STATUS" && msg.status === "CONFIGURED" && !streamed) {
    streamed = true;
    for (let offset = 0; offset < pcm.length; offset += chunkSize) {
      ws.send(pcm.subarray(offset, offset + chunkSize));
    }
    ws.send(JSON.stringify({ type: "EOS", lockSession: false }));
  }

  if (msg.type === "STATUS" && msg.status === "FINISHED") {
    ws.close();
  }
});

// Send the pipeline CONFIG as soon as the first (status) message arrives.
ws.once("message", () => {
  ws.send(
    JSON.stringify({
      type: "CONFIG",
      pipeline: [
        {
          task: "ASR",
          exceptionHandlingPolicy: "THROW",
          config: {
            tag: process.env.TRUEBAR_ASR_TAG ?? "KALDI:en-US:*:*",
            parameters: { enableInterims: true },
          },
        },
      ],
    }),
  );
});
ws.on("open", () => console.log("STT stream connected"));
ws.on("close", () => console.log("STT stream closed"));
ws.on("error", (err) => console.error("STT error", err));

Install dependencies and run the script:
pip install websockets aiohttp soundfile numpy
python stt.py

import asyncio
import json
import os
import aiohttp
import soundfile as sf
import websockets
async def fetch_token(session: aiohttp.ClientSession) -> str:
    """Obtain an OAuth2 access token via the password grant.

    Credentials and the token endpoint come from the TRUEBAR_* environment
    variables; raises for HTTP errors instead of returning a bad token.
    """
    payload = {
        "grant_type": "password",
        "username": os.environ["TRUEBAR_USERNAME"],
        "password": os.environ["TRUEBAR_PASSWORD"],
        "client_id": os.getenv("TRUEBAR_CLIENT_ID", "truebar-client"),
    }
    async with session.post(os.environ["TRUEBAR_AUTH_URL"], data=payload) as resp:
        resp.raise_for_status()
        data = await resp.json()
        return data["access_token"]
def tokens_to_text(tokens: list[dict[str, object]]) -> str:
    """Join recognition tokens into a readable string.

    A space is inserted between adjacent tokens unless the previous token is
    right-handed or the current token is left-handed (punctuation such as ","
    binds to its neighbour without a space).

    Args:
        tokens: Token dicts with optional ``text``, ``isLeftHanded`` and
            ``isRightHanded`` keys, as delivered in ``TEXT_SEGMENT`` messages.

    Returns:
        The concatenated transcript text.
    """
    pieces: list[str] = []
    prev_right = False

    for token in tokens:
        text = str(token.get("text", "") or "")
        if not text:
            # Empty tokens are skipped entirely; they must not count as a
            # predecessor for spacing purposes.
            continue

        left = bool(token.get("isLeftHanded", False))
        # Gate on `pieces` rather than the token index: this avoids emitting
        # a leading space when the first token(s) carry no text.
        if pieces and not prev_right and not left:
            pieces.append(" ")

        pieces.append(text)
        prev_right = bool(token.get("isRightHanded", False))

    return "".join(pieces)
async def main() -> None:
    """Authenticate, stream sample audio to the STT pipeline, print transcripts."""
    async with aiohttp.ClientSession() as session:
        token = await fetch_token(session)

    ws_url = f"{os.environ['TRUEBAR_STT_WS_URL']}?access_token={token}"
    pcm, sample_rate = sf.read("sample.wav", dtype="int16")
    assert sample_rate == 16000, "Convert audio to 16 kHz mono before streaming"

    async with websockets.connect(ws_url) as ws:
        configured = asyncio.Event()

        async def sender():
            # Wait for the pipeline to report INITIALIZED, then configure it,
            # stream the audio in chunks, and signal end-of-stream.
            await configured.wait()
            await ws.send(json.dumps({
                "type": "CONFIG",
                "pipeline": [
                    {
                        "task": "ASR",
                        "exceptionHandlingPolicy": "THROW",
                        "config": {
                            "tag": os.getenv("TRUEBAR_ASR_TAG", "KALDI:en-US:*:*"),
                            "parameters": {"enableInterims": True},
                        },
                    }
                ],
            }))

            chunk_samples = 3200  # 3200 int16 samples = 200 ms @ 16 kHz
            for start in range(0, len(pcm), chunk_samples):
                await ws.send(pcm[start:start + chunk_samples].tobytes())
            await ws.send(json.dumps({"type": "EOS", "lockSession": False}))

        async def receiver():
            # Consume server messages until FINISHED, printing status lines
            # and interim/final transcripts as they arrive.
            async for message in ws:
                if isinstance(message, bytes):
                    continue
                msg = json.loads(message)
                if msg["type"] == "STATUS":
                    status = msg["status"]
                    print("STATUS:", status)
                    if status == "INITIALIZED":
                        configured.set()
                    if status == "FINISHED":
                        break
                if msg["type"] == "ERROR":
                    # Unblock the sender so gather() can surface the error.
                    configured.set()
                    raise RuntimeError(f"Streaming pipeline error: {msg}")
                if msg["type"] == "TEXT_SEGMENT":
                    segment = msg["textSegment"]
                    text = tokens_to_text(segment["tokens"])
                    label = "FINAL" if segment["isFinal"] else "INTERIM"
                    print(f"{label} - {text}")

        await asyncio.gather(sender(), receiver())
if __name__ == "__main__":
    asyncio.run(main())

Session cleanup
Always close the stream with {"type": "EOS", "lockSession": false} and wait for {"type": "STATUS", "status": "FINISHED"}, which indicates the API has processed and flushed all data that was sent.
Switch lockSession to true only when you intend to resume or edit the same session later.
3. Next steps#
- Need microphone capture, diarisation, or browser-specific logic? Continue with the Streaming STT guide.
- To record transcriptions, query the History API after closing the session.
- Ready for synthesis? Jump to the Streaming TTS quickstart.