OpenAI Lid Client¶
Source https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_lid_client.py.
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Language Identification (LID) demo using the FireRedLID model on vLLM.
FireRedLID is an audio encoder-decoder model that identifies the spoken
language of an audio clip. Unlike ASR models that output full transcriptions,
FireRedLID outputs at most 2 tokens representing the detected language
(e.g. "en", "zh mandarin").
Start the vLLM server:
vllm serve PatchyTisa/FireRedLID-vllm
Then run this script:
# Use the built-in sample audio
python examples/online_serving/openai_lid_client.py
# Use your own audio file(s)
python examples/online_serving/openai_lid_client.py \
--audio_paths audio_en.wav audio_zh.wav audio_fr.wav
# Batch-identify multiple files in one run
python examples/online_serving/openai_lid_client.py \
--audio_paths /path/to/dir/*.wav
Requirements:
- vLLM with audio support
- openai Python SDK
- kaldi_native_fbank (pulled in by the model)
"""
import argparse
import json
import os
from openai import OpenAI
from vllm.assets.audio import AudioAsset
# ──────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────
def identify_language(
audio_path: str,
client: OpenAI,
model: str,
) -> str:
"""
Send a single audio file to the vLLM transcription endpoint and return
the detected language tag.
FireRedLID re-uses the OpenAI-compatible ``/v1/audio/transcriptions``
endpoint. The "transcription" it returns is actually the language label
(e.g. ``"en"`` or ``"zh mandarin"``).
"""
with open(audio_path, "rb") as f:
result = client.audio.transcriptions.create(
file=f,
model=model,
response_format="json",
temperature=0.0,
)
return result.text.strip()
def identify_language_raw(
audio_path: str,
model: str,
api_base: str,
) -> str:
"""
Same as :func:`identify_language` but uses raw HTTP so that the demo
works without the ``openai`` SDK (useful for quick debugging).
"""
import requests
url = f"{api_base}/audio/transcriptions"
with open(audio_path, "rb") as f:
files = {"file": (os.path.basename(audio_path), f)}
data = {
"model": model,
"response_format": "json",
}
resp = requests.post(url, files=files, data=data)
resp.raise_for_status()
return resp.json()["text"].strip()
def identify_language_streaming(
audio_path: str,
model: str,
api_base: str,
) -> str:
"""
Streaming variant – demonstrates the streaming transcription endpoint.
For a 1-2 token output the stream finishes almost instantly, but this
shows that the API path works end-to-end.
"""
import requests
url = f"{api_base}/audio/transcriptions"
with open(audio_path, "rb") as f:
files = {"file": (os.path.basename(audio_path), f)}
data = {
"stream": "true",
"model": model,
"response_format": "json",
}
response = requests.post(url, files=files, data=data, stream=True)
response.raise_for_status()
tokens: list[str] = []
for chunk in response.iter_lines(
chunk_size=8192, decode_unicode=False, delimiter=b"\n"
):
if not chunk:
continue
payload = json.loads(chunk[len("data: ") :].decode("utf-8"))
choice = payload["choices"][0]
delta = choice.get("delta", {}).get("content", "")
if delta:
tokens.append(delta)
if choice.get("finish_reason") is not None:
break
return "".join(tokens).strip()
# ──────────────────────────────────────────────────────────────────────
# Main
# ──────────────────────────────────────────────────────────────────────
def main(args: argparse.Namespace) -> None:
api_base = args.api_base.rstrip("/")
client = OpenAI(api_key="EMPTY", base_url=api_base)
model = client.models.list().data[0].id
print(f"Model : {model}")
print(f"Server: {api_base}\n")
# Resolve audio paths ------------------------------------------------
if args.audio_paths:
audio_paths = args.audio_paths
else:
# Fall back to the built-in vLLM sample audios (both are English).
audio_paths = [
str(AudioAsset("mary_had_lamb").get_local_path()),
str(AudioAsset("winning_call").get_local_path()),
]
# Run LID for each file ----------------------------------------------
print(f"{'Audio File':<50} {'Language (sync)':<20} {'Language (stream)'}")
print("-" * 90)
for path in audio_paths:
basename = os.path.basename(path)
# 1) Synchronous via OpenAI SDK
lang_sync = identify_language(path, client, model)
# 2) Streaming via raw HTTP
lang_stream = identify_language_streaming(path, model, api_base)
print(f"{basename:<50} {lang_sync:<20} {lang_stream}")
print()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="FireRedLID – Language Identification demo via vLLM",
)
parser.add_argument(
"--audio_paths",
nargs="+",
default=None,
help=(
"One or more audio files to identify. "
"If omitted, uses vLLM's built-in sample audios."
),
)
parser.add_argument(
"--api_base",
type=str,
default="http://localhost:8000/v1",
help="vLLM API base URL (default: http://localhost:8000/v1)",
)
args = parser.parse_args()
main(args)