# Qwen3-ASR

https://github.com/QwenLM/Qwen3-ASR

## 📦 Version History

| Version | Date       | Summary         |
|---------|------------|-----------------|
| 0.0.1   | 2026-04-22 | Initial version |

### 🔄 Version Details

#### 🆕 0.0.1 – *2026-04-22*

- ✅ **Core Features**
  - Initial Qwen3-ASR integration

---

# Start

```bash
docker pull harbor.bwgdi.com/library/qwen3-asr:0.0.1

# Run with custom model path
# -e ASR_MODEL_PATH: Model name or local path inside container
docker run -d --restart always -p 5051:5000 --gpus '"device=2"' \
  -e ASR_MODEL_PATH="Qwen/Qwen3-ASR-1.7B" \
  -e GPU_MEMORY_UTILIZATION=0.8 \
  --mount type=bind,source=/path/to/your/models,target=/models \
  harbor.bwgdi.com/library/qwen3-asr:0.0.1
```

# Usage

> **Note:** The examples below address the service at `localhost:8000`. The
> `docker run` command above publishes the container on host port `5051`
> (`-p 5051:5000`), so replace `8000` with the host port you actually published.

## Non-streaming (HTTP POST)

Transcribe an entire audio file.

```bash
curl -X POST http://localhost:8000/asr/transcribe \
  -F "file=@audio.wav" \
  -F "language=Chinese"
```

## Streaming (WebSocket)

Real-time incremental transcription.

- **URL**: `ws://localhost:8000/asr/stream`
- **Protocol**:
  - Client sends `bytes`: float32 PCM 16kHz audio chunks.
  - Client sends `text`: `{"command": "finish"}` to stop.
  - Server sends `text`: `{"session_id": ..., "language": ..., "text": ..., "is_final": bool}`

Example using Python `websockets` (requires `numpy`, `soundfile` and `websockets`):

```python
# coding=utf-8
import argparse
import asyncio
import io
import json
import logging
import urllib.request

import numpy as np
import soundfile as sf
import websockets

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Sample rate of the PCM stream sent to the API (see the protocol above).
TARGET_SAMPLE_RATE = 16000


def _load_audio(audio_path: str) -> np.ndarray:
    """Load a local file or an HTTP(S) URL as 1-D float32 samples at 16 kHz."""
    # Match the full scheme so that a local file such as "http_dump.wav"
    # is not mistaken for a URL.
    if audio_path.startswith(("http://", "https://")):
        # Download from URL
        req = urllib.request.Request(audio_path, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            source = io.BytesIO(resp.read())
    else:
        # Load local file
        source = audio_path

    # Read audio as Float32; multi-channel files come back as (frames, channels)
    wav, sr = sf.read(source, dtype="float32", always_2d=False)

    # The stream is a single flat sequence of samples and np.interp only accepts
    # 1-D data, so average multi-channel audio down to one channel first.
    if wav.ndim > 1:
        logger.warning(f"Audio has {wav.shape[1]} channels, downmixing to mono...")
        wav = wav.mean(axis=1)

    # Simple resample to 16k if needed (for better accuracy)
    if sr != TARGET_SAMPLE_RATE:
        logger.warning(f"Audio sample rate is {sr}, resampling to 16000...")
        dur = wav.shape[0] / float(sr)
        n16 = int(round(dur * TARGET_SAMPLE_RATE))
        x_old = np.linspace(0.0, dur, num=wav.shape[0], endpoint=False)
        x_new = np.linspace(0.0, dur, num=n16, endpoint=False)
        wav = np.interp(x_new, x_old, wav).astype(np.float32)

    return wav


async def _receive_final_result(websocket, timeout: float = 5.0):
    """Return the final result message, or None if nothing arrived within `timeout`.

    If an earlier chunk timed out, its late reply may still be queued when the
    "finish" command is sent. Intermediate messages are therefore skipped until
    one carries a truthy "is_final" (or an "error"). When the deadline passes or
    the server closes the connection first, the most recent message is returned.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    latest = None
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            return latest
        try:
            message = await asyncio.wait_for(websocket.recv(), timeout=remaining)
        except asyncio.TimeoutError:
            return latest
        except websockets.ConnectionClosed:
            if latest is None:
                # Nothing usable was received: let the caller report the failure.
                raise
            return latest
        latest = json.loads(message)
        if latest.get("is_final") or "error" in latest:
            return latest


async def stream_audio_to_api(uri: str, audio_path: str, chunk_size_ms: int = 500):
    """
    Load audio and stream it in chunks to the ASR WebSocket API.

    Args:
        uri: WebSocket endpoint, e.g. "ws://localhost:8000/asr/stream".
        audio_path: Local file path or HTTP(S) URL of the audio to transcribe.
        chunk_size_ms: Duration of each audio chunk in milliseconds.

    Raises:
        ValueError: If `chunk_size_ms` is too small to hold a single sample.

    WebSocket failures are logged rather than raised. Intermediate results and
    the final result are printed to stdout.
    """
    # Calculate samples per chunk. A zero-sized chunk would never advance
    # through the audio, so reject it up front.
    chunk_samples = int(TARGET_SAMPLE_RATE * chunk_size_ms / 1000)
    if chunk_samples < 1:
        raise ValueError(f"chunk_size_ms must yield at least one sample, got {chunk_size_ms}")

    logger.info(f"Loading audio from {audio_path}...")
    wav = _load_audio(audio_path)

    logger.info(f"Connecting to WebSocket at {uri}...")

    try:
        async with websockets.connect(uri) as websocket:
            logger.info("Connected. Streaming audio...")

            chunk_starts = range(0, len(wav), chunk_samples)
            for call_id, pos in enumerate(chunk_starts, start=1):
                chunk = wav[pos : pos + chunk_samples]

                # Send binary Float32 data
                await websocket.send(chunk.tobytes())

                # Wait for immediate response (intermediate result)
                try:
                    response = await asyncio.wait_for(websocket.recv(), timeout=2.0)
                except asyncio.TimeoutError:
                    logger.warning(f"Timeout waiting for response on chunk {call_id}")
                    continue

                result = json.loads(response)
                if "error" in result:
                    logger.error(f"API Error: {result['error']}")
                    return

                # "language" may be present but null; `or` keeps the
                # fixed-width format spec from failing on None.
                lang = result.get("language") or "unknown"
                text = result.get("text", "")
                print(f"[Chunk {call_id:03d}] Lang: {lang:7s} | Text: {text}")

                # Optional: simulate real-time performance
                # await asyncio.sleep(chunk_size_ms / 1000)

            # Send finish command
            logger.info("Finished streaming audio. Sending 'finish' command...")
            await websocket.send(json.dumps({"command": "finish"}))

            # Wait for final response
            final_result = await _receive_final_result(websocket, timeout=5.0)
            if final_result is None:
                logger.error("Timeout waiting for final response")
                return
            if "error" in final_result:
                logger.error(f"API Error: {final_result['error']}")
                return
            if not final_result.get("is_final"):
                logger.warning("No message flagged as final was received; showing the last result")

            print("\n" + "="*50)
            print("FINAL RESULT:")
            print(f"Language: {final_result.get('language')}")
            print(f"Text: {final_result.get('text')}")
            print("="*50)

    except Exception as e:
        # Top-level boundary of the example script: report and exit cleanly.
        logger.error(f"WebSocket Error: {e}")


def main():
    """Parse command-line options and run the streaming client once."""
    parser = argparse.ArgumentParser(description="Qwen3-ASR Streaming API Client Test")
    parser.add_argument("--url", default="ws://localhost:8000/asr/stream", help="WebSocket API URI")
    parser.add_argument("--audio", default="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav", help="Path or URL to audio file")
    parser.add_argument("--chunk-ms", type=int, default=1000, help="Chunk size in milliseconds")

    args = parser.parse_args()

    asyncio.run(stream_audio_to_api(args.url, args.audio, args.chunk_ms))


if __name__ == "__main__":
    main()
```