import asyncio
import logging
import sys
import wave

from custom_agent import SenseVoiceSTT
from livekit import rtc
from livekit.agents import utils

# Set the log level so that log output is visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("test-asr")

# Defaults used when no override is supplied; replace the path with a local
# audio file of your own.
DEFAULT_AUDIO_PATH = "/home/verachen/Music/voice/2food.wav"
DEFAULT_ASR_URL = "http://10.6.80.21:5003/asr-blackbox"

# rtc.AudioFrame takes int16 samples, i.e. 2 bytes per sample.
_PCM16_SAMPLE_WIDTH = 2


async def test(
    audio_path: str = DEFAULT_AUDIO_PATH,
    url: str = DEFAULT_ASR_URL,
) -> None:
    """Send one WAV file to the ASR service and print the recognized text.

    The file is read in full, wrapped in a single ``rtc.AudioFrame`` and
    passed to ``SenseVoiceSTT.recognize``. All failures are reported on
    stdout rather than raised, so the script always exits normally.

    Args:
        audio_path: Path of the WAV file to send. Must be 16-bit PCM.
        url: Endpoint passed to ``SenseVoiceSTT``.
    """
    # Initialize the ASR client.
    stt = SenseVoiceSTT(url=url)

    print(f"Testing ASR connectivity with file: {audio_path}")

    try:
        # Read the audio file; capture the header values while it is open.
        with wave.open(audio_path, "rb") as wf:
            sample_width = wf.getsampwidth()
            sample_rate = wf.getframerate()
            num_channels = wf.getnchannels()
            frames = wf.readframes(wf.getnframes())

        # Anything other than 16-bit PCM would either be rejected by
        # AudioFrame with an unhelpful length error or be misread as int16.
        if sample_width != _PCM16_SAMPLE_WIDTH:
            raise ValueError(
                f"unsupported sample width {sample_width * 8}-bit in "
                f"{audio_path}; expected 16-bit PCM"
            )

        # Derive the sample count from the bytes actually read: the header's
        # frame count can overstate the data in a truncated file.
        samples_per_channel = len(frames) // (num_channels * sample_width)

        # Per the original author's note, SenseVoiceSTT._recognize_impl calls
        # combine_audio_frames(buffer).to_wav_bytes(), so the buffer must be a
        # list of AudioFrame objects. A single frame holding the whole file
        # is enough for this test.
        frame = rtc.AudioFrame(
            data=frames,
            sample_rate=sample_rate,
            num_channels=num_channels,
            samples_per_channel=samples_per_channel,
        )

        # Run recognition.
        result = await stt.recognize(buffer=[frame])

        if result.alternatives:
            print("\n--- ASR Result ---")
            print(f"Text: {result.alternatives[0].text}")
            print("------------------\n")
        else:
            print("ASR returned no text.")

    except FileNotFoundError:
        print(f"Error: Audio file not found at {audio_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
        # Keep the traceback: str(e) alone is often empty or ambiguous.
        logger.exception("ASR test failed")


if __name__ == "__main__":
    # An optional first command-line argument overrides the default path.
    asyncio.run(test(*sys.argv[1:2]))