# livekit_agents/test_asr.py

import asyncio
import logging
import wave
from custom_agent import SenseVoiceSTT
from livekit import rtc
from livekit.agents import utils

# Named logger for this script; the root logger is configured at INFO so
# that log output is visible when the script runs.
logger = logging.getLogger("test-asr")
logging.basicConfig(level=logging.INFO)

async def test():
    """Send one local WAV file through SenseVoiceSTT and print the transcript.

    The whole file is read into a single ``rtc.AudioFrame`` and passed to
    ``stt.recognize``; the text of the first alternative is printed. This is
    a manual connectivity check, so failures are reported on stdout (plus a
    logged traceback) instead of being raised.

    Returns:
        None.
    """
    # Replace with the path of an audio file on your machine.
    audio_path = "/home/verachen/Music/voice/2food.wav"

    # Initialise the ASR client.
    stt = SenseVoiceSTT(url="http://10.6.80.21:5003/asr-blackbox")

    print(f"Testing ASR connectivity with file: {audio_path}")

    try:
        # Read everything we need up front so the file handle is released
        # before the (potentially slow) network round-trip below.
        with wave.open(audio_path, 'rb') as wf:
            sample_width = wf.getsampwidth()
            sample_rate = wf.getframerate()
            num_channels = wf.getnchannels()
            pcm = wf.readframes(wf.getnframes())

        # rtc.AudioFrame stores 16-bit samples; any other width would be
        # misinterpreted (or rejected with an opaque length error).
        if sample_width != 2:
            print(
                f"Error: expected 16-bit PCM audio, got {sample_width * 8}-bit "
                f"samples in {audio_path}"
            )
            return

        # Derive the frame count from the bytes actually read, so a file
        # whose header overstates its length still yields a consistent frame.
        samples_per_channel = len(pcm) // (num_channels * sample_width)

        frame = rtc.AudioFrame(
            data=pcm,
            sample_rate=sample_rate,
            num_channels=num_channels,
            samples_per_channel=samples_per_channel,
        )

        # recognize() is given a buffer of frames, so wrap the single frame
        # in a list. NOTE(review): the original author's note says
        # SenseVoiceSTT._recognize_impl combines the frames and converts them
        # to WAV bytes - confirm against custom_agent.
        result = await stt.recognize(buffer=[frame])

        if result.alternatives:
            print("\n--- ASR Result ---")
            print(f"Text: {result.alternatives[0].text}")
            print("------------------\n")
        else:
            print("ASR returned no text.")

    except FileNotFoundError:
        print(f"Error: Audio file not found at {audio_path}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and keep going,
        # but also log the traceback because str(e) can be empty (e.g. for
        # timeout errors), which would leave the message uninformative.
        print(f"An error occurred: {e}")
        logging.getLogger("test-asr").exception("ASR connectivity test failed")

# Script entry point: run the async connectivity test to completion on a
# fresh event loop when this file is executed directly (not when imported).
if __name__ == "__main__":
    asyncio.run(test())