# File-viewer listing metadata: 54 lines, 1.8 KiB, Python.
import asyncio
import logging
import wave

from livekit import rtc
from livekit.agents import utils

from custom_agent import SenseVoiceSTT


# Set the log level to INFO so that library and script output is visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("test-asr")


async def test(
    audio_path: str = "/home/verachen/Music/voice/2food.wav",
    url: str = "http://10.6.80.21:5003/asr-blackbox",
) -> None:
    """Send one local WAV file to the ASR service and print the transcript.

    The whole file is wrapped in a single ``rtc.AudioFrame`` and handed to
    ``SenseVoiceSTT.recognize``. Problems are reported on stdout instead of
    being raised, so the coroutine can serve as a quick connectivity check.

    Args:
        audio_path: Path to a local WAV file with 16-bit PCM samples.
            Defaults to the sample path the script was originally written for.
        url: ASR endpoint passed to ``SenseVoiceSTT``.
    """
    # Initialise the ASR client.
    stt = SenseVoiceSTT(url=url)

    print(f"Testing ASR connectivity with file: {audio_path}")

    try:
        # Read the audio file. Only the header values and the raw PCM bytes
        # are needed afterwards, so the file is closed before the network call.
        with wave.open(audio_path, "rb") as wf:
            sample_width = wf.getsampwidth()
            sample_rate = wf.getframerate()
            num_channels = wf.getnchannels()
            pcm = wf.readframes(wf.getnframes())

        # Per the LiveKit SDK documentation, AudioFrame treats its buffer as
        # 16-bit samples; any other sample width would be silently
        # misinterpreted, so reject it up front.
        if sample_width != 2:
            raise ValueError(
                f"unsupported sample width {sample_width * 8} bit; "
                "expected 16-bit PCM"
            )

        # Derive the frame count from the bytes actually read rather than from
        # the header, so a truncated file does not fail AudioFrame's length
        # check. Any trailing partial frame is dropped.
        bytes_per_frame = sample_width * num_channels
        samples_per_channel = len(pcm) // bytes_per_frame
        if samples_per_channel == 0:
            raise ValueError("audio file contains no complete frames")
        pcm = pcm[: samples_per_channel * bytes_per_frame]

        # Original author's note: SenseVoiceSTT._recognize_impl calls
        # combine_audio_frames(buffer).to_wav_bytes(), so the buffer must be a
        # list of AudioFrame objects. Here the whole file is one frame, with
        # rate and channel count taken from the WAV header.
        frame = rtc.AudioFrame(
            data=pcm,
            sample_rate=sample_rate,
            num_channels=num_channels,
            samples_per_channel=samples_per_channel,
        )

        # Run recognition.
        result = await stt.recognize(buffer=[frame])

        if result.alternatives:
            print("\n--- ASR Result ---")
            print(f"Text: {result.alternatives[0].text}")
            print("------------------\n")
        else:
            print("ASR returned no text.")

    except FileNotFoundError:
        print(f"Error: Audio file not found at {audio_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
        # str(e) is empty for some exceptions (e.g. timeouts); keep the
        # exception type and traceback in the log for diagnosis.
        logger.exception("ASR test failed")


if __name__ == "__main__":
    # Script entry point: run the ASR connectivity check on a fresh event loop.
    asyncio.run(test())