feat: support different models
This commit is contained in:
@ -1,50 +1,66 @@
|
||||
import asyncio
|
||||
import os
|
||||
import logging
|
||||
from tts_voxcpm import VoxCPMTTS
|
||||
from livekit.agents import tts
|
||||
import os
|
||||
|
||||
from tts import BlackboxTTS
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
async def test_tts():
    """Synthesize a short sample through ``BlackboxTTS`` and save it as WAV.

    Manual smoke test: builds a ``BlackboxTTS`` client for the ``voxcpmtts``
    model, synthesizes one fixed sentence, prints basic statistics about the
    returned audio frame and writes the result to ``test_output.wav`` in the
    current working directory.

    Any exception raised during synthesis or saving is caught and reported
    on stdout so the script always exits cleanly.
    """
    # Use the URL from the user's curl command
    url = "http://10.6.80.21:5002/tts-blackbox"

    # Voice-cloning prompt; same file as in the curl command.
    prompt_wav = "/home/verachen/Music/voice/2food.wav"
    if not os.path.exists(prompt_wav):
        # The previous "fallback" re-assigned this very same path, which
        # silently hid a missing file. Surface it instead, but keep going so
        # the request is still attempted (best-effort, as before).
        print(f"Warning: prompt wav not found: {prompt_wav}")

    print(f"Testing BlackboxTTS with URL: {url}")
    print(f"Using prompt wav: {prompt_wav}")

    blackbox_tts = BlackboxTTS(
        url=url,
        model_name="voxcpmtts",
        prompt_wav_path=prompt_wav,
        # Values are passed as strings, mirroring the form fields of the
        # original curl request.
        params={
            "streaming": "false",
            "prompt_text": "澳门有乜嘢好食嘅",
            "cfg_value": "2.0",
            "inference_timesteps": "10",
            "do_normalize": "true",
            "denoise": "true",
            "retry_badcase": "true",
            "retry_badcase_max_times": "3",
            "retry_badcase_ratio_threshold": "6.0",
        },
    )

    text = "你好,这是一段测试文本"
    print(f"Synthesizing text: {text}")

    try:
        stream = blackbox_tts.synthesize(text)
        # NOTE(review): collect() is expected to return a single combined
        # AudioFrame - confirm against the BlackboxTTS stream implementation.
        audio_frame = await stream.collect()

        print("Successfully synthesized audio!")
        # Use the frame's own accessors rather than re-deriving the numbers
        # from the raw buffer length (the old formula was neither a duration
        # nor a sample count).
        print(f"Audio duration: {audio_frame.duration:.2f} s")
        print(f"Samples per channel: {audio_frame.samples_per_channel}")

        # Save to file for manual check; to_wav_bytes() adds the WAV header,
        # so the output is a playable file rather than raw PCM.
        with open("test_output.wav", "wb") as f:
            f.write(audio_frame.to_wav_bytes())
        print("Saved output to test_output.wav")

    except Exception as e:
        # Top-level boundary of a manual test script: report and exit cleanly.
        print(f"TTS test failed: {e}")
|
||||
|
||||
|
||||
# Script entry point: run the async smoke test on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(test_tts())
|
||||
|
||||
Reference in New Issue
Block a user