diff --git a/src/blackbox/tts.py b/src/blackbox/tts.py index b4d5b78..793567c 100644 --- a/src/blackbox/tts.py +++ b/src/blackbox/tts.py @@ -242,7 +242,8 @@ class TTS(Blackbox): elif chroma_collection_id == 'boss': if self.cosyvoice_mode == 'local': set_all_random_seed(35616313) - audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False) + # audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False) + audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False) for i, j in enumerate(audio): f = io.BytesIO() sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav') @@ -262,8 +263,8 @@ class TTS(Blackbox): if self.cosyvoice_mode == 'local': set_all_random_seed(56056558) print("*"*90) - # audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language, stream=True) - audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False) + audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language, stream=True) + # audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False) # for i, j in enumerate(audio): # f = io.BytesIO() # sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav') @@ -311,7 +312,8 @@ class TTS(Blackbox): elif chroma_collection_id == 'boss': if self.cosyvoice_mode == 'local': set_all_random_seed(35616313) - audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False) + # audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False) + audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False) for i, j in enumerate(audio): f = io.BytesIO() sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')