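fix: tts (use inference_instruct2 for the 'boss' voice; re-enable streaming inference_sft in the other changed local branch)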

2025-12-13 16:53:24 +00:00 · 2025-04-07 19:26:04 +08:00
parent ff170ece19
commit e07cf12ae7
1 changed files with 6 additions and 4 deletions
--- a/src/blackbox/tts.py
+++ b/src/blackbox/tts.py
@@ -242,7 +242,8 @@ class TTS(Blackbox):
            elif chroma_collection_id == 'boss':
                if self.cosyvoice_mode == 'local':
                    set_all_random_seed(35616313)
-                    audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False)
+                    # audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False)
+                    audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
                    for i, j in enumerate(audio):
                        f = io.BytesIO()
                        sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')
@@ -262,8 +263,8 @@ class TTS(Blackbox):
                if self.cosyvoice_mode == 'local':
                    set_all_random_seed(56056558)
                    print("*"*90)
-                    # audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language, stream=True)
+                    audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language, stream=True)
-                    audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
+                    # audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
                    # for i, j in enumerate(audio):
                    #     f = io.BytesIO()
                    #     sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')   
@@ -311,7 +312,8 @@ class TTS(Blackbox):
            elif chroma_collection_id == 'boss':
                if self.cosyvoice_mode == 'local':
                    set_all_random_seed(35616313)
-                    audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False)
+                    # audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False)
+                    audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
                    for i, j in enumerate(audio):
                        f = io.BytesIO()
                        sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')