This commit is contained in:
0Xiao0
2025-04-07 19:26:04 +08:00
parent ff170ece19
commit e07cf12ae7

View File

@@ -242,7 +242,8 @@ class TTS(Blackbox):
elif chroma_collection_id == 'boss': elif chroma_collection_id == 'boss':
if self.cosyvoice_mode == 'local': if self.cosyvoice_mode == 'local':
set_all_random_seed(35616313) set_all_random_seed(35616313)
audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False) # audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False)
audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
for i, j in enumerate(audio): for i, j in enumerate(audio):
f = io.BytesIO() f = io.BytesIO()
sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav') sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')
@@ -262,8 +263,8 @@ class TTS(Blackbox):
if self.cosyvoice_mode == 'local': if self.cosyvoice_mode == 'local':
set_all_random_seed(56056558) set_all_random_seed(56056558)
print("*"*90) print("*"*90)
# audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language, stream=True) audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language, stream=True)
audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False) # audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
# for i, j in enumerate(audio): # for i, j in enumerate(audio):
# f = io.BytesIO() # f = io.BytesIO()
# sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav') # sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')
@@ -311,7 +312,8 @@ class TTS(Blackbox):
elif chroma_collection_id == 'boss': elif chroma_collection_id == 'boss':
if self.cosyvoice_mode == 'local': if self.cosyvoice_mode == 'local':
set_all_random_seed(35616313) set_all_random_seed(35616313)
audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False) # audio = self.cosyvoicetts.inference_sft(text, '中文男', speed=1.5, stream=False)
audio = self.cosyvoicetts.inference_instruct2(text, '用粤语说这句话', self.prompt_speech_16k, stream=False)
for i, j in enumerate(audio): for i, j in enumerate(audio):
f = io.BytesIO() f = io.BytesIO()
sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav') sf.write(f, j['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav')