fix: add 'man' TTS voice and treat a missing collection id as 'kiki'

This commit is contained in:
0Xiao0
2024-09-12 15:56:00 +08:00
parent ff232f0f96
commit 280d4fd55d

View File

@ -15,7 +15,7 @@ from injector import singleton
import sys,os
sys.path.append('/home/gpu/Workspace/CosyVoice')
from cosyvoice.cli.cosyvoice import CosyVoice
from cosyvoice.utils.file_utils import load_wav
from cosyvoice.utils.file_utils import load_wav, speed_change
import soundfile
import pyloudnorm as pyln
@ -68,6 +68,7 @@ class TTS(Blackbox):
else:
self.melo_url = melo_config.url
logging.info('#### Initializing MeloTTS Service in ' + self.melo_device + ' mode...')
print('1.#### Initializing MeloTTS Service in ' + self.melo_device + ' mode...')
@logging_time(logger=logger)
def cosyvoice_model_init(self, cosyvoice_config: CosyVoiceConf) -> None:
@ -84,7 +85,8 @@ class TTS(Blackbox):
else:
self.cosyvoice_url = cosyvoice_config.url
logging.info('#### Initializing CosyVoiceTTS Service in cuda:' + self.cosyvoice_device + ' mode...')
logging.info('#### Initializing CosyVoiceTTS Service in ' + self.cosyvoice_device + ' mode...')
print('1.#### Initializing CosyVoiceTTS Service in ' + self.cosyvoice_device + ' mode...')
@inject
def __init__(self, melo_config: MeloConf, cosyvoice_config: CosyVoiceConf, settings: dict) -> None:
@ -108,9 +110,8 @@ class TTS(Blackbox):
text = args[0]
current_time = time.time()
if user_model_name == 'melotts':
if chroma_collection_id == 'kiki':
if chroma_collection_id == 'kiki' or chroma_collection_id is None:
if self.melo_mode == 'local':
audio = self.melotts.tts_to_file(text, self.speaker_ids[self.melo_speaker], speed=self.melo_speed)
f = io.BytesIO()
@ -161,7 +162,7 @@ class TTS(Blackbox):
return response.content
elif user_model_name == 'cosyvoicetts':
if chroma_collection_id == 'kiki':
if chroma_collection_id == 'kiki' or chroma_collection_id is None:
if self.cosyvoice_mode == 'local':
set_all_random_seed(56056558)
audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language)
@ -192,7 +193,29 @@ class TTS(Blackbox):
}
response = requests.post(self.cosyvoice_url, json=message)
print("#### CosyVoiceTTS Service consume - docker : ", (time.time()-current_time))
return response.content
return response.content
elif user_model_name == 'man':
if self.cosyvoice_mode == 'local':
set_all_random_seed(35616313)
audio = self.cosyvoicetts.inference_sft(text, '中文男')
try:
audio, sample_rate = speed_change(audio["tts_speech"], 22050, str(1.5))
audio = audio.numpy().flatten()
except Exception as e:
print(f"Failed to change speed of audio: \n{e}")
f = io.BytesIO()
soundfile.write(f, audio, 22050, format='wav')
f.seek(0)
print("#### CosyVoiceTTS Service consume - local : ", (time.time() - current_time))
return f.read()
else:
message = {
"text": text
}
response = requests.post(self.cosyvoice_url, json=message)
print("#### CosyVoiceTTS Service consume - docker : ", (time.time()-current_time))
return response.content
else:
audio = self.tts_service.read(text)
print("#### TTS Service consume : ", (time.time()-current_time))