mirror of
https://github.com/BoardWare-Genius/jarvis-models.git
synced 2025-12-13 16:53:24 +00:00
fix: man tts
This commit is contained in:
@ -15,7 +15,7 @@ from injector import singleton
|
|||||||
import sys,os
|
import sys,os
|
||||||
sys.path.append('/home/gpu/Workspace/CosyVoice')
|
sys.path.append('/home/gpu/Workspace/CosyVoice')
|
||||||
from cosyvoice.cli.cosyvoice import CosyVoice
|
from cosyvoice.cli.cosyvoice import CosyVoice
|
||||||
from cosyvoice.utils.file_utils import load_wav
|
from cosyvoice.utils.file_utils import load_wav, speed_change
|
||||||
|
|
||||||
import soundfile
|
import soundfile
|
||||||
import pyloudnorm as pyln
|
import pyloudnorm as pyln
|
||||||
@ -68,6 +68,7 @@ class TTS(Blackbox):
|
|||||||
else:
|
else:
|
||||||
self.melo_url = melo_config.url
|
self.melo_url = melo_config.url
|
||||||
logging.info('#### Initializing MeloTTS Service in ' + self.melo_device + ' mode...')
|
logging.info('#### Initializing MeloTTS Service in ' + self.melo_device + ' mode...')
|
||||||
|
print('1.#### Initializing MeloTTS Service in ' + self.melo_device + ' mode...')
|
||||||
|
|
||||||
@logging_time(logger=logger)
|
@logging_time(logger=logger)
|
||||||
def cosyvoice_model_init(self, cosyvoice_config: CosyVoiceConf) -> None:
|
def cosyvoice_model_init(self, cosyvoice_config: CosyVoiceConf) -> None:
|
||||||
@ -84,7 +85,8 @@ class TTS(Blackbox):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
self.cosyvoice_url = cosyvoice_config.url
|
self.cosyvoice_url = cosyvoice_config.url
|
||||||
logging.info('#### Initializing CosyVoiceTTS Service in cuda:' + self.cosyvoice_device + ' mode...')
|
logging.info('#### Initializing CosyVoiceTTS Service in ' + self.cosyvoice_device + ' mode...')
|
||||||
|
print('1.#### Initializing CosyVoiceTTS Service in ' + self.cosyvoice_device + ' mode...')
|
||||||
|
|
||||||
@inject
|
@inject
|
||||||
def __init__(self, melo_config: MeloConf, cosyvoice_config: CosyVoiceConf, settings: dict) -> None:
|
def __init__(self, melo_config: MeloConf, cosyvoice_config: CosyVoiceConf, settings: dict) -> None:
|
||||||
@ -108,9 +110,8 @@ class TTS(Blackbox):
|
|||||||
|
|
||||||
text = args[0]
|
text = args[0]
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
|
|
||||||
if user_model_name == 'melotts':
|
if user_model_name == 'melotts':
|
||||||
if chroma_collection_id == 'kiki':
|
if chroma_collection_id == 'kiki' or chroma_collection_id is None:
|
||||||
if self.melo_mode == 'local':
|
if self.melo_mode == 'local':
|
||||||
audio = self.melotts.tts_to_file(text, self.speaker_ids[self.melo_speaker], speed=self.melo_speed)
|
audio = self.melotts.tts_to_file(text, self.speaker_ids[self.melo_speaker], speed=self.melo_speed)
|
||||||
f = io.BytesIO()
|
f = io.BytesIO()
|
||||||
@ -161,7 +162,7 @@ class TTS(Blackbox):
|
|||||||
return response.content
|
return response.content
|
||||||
|
|
||||||
elif user_model_name == 'cosyvoicetts':
|
elif user_model_name == 'cosyvoicetts':
|
||||||
if chroma_collection_id == 'kiki':
|
if chroma_collection_id == 'kiki' or chroma_collection_id is None:
|
||||||
if self.cosyvoice_mode == 'local':
|
if self.cosyvoice_mode == 'local':
|
||||||
set_all_random_seed(56056558)
|
set_all_random_seed(56056558)
|
||||||
audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language)
|
audio = self.cosyvoicetts.inference_sft(text, self.cosyvoice_language)
|
||||||
@ -192,7 +193,29 @@ class TTS(Blackbox):
|
|||||||
}
|
}
|
||||||
response = requests.post(self.cosyvoice_url, json=message)
|
response = requests.post(self.cosyvoice_url, json=message)
|
||||||
print("#### CosyVoiceTTS Service consume - docker : ", (time.time()-current_time))
|
print("#### CosyVoiceTTS Service consume - docker : ", (time.time()-current_time))
|
||||||
return response.content
|
return response.content
|
||||||
|
elif user_model_name == 'man':
|
||||||
|
if self.cosyvoice_mode == 'local':
|
||||||
|
set_all_random_seed(35616313)
|
||||||
|
audio = self.cosyvoicetts.inference_sft(text, '中文男')
|
||||||
|
try:
|
||||||
|
audio, sample_rate = speed_change(audio["tts_speech"], 22050, str(1.5))
|
||||||
|
audio = audio.numpy().flatten()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed to change speed of audio: \n{e}")
|
||||||
|
f = io.BytesIO()
|
||||||
|
soundfile.write(f, audio, 22050, format='wav')
|
||||||
|
f.seek(0)
|
||||||
|
print("#### CosyVoiceTTS Service consume - local : ", (time.time() - current_time))
|
||||||
|
return f.read()
|
||||||
|
else:
|
||||||
|
message = {
|
||||||
|
"text": text
|
||||||
|
}
|
||||||
|
response = requests.post(self.cosyvoice_url, json=message)
|
||||||
|
print("#### CosyVoiceTTS Service consume - docker : ", (time.time()-current_time))
|
||||||
|
return response.content
|
||||||
|
|
||||||
else:
|
else:
|
||||||
audio = self.tts_service.read(text)
|
audio = self.tts_service.read(text)
|
||||||
print("#### TTS Service consume : ", (time.time()-current_time))
|
print("#### TTS Service consume : ", (time.time()-current_time))
|
||||||
|
|||||||
Reference in New Issue
Block a user