fix: tts parameters
This commit is contained in:
@ -186,7 +186,7 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||
url=TTS_URL,
|
||||
model_name=TTS_MODEL,
|
||||
params=_tts_params_from_env(TTS_MODEL),
|
||||
prompt_wav_path=os.getenv("CUSTOM_TTS_PROMPT_WAV") or os.getenv("VOXCPM_PROMPT_WAV"),
|
||||
prompt_wav_path=_tts_prompt_wav_from_env(TTS_MODEL),
|
||||
sample_rate=TTS_SAMPLE_RATE,
|
||||
num_channels=TTS_NUM_CHANNELS,
|
||||
),
|
||||
@ -240,48 +240,54 @@ def _tts_params_from_env(model_name: str) -> dict[str, str]:
|
||||
model_name = model_name.lower()
|
||||
|
||||
if model_name == "voxcpmtts":
|
||||
params.update(
|
||||
{
|
||||
"streaming": os.getenv("CUSTOM_TTS_STREAMING", "false"),
|
||||
"prompt_text": os.getenv(
|
||||
"CUSTOM_TTS_PROMPT_TEXT",
|
||||
os.getenv("VOXCPM_PROMPT_TEXT", "澳门有乜嘢好食嘅"),
|
||||
),
|
||||
"cfg_value": os.getenv("VOXCPM_CFG_VALUE", "2.0"),
|
||||
"inference_timesteps": os.getenv("VOXCPM_INFERENCE_TIMESTEPS", "10"),
|
||||
"do_normalize": os.getenv("VOXCPM_DO_NORMALIZE", "true"),
|
||||
"denoise": os.getenv("VOXCPM_DENOISE", "true"),
|
||||
"retry_badcase": os.getenv("VOXCPM_RETRY_BADCASE", "true"),
|
||||
"retry_badcase_max_times": os.getenv("VOXCPM_RETRY_BADCASE_MAX_TIMES", "3"),
|
||||
"retry_badcase_ratio_threshold": os.getenv(
|
||||
"VOXCPM_RETRY_BADCASE_RATIO_THRESHOLD", "6.0"
|
||||
),
|
||||
}
|
||||
_set_if_present(params, "streaming", os.getenv("CUSTOM_TTS_STREAMING"))
|
||||
_set_if_present(
|
||||
params,
|
||||
"prompt_text",
|
||||
os.getenv("CUSTOM_TTS_PROMPT_TEXT") or os.getenv("VOXCPM_PROMPT_TEXT"),
|
||||
)
|
||||
_set_if_present(params, "cfg_value", os.getenv("VOXCPM_CFG_VALUE"))
|
||||
_set_if_present(params, "inference_timesteps", os.getenv("VOXCPM_INFERENCE_TIMESTEPS"))
|
||||
_set_if_present(params, "do_normalize", os.getenv("VOXCPM_DO_NORMALIZE"))
|
||||
_set_if_present(params, "denoise", os.getenv("VOXCPM_DENOISE"))
|
||||
_set_if_present(params, "retry_badcase", os.getenv("VOXCPM_RETRY_BADCASE"))
|
||||
_set_if_present(
|
||||
params,
|
||||
"retry_badcase_max_times",
|
||||
os.getenv("VOXCPM_RETRY_BADCASE_MAX_TIMES"),
|
||||
)
|
||||
_set_if_present(
|
||||
params,
|
||||
"retry_badcase_ratio_threshold",
|
||||
os.getenv("VOXCPM_RETRY_BADCASE_RATIO_THRESHOLD"),
|
||||
)
|
||||
elif model_name == "melotts":
|
||||
params["speed"] = os.getenv("CUSTOM_TTS_SPEED", "1.0")
|
||||
_set_if_present(params, "speed", os.getenv("CUSTOM_TTS_SPEED"))
|
||||
elif model_name == "cosyvoicetts":
|
||||
_set_if_present(params, "spk_id", os.getenv("CUSTOM_TTS_SPK_ID"))
|
||||
_set_if_present(params, "model", os.getenv("CUSTOM_TTS_MODE"))
|
||||
_set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT"))
|
||||
_set_if_present(params, "instruct_text", os.getenv("CUSTOM_TTS_INSTRUCT_TEXT"))
|
||||
elif model_name == "sovitstts":
|
||||
params.update(
|
||||
{
|
||||
"text_lang": os.getenv("CUSTOM_TTS_TEXT_LANG", "zh"),
|
||||
"prompt_lang": os.getenv("CUSTOM_TTS_PROMPT_LANG", "zh"),
|
||||
"text_split_method": os.getenv("CUSTOM_TTS_TEXT_SPLIT_METHOD", "cut0"),
|
||||
"batch_size": os.getenv("CUSTOM_TTS_BATCH_SIZE", "1"),
|
||||
"media_type": os.getenv("CUSTOM_TTS_MEDIA_TYPE", "wav"),
|
||||
"streaming_mode": os.getenv("CUSTOM_TTS_STREAMING", "false"),
|
||||
}
|
||||
)
|
||||
_set_if_present(params, "text_lang", os.getenv("CUSTOM_TTS_TEXT_LANG"))
|
||||
_set_if_present(params, "prompt_lang", os.getenv("CUSTOM_TTS_PROMPT_LANG"))
|
||||
_set_if_present(params, "text_split_method", os.getenv("CUSTOM_TTS_TEXT_SPLIT_METHOD"))
|
||||
_set_if_present(params, "batch_size", os.getenv("CUSTOM_TTS_BATCH_SIZE"))
|
||||
_set_if_present(params, "media_type", os.getenv("CUSTOM_TTS_MEDIA_TYPE"))
|
||||
_set_if_present(params, "streaming_mode", os.getenv("CUSTOM_TTS_STREAMING"))
|
||||
_set_if_present(params, "ref_audio_path", os.getenv("CUSTOM_TTS_REF_AUDIO_PATH"))
|
||||
_set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT"))
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def _tts_prompt_wav_from_env(model_name: str) -> str | None:
|
||||
if model_name.lower() != "voxcpmtts":
|
||||
return None
|
||||
|
||||
return os.getenv("CUSTOM_TTS_PROMPT_WAV") or os.getenv("VOXCPM_PROMPT_WAV") or None
|
||||
|
||||
|
||||
def _set_if_present(params: dict[str, str], key: str, value: str | None) -> None:
|
||||
if value:
|
||||
params[key] = value
|
||||
|
||||
Reference in New Issue
Block a user