fix: tts parameters

This commit is contained in:
0Xiao0
2026-05-14 15:33:20 +08:00
parent 89011fed81
commit b18c5b40da

View File

@ -186,7 +186,7 @@ async def entrypoint(ctx: JobContext) -> None:
url=TTS_URL, url=TTS_URL,
model_name=TTS_MODEL, model_name=TTS_MODEL,
params=_tts_params_from_env(TTS_MODEL), params=_tts_params_from_env(TTS_MODEL),
prompt_wav_path=os.getenv("CUSTOM_TTS_PROMPT_WAV") or os.getenv("VOXCPM_PROMPT_WAV"), prompt_wav_path=_tts_prompt_wav_from_env(TTS_MODEL),
sample_rate=TTS_SAMPLE_RATE, sample_rate=TTS_SAMPLE_RATE,
num_channels=TTS_NUM_CHANNELS, num_channels=TTS_NUM_CHANNELS,
), ),
@ -240,48 +240,54 @@ def _tts_params_from_env(model_name: str) -> dict[str, str]:
model_name = model_name.lower() model_name = model_name.lower()
if model_name == "voxcpmtts": if model_name == "voxcpmtts":
params.update( _set_if_present(params, "streaming", os.getenv("CUSTOM_TTS_STREAMING"))
{ _set_if_present(
"streaming": os.getenv("CUSTOM_TTS_STREAMING", "false"), params,
"prompt_text": os.getenv( "prompt_text",
"CUSTOM_TTS_PROMPT_TEXT", os.getenv("CUSTOM_TTS_PROMPT_TEXT") or os.getenv("VOXCPM_PROMPT_TEXT"),
os.getenv("VOXCPM_PROMPT_TEXT", "澳门有乜嘢好食嘅"), )
), _set_if_present(params, "cfg_value", os.getenv("VOXCPM_CFG_VALUE"))
"cfg_value": os.getenv("VOXCPM_CFG_VALUE", "2.0"), _set_if_present(params, "inference_timesteps", os.getenv("VOXCPM_INFERENCE_TIMESTEPS"))
"inference_timesteps": os.getenv("VOXCPM_INFERENCE_TIMESTEPS", "10"), _set_if_present(params, "do_normalize", os.getenv("VOXCPM_DO_NORMALIZE"))
"do_normalize": os.getenv("VOXCPM_DO_NORMALIZE", "true"), _set_if_present(params, "denoise", os.getenv("VOXCPM_DENOISE"))
"denoise": os.getenv("VOXCPM_DENOISE", "true"), _set_if_present(params, "retry_badcase", os.getenv("VOXCPM_RETRY_BADCASE"))
"retry_badcase": os.getenv("VOXCPM_RETRY_BADCASE", "true"), _set_if_present(
"retry_badcase_max_times": os.getenv("VOXCPM_RETRY_BADCASE_MAX_TIMES", "3"), params,
"retry_badcase_ratio_threshold": os.getenv( "retry_badcase_max_times",
"VOXCPM_RETRY_BADCASE_RATIO_THRESHOLD", "6.0" os.getenv("VOXCPM_RETRY_BADCASE_MAX_TIMES"),
), )
} _set_if_present(
params,
"retry_badcase_ratio_threshold",
os.getenv("VOXCPM_RETRY_BADCASE_RATIO_THRESHOLD"),
) )
elif model_name == "melotts": elif model_name == "melotts":
params["speed"] = os.getenv("CUSTOM_TTS_SPEED", "1.0") _set_if_present(params, "speed", os.getenv("CUSTOM_TTS_SPEED"))
elif model_name == "cosyvoicetts": elif model_name == "cosyvoicetts":
_set_if_present(params, "spk_id", os.getenv("CUSTOM_TTS_SPK_ID")) _set_if_present(params, "spk_id", os.getenv("CUSTOM_TTS_SPK_ID"))
_set_if_present(params, "model", os.getenv("CUSTOM_TTS_MODE")) _set_if_present(params, "model", os.getenv("CUSTOM_TTS_MODE"))
_set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT")) _set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT"))
_set_if_present(params, "instruct_text", os.getenv("CUSTOM_TTS_INSTRUCT_TEXT")) _set_if_present(params, "instruct_text", os.getenv("CUSTOM_TTS_INSTRUCT_TEXT"))
elif model_name == "sovitstts": elif model_name == "sovitstts":
params.update( _set_if_present(params, "text_lang", os.getenv("CUSTOM_TTS_TEXT_LANG"))
{ _set_if_present(params, "prompt_lang", os.getenv("CUSTOM_TTS_PROMPT_LANG"))
"text_lang": os.getenv("CUSTOM_TTS_TEXT_LANG", "zh"), _set_if_present(params, "text_split_method", os.getenv("CUSTOM_TTS_TEXT_SPLIT_METHOD"))
"prompt_lang": os.getenv("CUSTOM_TTS_PROMPT_LANG", "zh"), _set_if_present(params, "batch_size", os.getenv("CUSTOM_TTS_BATCH_SIZE"))
"text_split_method": os.getenv("CUSTOM_TTS_TEXT_SPLIT_METHOD", "cut0"), _set_if_present(params, "media_type", os.getenv("CUSTOM_TTS_MEDIA_TYPE"))
"batch_size": os.getenv("CUSTOM_TTS_BATCH_SIZE", "1"), _set_if_present(params, "streaming_mode", os.getenv("CUSTOM_TTS_STREAMING"))
"media_type": os.getenv("CUSTOM_TTS_MEDIA_TYPE", "wav"),
"streaming_mode": os.getenv("CUSTOM_TTS_STREAMING", "false"),
}
)
_set_if_present(params, "ref_audio_path", os.getenv("CUSTOM_TTS_REF_AUDIO_PATH")) _set_if_present(params, "ref_audio_path", os.getenv("CUSTOM_TTS_REF_AUDIO_PATH"))
_set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT")) _set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT"))
return params return params
def _tts_prompt_wav_from_env(model_name: str) -> str | None:
if model_name.lower() != "voxcpmtts":
return None
return os.getenv("CUSTOM_TTS_PROMPT_WAV") or os.getenv("VOXCPM_PROMPT_WAV") or None
def _set_if_present(params: dict[str, str], key: str, value: str | None) -> None: def _set_if_present(params: dict[str, str], key: str, value: str | None) -> None:
if value: if value:
params[key] = value params[key] = value