fix: tts parameters

This commit is contained in:
0Xiao0
2026-05-14 15:33:20 +08:00
parent 89011fed81
commit b18c5b40da

View File

@@ -186,7 +186,7 @@ async def entrypoint(ctx: JobContext) -> None:
url=TTS_URL,
model_name=TTS_MODEL,
params=_tts_params_from_env(TTS_MODEL),
prompt_wav_path=os.getenv("CUSTOM_TTS_PROMPT_WAV") or os.getenv("VOXCPM_PROMPT_WAV"),
prompt_wav_path=_tts_prompt_wav_from_env(TTS_MODEL),
sample_rate=TTS_SAMPLE_RATE,
num_channels=TTS_NUM_CHANNELS,
),
@@ -240,48 +240,54 @@ def _tts_params_from_env(model_name: str) -> dict[str, str]:
model_name = model_name.lower()
if model_name == "voxcpmtts":
params.update(
{
"streaming": os.getenv("CUSTOM_TTS_STREAMING", "false"),
"prompt_text": os.getenv(
"CUSTOM_TTS_PROMPT_TEXT",
os.getenv("VOXCPM_PROMPT_TEXT", "澳门有乜嘢好食嘅"),
),
"cfg_value": os.getenv("VOXCPM_CFG_VALUE", "2.0"),
"inference_timesteps": os.getenv("VOXCPM_INFERENCE_TIMESTEPS", "10"),
"do_normalize": os.getenv("VOXCPM_DO_NORMALIZE", "true"),
"denoise": os.getenv("VOXCPM_DENOISE", "true"),
"retry_badcase": os.getenv("VOXCPM_RETRY_BADCASE", "true"),
"retry_badcase_max_times": os.getenv("VOXCPM_RETRY_BADCASE_MAX_TIMES", "3"),
"retry_badcase_ratio_threshold": os.getenv(
"VOXCPM_RETRY_BADCASE_RATIO_THRESHOLD", "6.0"
),
}
_set_if_present(params, "streaming", os.getenv("CUSTOM_TTS_STREAMING"))
_set_if_present(
params,
"prompt_text",
os.getenv("CUSTOM_TTS_PROMPT_TEXT") or os.getenv("VOXCPM_PROMPT_TEXT"),
)
_set_if_present(params, "cfg_value", os.getenv("VOXCPM_CFG_VALUE"))
_set_if_present(params, "inference_timesteps", os.getenv("VOXCPM_INFERENCE_TIMESTEPS"))
_set_if_present(params, "do_normalize", os.getenv("VOXCPM_DO_NORMALIZE"))
_set_if_present(params, "denoise", os.getenv("VOXCPM_DENOISE"))
_set_if_present(params, "retry_badcase", os.getenv("VOXCPM_RETRY_BADCASE"))
_set_if_present(
params,
"retry_badcase_max_times",
os.getenv("VOXCPM_RETRY_BADCASE_MAX_TIMES"),
)
_set_if_present(
params,
"retry_badcase_ratio_threshold",
os.getenv("VOXCPM_RETRY_BADCASE_RATIO_THRESHOLD"),
)
elif model_name == "melotts":
params["speed"] = os.getenv("CUSTOM_TTS_SPEED", "1.0")
_set_if_present(params, "speed", os.getenv("CUSTOM_TTS_SPEED"))
elif model_name == "cosyvoicetts":
_set_if_present(params, "spk_id", os.getenv("CUSTOM_TTS_SPK_ID"))
_set_if_present(params, "model", os.getenv("CUSTOM_TTS_MODE"))
_set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT"))
_set_if_present(params, "instruct_text", os.getenv("CUSTOM_TTS_INSTRUCT_TEXT"))
elif model_name == "sovitstts":
params.update(
{
"text_lang": os.getenv("CUSTOM_TTS_TEXT_LANG", "zh"),
"prompt_lang": os.getenv("CUSTOM_TTS_PROMPT_LANG", "zh"),
"text_split_method": os.getenv("CUSTOM_TTS_TEXT_SPLIT_METHOD", "cut0"),
"batch_size": os.getenv("CUSTOM_TTS_BATCH_SIZE", "1"),
"media_type": os.getenv("CUSTOM_TTS_MEDIA_TYPE", "wav"),
"streaming_mode": os.getenv("CUSTOM_TTS_STREAMING", "false"),
}
)
_set_if_present(params, "text_lang", os.getenv("CUSTOM_TTS_TEXT_LANG"))
_set_if_present(params, "prompt_lang", os.getenv("CUSTOM_TTS_PROMPT_LANG"))
_set_if_present(params, "text_split_method", os.getenv("CUSTOM_TTS_TEXT_SPLIT_METHOD"))
_set_if_present(params, "batch_size", os.getenv("CUSTOM_TTS_BATCH_SIZE"))
_set_if_present(params, "media_type", os.getenv("CUSTOM_TTS_MEDIA_TYPE"))
_set_if_present(params, "streaming_mode", os.getenv("CUSTOM_TTS_STREAMING"))
_set_if_present(params, "ref_audio_path", os.getenv("CUSTOM_TTS_REF_AUDIO_PATH"))
_set_if_present(params, "prompt_text", os.getenv("CUSTOM_TTS_PROMPT_TEXT"))
return params
def _tts_prompt_wav_from_env(model_name: str) -> str | None:
if model_name.lower() != "voxcpmtts":
return None
return os.getenv("CUSTOM_TTS_PROMPT_WAV") or os.getenv("VOXCPM_PROMPT_WAV") or None
def _set_if_present(params: dict[str, str], key: str, value: str | None) -> None:
if value:
params[key] = value