From f2e203d5e227ee9d895ad4d05fda9e0d1b8ac9d2 Mon Sep 17 00:00:00 2001 From: vytskalt Date: Fri, 9 Jan 2026 20:05:52 +0200 Subject: [PATCH] print debug messages to stderr instead of stdout --- src/voxcpm/core.py | 13 +++++++------ src/voxcpm/model/voxcpm.py | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/voxcpm/core.py b/src/voxcpm/core.py index f4ccc5b..5241040 100644 --- a/src/voxcpm/core.py +++ b/src/voxcpm/core.py @@ -1,4 +1,5 @@ import os +import sys import re import tempfile import numpy as np @@ -30,7 +31,7 @@ class VoxCPM: lora_weights_path: Path to pre-trained LoRA weights (.pth file or directory containing lora_weights.ckpt). If provided, LoRA weights will be loaded. """ - print(f"voxcpm_model_path: {voxcpm_model_path}, zipenhancer_model_path: {zipenhancer_model_path}, enable_denoiser: {enable_denoiser}") + print(f"voxcpm_model_path: {voxcpm_model_path}, zipenhancer_model_path: {zipenhancer_model_path}, enable_denoiser: {enable_denoiser}", file=sys.stderr) # If lora_weights_path is provided but no lora_config, create a default one if lora_weights_path is not None and lora_config is None: @@ -39,15 +40,15 @@ class VoxCPM: enable_dit=True, enable_proj=False, ) - print(f"Auto-created default LoRAConfig for loading weights from: {lora_weights_path}") + print(f"Auto-created default LoRAConfig for loading weights from: {lora_weights_path}", file=sys.stderr) self.tts_model = VoxCPMModel.from_local(voxcpm_model_path, optimize=optimize, lora_config=lora_config) # Load LoRA weights if path is provided if lora_weights_path is not None: - print(f"Loading LoRA weights from: {lora_weights_path}") + print(f"Loading LoRA weights from: {lora_weights_path}", file=sys.stderr) loaded_keys, skipped_keys = self.tts_model.load_lora_weights(lora_weights_path) - print(f"Loaded {len(loaded_keys)} LoRA parameters, skipped {len(skipped_keys)}") + print(f"Loaded {len(loaded_keys)} LoRA parameters, skipped {len(skipped_keys)}", file=sys.stderr) self.text_normalizer = None if enable_denoiser and zipenhancer_model_path is not None: @@ -56,7 +57,7 @@ class VoxCPM: else: self.denoiser = None if optimize: - print("Warm up VoxCPMModel...") + print("Warm up VoxCPMModel...", file=sys.stderr) self.tts_model.generate( target_text="Hello, this is the first test sentence.", max_len=10, @@ -278,4 +279,4 @@ class VoxCPM: @property def lora_enabled(self) -> bool: """Check if LoRA is currently configured.""" - return self.tts_model.lora_config is not None \ No newline at end of file + return self.tts_model.lora_config is not None diff --git a/src/voxcpm/model/voxcpm.py b/src/voxcpm/model/voxcpm.py index 3ce59eb..c96125d 100644 --- a/src/voxcpm/model/voxcpm.py +++ b/src/voxcpm/model/voxcpm.py @@ -19,6 +19,7 @@ limitations under the License. """ import os +import sys from typing import Tuple, Union, Generator, List, Optional import torch @@ -120,7 +121,7 @@ class VoxCPMModel(nn.Module): self.device = "mps" else: self.device = "cpu" - print(f"Running on device: {self.device}, dtype: {self.config.dtype}") + print(f"Running on device: {self.device}, dtype: {self.config.dtype}", file=sys.stderr) # Text-Semantic LM self.base_lm = MiniCPMModel(config.lm_config) @@ -228,7 +229,7 @@ class VoxCPMModel(nn.Module): self.feat_encoder = torch.compile(self.feat_encoder, mode="reduce-overhead", fullgraph=True) self.feat_decoder.estimator = torch.compile(self.feat_decoder.estimator, mode="reduce-overhead", fullgraph=True) except Exception as e: - print(f"Warning: torch.compile disabled - {e}") + print(f"Warning: torch.compile disabled - {e}", file=sys.stderr) return self def forward( @@ -459,7 +460,7 @@ class VoxCPMModel(nn.Module): latent_pred, pred_audio_feat = next(inference_result) if retry_badcase: if pred_audio_feat.shape[0] >= target_text_length * retry_badcase_ratio_threshold: - print(f" Badcase detected, audio_text_ratio={pred_audio_feat.shape[0] / target_text_length}, retrying...") + print(f" Badcase detected, audio_text_ratio={pred_audio_feat.shape[0] / target_text_length}, retrying...", file=sys.stderr) retry_badcase_times += 1 continue else: @@ -683,7 +684,7 @@ class VoxCPMModel(nn.Module): latent_pred, pred_audio_feat = next(inference_result) if retry_badcase: if pred_audio_feat.shape[0] >= target_text_length * retry_badcase_ratio_threshold: - print(f" Badcase detected, audio_text_ratio={pred_audio_feat.shape[0] / target_text_length}, retrying...") + print(f" Badcase detected, audio_text_ratio={pred_audio_feat.shape[0] / target_text_length}, retrying...", file=sys.stderr) retry_badcase_times += 1 continue else: @@ -868,10 +869,10 @@ class VoxCPMModel(nn.Module): pytorch_model_path = os.path.join(path, "pytorch_model.bin") if os.path.exists(safetensors_path) and SAFETENSORS_AVAILABLE: - print(f"Loading model from safetensors: {safetensors_path}") + print(f"Loading model from safetensors: {safetensors_path}", file=sys.stderr) model_state_dict = load_file(safetensors_path) elif os.path.exists(pytorch_model_path): - print(f"Loading model from pytorch_model.bin: {pytorch_model_path}") + print(f"Loading model from pytorch_model.bin: {pytorch_model_path}", file=sys.stderr) checkpoint = torch.load( pytorch_model_path, map_location="cpu",