TTS & cuda processing updated

2025-12-13 16:53:24 +00:00 · 2024-04-10 10:13:36 +08:00
parent c104ea52b5
commit bab055e7d6
4 changed files with 12 additions and 4 deletions
--- a/cuda.py
+++ b/cuda.py
@@ -0,0 +1,5 @@
+import torch
+
+print("Torch version:",torch.__version__)
+
+print("Is CUDA enabled?",torch.cuda.is_available())
--- a/src/blackbox/tts.py
+++ b/src/blackbox/tts.py
@@ -1,4 +1,5 @@
 import io
+import time
 from ntpath import join

 from fastapi import Request, Response, status
@@ -16,7 +17,9 @@ class TTS(Blackbox):

    def processing(self, *args, **kwargs) -> io.BytesIO:
        text = args[0]
+        current_time = time.time()
        audio = self.tts_service.read(text)
+        print("#### TTS Service consume : ", (time.time()-current_time))
        return audio

    def valid(self, *args, **kwargs) -> bool:
--- a/src/tts/tts_service.py
+++ b/src/tts/tts_service.py
@@ -53,7 +53,7 @@ class TTService():
            len(symbols),
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
-            **self.hps.model).cpu()
+            **self.hps.model).cuda()
        _ = self.net_g.eval()
        _ = utils.load_checkpoint(cfg["model"], self.net_g, None)

@@ -69,8 +69,8 @@ class TTService():
        stn_tst = self.get_text(text, self.hps)
        
        with torch.no_grad():
-            x_tst = stn_tst.cpu().unsqueeze(0)
-            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cpu()
+            x_tst = stn_tst.cuda().unsqueeze(0)
+            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()
            # tp = self.net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.2, length_scale=self.speed)
            audio = self.net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.2, length_scale=self.speed)[0][
                0, 0].data.cpu().float().numpy()
--- a/src/tts/vits/monotonic_align/__init__.py
+++ b/src/tts/vits/monotonic_align/__init__.py
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-from .monotonic_align.core import maximum_path_c
+from .core import maximum_path_c


 def maximum_path(neg_cent, mask):