Merge pull request #4 from BoardWare-Genius/refactor

Refactor
2025-12-13 16:53:24 +00:00 · 2024-04-10 10:35:43 +08:00
parent e3e29e20d9 005acc0874
commit 335ff7cb8d
5 changed files with 13 additions and 5 deletions
--- a/cuda.py
+++ b/cuda.py
@ -0,0 +1,5 @@
 import torch
 # Quick environment sanity check: report the installed torch build first,
 # then whether torch can see a usable CUDA device.
 torch_version = torch.__version__
 print("Torch version:", torch_version)
 cuda_enabled = torch.cuda.is_available()
 print("Is CUDA enabled?", cuda_enabled)
--- a/src/blackbox/tesou.py
+++ b/src/blackbox/tesou.py
@ -31,7 +31,7 @@ class Tesou(Blackbox):
        except:
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
        user_id = data.get("user_id")
-        user_prompt = data.get("prompt")    
+        user_prompt = data.get("prompt")
        if user_prompt is None:
            return JSONResponse(content={"error": "question is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        return JSONResponse(content={"Response": self.processing(user_id, user_prompt)}, status_code=status.HTTP_200_OK)
--- a/src/blackbox/tts.py
+++ b/src/blackbox/tts.py
@ -1,4 +1,5 @@
 import io
 import time
 from ntpath import join
 from fastapi import Request, Response, status
@ -16,7 +17,9 @@ class TTS(Blackbox):
    def processing(self, *args, **kwargs) -> io.BytesIO:
        """Synthesize speech for the given text and print how long it took.

        Args:
            *args: The first positional argument is the text passed to
                ``self.tts_service.read``; further positionals are ignored.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The value returned by ``self.tts_service.read(text)``.

        Raises:
            IndexError: If no positional argument is supplied.
        """
        text = args[0]
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed (or go negative) when the system wall clock is adjusted.
        started = time.perf_counter()
        audio = self.tts_service.read(text)
        print("#### TTS Service consume : ", (time.perf_counter() - started))
        return audio
    def valid(self, *args, **kwargs) -> bool:
--- a/src/tts/tts_service.py
+++ b/src/tts/tts_service.py
@ -53,7 +53,7 @@ class TTService():
            len(symbols),
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
-            **self.hps.model).cpu()
+            **self.hps.model).cuda()
        _ = self.net_g.eval()
        _ = utils.load_checkpoint(cfg["model"], self.net_g, None)
@ -69,8 +69,8 @@ class TTService():
        stn_tst = self.get_text(text, self.hps)
        with torch.no_grad():
-            x_tst = stn_tst.cpu().unsqueeze(0)
+            x_tst = stn_tst.cuda().unsqueeze(0)
-            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cpu()
+            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()
            # tp = self.net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.2, length_scale=self.speed)
            audio = self.net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.2, length_scale=self.speed)[0][
                0, 0].data.cpu().float().numpy()
--- a/src/tts/vits/monotonic_align/init.py
+++ b/src/tts/vits/monotonic_align/init.py
@ -1,6 +1,6 @@
 import numpy as np
 import torch
-from .monotonic_align.core import maximum_path_c
+from .core import maximum_path_c
 def maximum_path(neg_cent, mask):