From d7577746987440dde69d2ec3c6a967c257ead8e2 Mon Sep 17 00:00:00 2001 From: 0Xiao0 <511201264@qq.com> Date: Thu, 22 Aug 2024 16:24:58 +0800 Subject: [PATCH] delete asr tts unnecessary files --- src/blackbox/asr_bak.py | 45 -------------- src/blackbox/asrsensevoice.py | 103 -------------------------------- src/blackbox/cosyvoicetts.py | 93 ----------------------------- src/blackbox/melotts.py | 108 ---------------------------------- src/blackbox/tts_bak.py | 40 ------------- 5 files changed, 389 deletions(-) delete mode 100644 src/blackbox/asr_bak.py delete mode 100644 src/blackbox/asrsensevoice.py delete mode 100644 src/blackbox/cosyvoicetts.py delete mode 100644 src/blackbox/melotts.py delete mode 100644 src/blackbox/tts_bak.py diff --git a/src/blackbox/asr_bak.py b/src/blackbox/asr_bak.py deleted file mode 100644 index 9f6f614..0000000 --- a/src/blackbox/asr_bak.py +++ /dev/null @@ -1,45 +0,0 @@ -from io import BytesIO -from typing import Any, Coroutine - -from fastapi import Request, Response, status -from fastapi.responses import JSONResponse - -from ..asr.rapid_paraformer.utils import read_yaml -from ..asr.rapid_paraformer import RapidParaformer -from .blackbox import Blackbox -from injector import singleton, inject - -@singleton -class ASR(Blackbox): - - @inject - def __init__(self,path = ".env.yaml") -> None: - config = read_yaml(path) - self.paraformer = RapidParaformer(config) - - def __call__(self, *args, **kwargs): - return self.processing(*args, **kwargs) - - async def processing(self, *args, **kwargs): - data = args[0] - results = self.paraformer([BytesIO(data)]) - if len(results) == 0: - return None - return results[0] - - def valid(self, data: any) -> bool: - if isinstance(data, bytes): - return True - return False - - async def fast_api_handler(self, request: Request) -> Response: - data = (await request.form()).get("audio") - if data is None: - # self.logger.warn("asr bag request","type", "fast_api_handler", "api", "asr") - return JSONResponse(content={"error": "data is required"}, status_code=status.HTTP_400_BAD_REQUEST) - d = await data.read() - try: - txt = await self.processing(d) - except ValueError as e: - return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST) - return JSONResponse(content={"text": txt}, status_code=status.HTTP_200_OK) \ No newline at end of file diff --git a/src/blackbox/asrsensevoice.py b/src/blackbox/asrsensevoice.py deleted file mode 100644 index d3d0e78..0000000 --- a/src/blackbox/asrsensevoice.py +++ /dev/null @@ -1,103 +0,0 @@ -from io import BytesIO -from typing import Any, Coroutine - -from fastapi import Request, Response, status -from fastapi.responses import JSONResponse - -from funasr import AutoModel -from funasr.utils.postprocess_utils import rich_transcription_postprocess -from .blackbox import Blackbox -from injector import singleton, inject - -import tempfile - -import os -from ..configuration import SenseVoiceConf - -from ..log.logging_time import logging_time -import logging -logger = logging.getLogger(__name__) - -@singleton -class ASR(Blackbox): - mode: str - url: str - speed: int - device: str - language: str - speaker: str - - @logging_time(logger=logger) - def model_init(self, sensevoice_config: SenseVoiceConf) -> None: - - model_dir = "/home/gpu/Workspace/Models/SenseVoice/SenseVoiceSmall" - - self.speed = sensevoice_config.speed - self.device = sensevoice_config.device - self.language = sensevoice_config.language - self.speaker = sensevoice_config.speaker - self.device = sensevoice_config.device - self.url = '' - self.mode = sensevoice_config.mode - self.asr = None - self.speaker_ids = None - os.environ['CUDA_VISIBLE_DEVICES'] = str(sensevoice_config.device) - if self.mode == 'local': - self.asr = AutoModel( - model=model_dir, - trust_remote_code=True, - remote_code= "/home/gpu/Workspace/SenseVoice/model.py", - vad_model="fsmn-vad", - vad_kwargs={"max_single_segment_time": 30000}, - device="cuda:0", - ) - - else: - self.url = sensevoice_config.url - logging.info('#### Initializing SenseVoiceASR Service in cuda:' + str(sensevoice_config.device) + ' mode...') - - @inject - def __init__(self, sensevoice_config: SenseVoiceConf) -> None: - self.model_init(sensevoice_config) - - def __call__(self, *args, **kwargs): - return self.processing(*args, **kwargs) - - async def processing(self, *args, **kwargs): - data = args[0] - # 创建一个临时文件 - with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file: - temp_audio_file.write(data) - temp_audio_path = temp_audio_file.name - res = self.asr.generate( - input=temp_audio_path, - cache={}, - language="auto", # "zh", "en", "yue", "ja", "ko", "nospeech" - use_itn=True, - batch_size_s=60, - merge_vad=True, # - merge_length_s=15, - ) - # results = self.paraformer([BytesIO(data)]) - results = rich_transcription_postprocess(res[0]["text"]) - os.remove(temp_audio_path) - if len(results) == 0: - return None - return results - - def valid(self, data: any) -> bool: - if isinstance(data, bytes): - return True - return False - - async def fast_api_handler(self, request: Request) -> Response: - data = (await request.form()).get("audio") - if data is None: - # self.logger.warn("asr bag request","type", "fast_api_handler", "api", "asr") - return JSONResponse(content={"error": "data is required"}, status_code=status.HTTP_400_BAD_REQUEST) - d = await data.read() - try: - txt = await self.processing(d) - except ValueError as e: - return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST) - return JSONResponse(content={"text": txt}, status_code=status.HTTP_200_OK) \ No newline at end of file diff --git a/src/blackbox/cosyvoicetts.py b/src/blackbox/cosyvoicetts.py deleted file mode 100644 index c444aaf..0000000 --- a/src/blackbox/cosyvoicetts.py +++ /dev/null @@ -1,93 +0,0 @@ -import io -import time - -import requests -from fastapi import Request, Response, status -from fastapi.responses import JSONResponse -from injector import inject -from injector import singleton - -from ..log.logging_time import logging_time - -from ..configuration import CosyVoiceConf -from .blackbox import Blackbox - -import soundfile -import pyloudnorm as pyln -import sys -sys.path.append('/home/gpu/Workspace/CosyVoice') -from cosyvoice.cli.cosyvoice import CosyVoice -from cosyvoice.utils.file_utils import load_wav -import torchaudio - -import os -import logging -logger = logging.getLogger(__name__) - -@singleton -class CosyVoiceTTS(Blackbox): - mode: str - url: str - speed: int - device: str - language: str - speaker: str - - @logging_time(logger=logger) - def model_init(self, cosyvoice_config: CosyVoiceConf) -> None: - self.speed = cosyvoice_config.speed - self.device = cosyvoice_config.device - self.language = cosyvoice_config.language - self.speaker = cosyvoice_config.speaker - self.device = cosyvoice_config.device - self.url = '' - self.mode = cosyvoice_config.mode - self.cosyvoicetts = None - self.speaker_ids = None - os.environ['CUDA_VISIBLE_DEVICES'] = str(cosyvoice_config.device) - if self.mode == 'local': - self.cosyvoicetts = CosyVoice('/home/gpu/Workspace/Models/CosyVoice/pretrained_models/CosyVoice-300M') - - else: - self.url = cosyvoice_config.url - logging.info('#### Initializing CosyVoiceTTS Service in cuda:' + str(cosyvoice_config.device) + ' mode...') - - @inject - def __init__(self, cosyvoice_config: CosyVoiceConf) -> None: - self.model_init(cosyvoice_config) - - def __call__(self, *args, **kwargs): - return self.processing(*args, **kwargs) - - def valid(self, *args, **kwargs) -> bool: - text = args[0] - return isinstance(text, str) - - @logging_time(logger=logger) - def processing(self, *args, **kwargs) -> io.BytesIO | bytes: - text = args[0] - current_time = time.time() - if self.mode == 'local': - audio = self.cosyvoicetts.inference_sft(text, self.language) - f = io.BytesIO() - soundfile.write(f, audio['tts_speech'].cpu().numpy().squeeze(0), 22050, format='wav') - f.seek(0) - print("#### CosyVoiceTTS Service consume - local : ", (time.time() - current_time)) - return f.read() - else: - message = { - "text": text - } - response = requests.post(self.url, json=message) - print("#### CosyVoiceTTS Service consume - docker : ", (time.time()-current_time)) - return response.content - - async def fast_api_handler(self, request: Request) -> Response: - try: - data = await request.json() - except: - return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST) - text = data.get("text") - if text is None: - return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST) - return Response(content=self.processing(text), media_type="audio/wav", headers={"Content-Disposition": "attachment; filename=audio.wav"}) \ No newline at end of file diff --git a/src/blackbox/melotts.py b/src/blackbox/melotts.py deleted file mode 100644 index f0e7812..0000000 --- a/src/blackbox/melotts.py +++ /dev/null @@ -1,108 +0,0 @@ -import io -import time - -import requests -from fastapi import Request, Response, status -from fastapi.responses import JSONResponse -from injector import inject -from injector import singleton - -from ..log.logging_time import logging_time - -from ..configuration import MeloConf -from .blackbox import Blackbox - -import soundfile -import pyloudnorm as pyln -from melo.api import TTS - -import logging -logger = logging.getLogger(__name__) - -@singleton -class MeloTTS(Blackbox): - mode: str - url: str - speed: int - device: str - language: str - speaker: str - - @logging_time(logger=logger) - def model_init(self, melo_config: MeloConf) -> None: - self.speed = melo_config.speed - self.device = melo_config.device - self.language = melo_config.language - self.speaker = melo_config.speaker - self.device = melo_config.device - self.url = '' - self.mode = melo_config.mode - self.melotts = None - self.speaker_ids = None - if self.mode == 'local': - self.melotts = TTS(language=self.language, device=self.device) - self.speaker_ids = self.melotts.hps.data.spk2id - else: - self.url = melo_config.url - logging.info('#### Initializing MeloTTS Service in ' + self.device + ' mode...') - - @inject - def __init__(self, melo_config: MeloConf) -> None: - self.model_init(melo_config) - - def __call__(self, *args, **kwargs): - return self.processing(*args, **kwargs) - - def valid(self, *args, **kwargs) -> bool: - text = args[0] - return isinstance(text, str) - - @logging_time(logger=logger) - def processing(self, *args, **kwargs) -> io.BytesIO | bytes: - text = args[0] - current_time = time.time() - if self.mode == 'local': - audio = self.melotts.tts_to_file(text, self.speaker_ids[self.speaker], speed=self.speed) - f = io.BytesIO() - soundfile.write(f, audio, 44100, format='wav') - f.seek(0) - # print("#### MeloTTS Service consume - local : ", (time.time() - current_time)) - # return f.read() - - - # Read the audio data from the buffer - data, rate = soundfile.read(f, dtype='float32') - - # Peak normalization - peak_normalized_audio = pyln.normalize.peak(data, -1.0) - - # Integrated loudness normalization - meter = pyln.Meter(rate) - loudness = meter.integrated_loudness(peak_normalized_audio) - loudness_normalized_audio = pyln.normalize.loudness(peak_normalized_audio, loudness, -12.0) - - # Write the loudness normalized audio to an in-memory buffer - normalized_audio_buffer = io.BytesIO() - soundfile.write(normalized_audio_buffer, loudness_normalized_audio, rate, format='wav') - normalized_audio_buffer.seek(0) - - print("#### MeloTTS Service consume - local : ", (time.time() - current_time)) - return normalized_audio_buffer.read() - - else: - message = { - "text": text - } - response = requests.post(self.url, json=message) - print("#### MeloTTS Service consume - docker : ", (time.time()-current_time)) - return response.content - - async def fast_api_handler(self, request: Request) -> Response: - try: - data = await request.json() - except: - return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST) - text = data.get("text") - if text is None: - return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST) - return Response(content=self.processing(text), media_type="audio/wav", headers={"Content-Disposition": "attachment; filename=audio.wav"}) \ No newline at end of file diff --git a/src/blackbox/tts_bak.py b/src/blackbox/tts_bak.py deleted file mode 100644 index 583e525..0000000 --- a/src/blackbox/tts_bak.py +++ /dev/null @@ -1,40 +0,0 @@ -import io -import time -from ntpath import join - -from fastapi import Request, Response, status -from fastapi.responses import JSONResponse -from .blackbox import Blackbox -from ..tts.tts_service import TTService -from injector import singleton - -@singleton -class TTS(Blackbox): - - def __init__(self, *args, **kwargs) -> None: - self.tts_service = TTService("yunfeineo") - - def __call__(self, *args, **kwargs): - return self.processing(*args, **kwargs) - - def processing(self, *args, **kwargs) -> io.BytesIO: - text = args[0] - current_time = time.time() - audio = self.tts_service.read(text) - print("#### TTS Service consume : ", (time.time()-current_time)) - return audio - - def valid(self, *args, **kwargs) -> bool: - text = args[0] - return isinstance(text, str) - - async def fast_api_handler(self, request: Request) -> Response: - try: - data = await request.json() - except: - return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST) - text = data.get("text") - if text is None: - return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST) - by = self.processing(text) - return Response(content=by.read(), media_type="audio/wav", headers={"Content-Disposition": "attachment; filename=audio.wav"}) \ No newline at end of file