Blackbox MeloTTS created

This commit is contained in:
gdw6463
2024-05-13 19:17:30 +08:00
parent 4c51fa24da
commit 68d5088552
6 changed files with 95 additions and 18 deletions

View File

@ -1,8 +1,13 @@
# jarvis-models
## Dependencies
## Conda Environment and Python Library Requirements
```bash
conda create -n jarvis-models python==3.10.11
pip install -r sample/requirement_out_of_pytorch.txt
pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
```
## More Dependencies
| System | package | web | install command |
| --- | --- | --- | --- |
| --- | | --- | --- |
| python | filetype | https://pypi.org/project/filetype/ | pip install filetype |
| python | fastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
| python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
@ -13,15 +18,25 @@
| python | injector | https://github.com/python-injector/injector | pip install injector |
| python | langchain | https://github.com/langchain-ai/langchain | pip install langchain |
| python | chromadb | https://docs.trychroma.com/getting-started | pip install chromadb |
| python | lagent | https://github.com/InternLM/lagent/blob/main/README.md | pip install lagent |
## Start
Dev rh
Start the jarvis-models service via
```bash
uvicorn main:app --reload
```
or
```bash
python main.py
```
## Configuration
Create a ".env.yaml" file at the root of jarvis-models and copy the following YAML configuration into it:
```yaml
melotts:
url: http://{IP running docker melotts-api}:18080/convert/tts
tesou:
url: http://120.196.116.194:48891/chat/
@ -57,7 +72,3 @@ Model:
do_copy_in_default_stream: true
batch_size: 3
```
## Python library need to install
```bash
```

View File

@ -4,6 +4,9 @@ pip install fastapi
pip install python-multipart
pip install "uvicorn[standard]"
pip install SpeechRecognition
pip install gTTS
pip install PyYAML
pip install injector
pip install langchain
pip install chromadb
pip install lagent

View File

@ -13,6 +13,7 @@ from .text_and_image import TextAndImage
from .chroma_query import ChromaQuery
from .chroma_upsert import ChromaUpsert
from .chroma_chat import ChromaChat
from .melotts import MeloTTS
from .vlms import VLMS
from injector import inject, singleton
@ -36,6 +37,7 @@ class BlackboxFactory:
chroma_query: ChromaQuery,
chroma_upsert: ChromaUpsert,
chroma_chat: ChromaChat,
melotts: MeloTTS,
vlms: VLMS) -> None:
self.models["audio_to_text"] = audio_to_text
self.models["text_to_audio"] = text_to_audio
@ -51,6 +53,7 @@ class BlackboxFactory:
self.models["chroma_query"] = chroma_query
self.models["chroma_upsert"] = chroma_upsert
self.models["chroma_chat"] = chroma_chat
self.models["melotts"] = melotts
self.models["vlms"] = vlms
def __call__(self, *args, **kwargs):
@ -59,5 +62,5 @@ class BlackboxFactory:
def call_blackbox(self, blackbox_name: str) -> Blackbox:
model = self.models.get(blackbox_name)
if model is None:
raise ValueError("Invalid blockbox type")
raise ValueError("Invalid Blackbox Type...")
return model

47
src/blackbox/melotts.py Normal file
View File

@ -0,0 +1,47 @@
import io
import time
import requests
from fastapi import Request, Response, status
from fastapi.responses import JSONResponse
from injector import inject
from injector import singleton
from ..configuration import MeloConf
from .blackbox import Blackbox
@singleton
class MeloTTS(Blackbox):
    """Blackbox that converts text to speech through a remote MeloTTS service.

    The endpoint URL is taken from ``MeloConf.melotts`` and every request is
    sent to it over HTTP with ``requests``.
    """

    # URL of the MeloTTS conversion endpoint, as provided by MeloConf.
    melotts: str

    # Upper bound (seconds) for one synthesis request, so that a stalled
    # service cannot block the caller forever.
    REQUEST_TIMEOUT_SECONDS = 60

    @inject
    def __init__(self, melo_config: MeloConf) -> None:
        """Store the service URL supplied by the injected configuration."""
        self.melotts = melo_config.melotts

    def __call__(self, *args, **kwargs):
        """Shortcut for :meth:`processing`."""
        return self.processing(*args, **kwargs)

    def valid(self, *args, **kwargs) -> bool:
        """Return True when the first positional argument is a string.

        Returns False (instead of raising ``IndexError``) when called
        without positional arguments.
        """
        return bool(args) and isinstance(args[0], str)

    def processing(self, *args, **kwargs) -> io.BytesIO:
        """Synthesize speech for ``args[0]`` and return the audio bytes.

        Raises:
            requests.RequestException: if the service is unreachable, times
                out, or answers with an HTTP error status.
        """
        text = args[0]
        started_at = time.time()
        # The previous implementation called ``self.tts_service.read(text)``,
        # but no ``tts_service`` attribute is ever assigned on this class, so
        # every call failed with AttributeError. The configured endpoint is
        # used instead.
        # NOTE(review): the payload shape ({"text": ...}) and the assumption
        # that the response body is the raw audio are not established by this
        # file - confirm against the deployed melotts-api service.
        response = requests.post(
            self.melotts,
            json={"text": text},
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        audio = io.BytesIO(response.content)
        print("#### MeloTTS Service consume : ", (time.time() - started_at))
        return audio

    async def fast_api_handler(self, request: Request) -> Response:
        """Handle an HTTP request whose JSON body carries a ``text`` field.

        Returns a 400 JSON error when the body is not valid JSON or has no
        ``text`` entry, a 502 JSON error when the MeloTTS service call fails,
        and otherwise the synthesized audio as an attachment.
        """
        try:
            data = await request.json()
        except Exception:
            # Any failure to decode the body is reported as a client error;
            # unlike a bare ``except:``, this does not swallow task
            # cancellation or interpreter-exit signals.
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
        # A JSON body that is not an object (e.g. a list) has no "text" key.
        text = data.get("text") if isinstance(data, dict) else None
        if text is None:
            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        try:
            by = self.processing(text)
        except requests.RequestException:
            return JSONResponse(content={"error": "melotts service error"}, status_code=status.HTTP_502_BAD_GATEWAY)
        return Response(content=by.read(), media_type="audio/mp3", headers={"Content-Disposition": "attachment; filename=audio.mp3"})

View File

@ -31,10 +31,10 @@ class Configuration():
if cfg is None:
cfg = self.cfg
return self.get(path.split("."), cfg)
lenght = len(path)
if lenght == 0 or not isinstance(cfg, dict):
length = len(path)
if length == 0 or not isinstance(cfg, dict):
return None
if lenght == 1:
if length == 1:
return cfg.get(path[0])
return self.get(path[1:], cfg.get(path[0]))
@ -43,5 +43,13 @@ class TesouConf():
url: str
@inject
def __init__(self,config: Configuration) -> None:
def __init__(self, config: Configuration) -> None:
self.url = config.get("tesou.url")
class MeloConf:
    """Settings for the MeloTTS blackbox, read from the injected Configuration."""

    # Value found under the "melotts.url" configuration path.
    melotts: str

    @inject
    def __init__(self, config: Configuration) -> None:
        """Look up ``melotts.url`` once and keep the result on the instance."""
        endpoint = config.get("melotts.url")
        self.melotts = endpoint

View File

@ -1,11 +1,11 @@
openapi: 3.0.3
info:
title: Jarvis models APIs
title: J.A.R.V.I.S. Models APIs
description: |-
boardware
BoardWare J.A.R.V.I.S. Models APIs
contact:
email: chenyunda218@gmail.com
version: 0.0.1
email: jarvis-support@boardware.com
version: 0.1.0
servers:
- url: http://localhost:8080
description: Local server
@ -91,3 +91,8 @@ components:
- audio_chat
- g2e
- text_and_image
- chroma_query
- chroma_chat
- chroma_upsert
- melotts
- vlms