Blackbox MeloTTS created

gdw6463
2024-05-13 19:17:30 +08:00
parent 4c51fa24da
commit 68d5088552
6 changed files with 95 additions and 18 deletions

View File

@@ -1,8 +1,13 @@
# jarvis-models
## Conda Environment and Python Library Requirements
```bash
conda create -n jarvis-models python==3.10.11
pip install -r sample/requirement_out_of_pytorch.txt
pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
```
## More Dependencies
| System | package | web | install command |
| --- | --- | --- | --- |
| python | filetype | https://pypi.org/project/filetype/ | pip install filetype |
| python | fastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
| python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
@@ -13,15 +18,25 @@
| python | injector | https://github.com/python-injector/injector | pip install injector |
| python | langchain | https://github.com/langchain-ai/langchain | pip install langchain |
| python | chromadb | https://docs.trychroma.com/getting-started | pip install chromadb |
| python | lagent | https://github.com/InternLM/lagent/blob/main/README.md | pip install lagent |
## Start
Start the jarvis-models service via:
```bash
uvicorn main:app --reload
```
or
```bash
python main.py
```
## Configuration
Create a `.env.yaml` file at the root of jarvis-models and copy in the following YAML configuration:
```yaml
melotts:
  url: http://{IP running docker melotts-api}:18080/convert/tts
tesou:
  url: http://120.196.116.194:48891/chat/
@@ -57,7 +72,3 @@ Model:
do_copy_in_default_stream: true
batch_size: 3
```
## Python library need to install
```bash
```

View File

@@ -4,6 +4,9 @@ pip install fastapi
pip install python-multipart
pip install "uvicorn[standard]"
pip install SpeechRecognition
pip install gTTS
pip install PyYAML
pip install injector
pip install langchain
pip install chromadb
pip install lagent

View File

@@ -13,6 +13,7 @@ from .text_and_image import TextAndImage
from .chroma_query import ChromaQuery
from .chroma_upsert import ChromaUpsert
from .chroma_chat import ChromaChat
from .melotts import MeloTTS
from .vlms import VLMS

from injector import inject, singleton
@@ -36,6 +37,7 @@ class BlackboxFactory:
                 chroma_query: ChromaQuery,
                 chroma_upsert: ChromaUpsert,
                 chroma_chat: ChromaChat,
                 melotts: MeloTTS,
                 vlms: VLMS) -> None:
        self.models["audio_to_text"] = audio_to_text
        self.models["text_to_audio"] = text_to_audio
@@ -51,6 +53,7 @@ class BlackboxFactory:
        self.models["chroma_query"] = chroma_query
        self.models["chroma_upsert"] = chroma_upsert
        self.models["chroma_chat"] = chroma_chat
        self.models["melotts"] = melotts
        self.models["vlms"] = vlms

    def __call__(self, *args, **kwargs):
@@ -59,5 +62,5 @@ class BlackboxFactory:
    def call_blackbox(self, blackbox_name: str) -> Blackbox:
        model = self.models.get(blackbox_name)
        if model is None:
            raise ValueError("Invalid Blackbox Type...")
        return model
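For context, a minimal sketch of how the new registry entry gets resolved at runtime, assuming the application wires BlackboxFactory through injector the same way the rest of the repo does; the module path, the bare Injector() setup, and a readable .env.yaml are all assumptions:

```python
# Illustrative only: resolve the new blackbox by name.
from injector import Injector

from src.blackbox.blackbox_factory import BlackboxFactory  # module path is an assumption

container = Injector()                      # the real app presumably registers its own bindings here
factory = container.get(BlackboxFactory)    # injector builds every constructor dependency, incl. MeloTTS

tts = factory.call_blackbox("melotts")      # returns the MeloTTS singleton
audio = tts("Hello from J.A.R.V.I.S.")      # __call__ delegates to processing(), yielding io.BytesIO
factory.call_blackbox("does_not_exist")     # raises ValueError("Invalid Blackbox Type...")
```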

src/blackbox/melotts.py (new file, 47 lines)
View File

@@ -0,0 +1,47 @@
import io
import time

import requests
from fastapi import Request, Response, status
from fastapi.responses import JSONResponse
from injector import inject, singleton

from ..configuration import MeloConf
from .blackbox import Blackbox


@singleton
class MeloTTS(Blackbox):
    melotts: str

    @inject
    def __init__(self, melo_config: MeloConf) -> None:
        self.melotts = melo_config.melotts

    def __call__(self, *args, **kwargs):
        return self.processing(*args, **kwargs)

    def valid(self, *args, **kwargs) -> bool:
        text = args[0]
        return isinstance(text, str)

    def processing(self, *args, **kwargs) -> io.BytesIO:
        text = args[0]
        current_time = time.time()
        # Forward the text to the melotts-api service configured as melotts.url;
        # the {"text": ...} payload shape mirrors fast_api_handler below and is an assumption.
        response = requests.post(self.melotts, json={"text": text})
        audio = io.BytesIO(response.content)
        print("#### MeloTTS Service consume : ", (time.time() - current_time))
        return audio

    async def fast_api_handler(self, request: Request) -> Response:
        try:
            data = await request.json()
        except Exception:
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
        text = data.get("text")
        if text is None:
            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        by = self.processing(text)
        return Response(content=by.read(), media_type="audio/mp3", headers={"Content-Disposition": "attachment; filename=audio.mp3"})
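A minimal sketch of driving the new class outside FastAPI, assuming a Configuration that loads .env.yaml and the MeloConf added in the configuration diff below; the import paths are assumptions:

```python
# Illustrative only: synthesize speech through the melotts-api container directly.
from src.configuration import Configuration, MeloConf   # module paths are assumptions
from src.blackbox.melotts import MeloTTS

config = Configuration()                 # assumed to load .env.yaml like the rest of the repo
melo = MeloTTS(MeloConf(config))

text = "Hello from MeloTTS"
if melo.valid(text):
    audio = melo(text)                   # io.BytesIO holding the MP3 bytes from /convert/tts
    with open("audio.mp3", "wb") as f:
        f.write(audio.read())
```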

View File

@@ -31,10 +31,10 @@ class Configuration():
        if cfg is None:
            cfg = self.cfg
            return self.get(path.split("."), cfg)
        length = len(path)
        if length == 0 or not isinstance(cfg, dict):
            return None
        if length == 1:
            return cfg.get(path[0])
        return self.get(path[1:], cfg.get(path[0]))
@@ -43,5 +43,13 @@ class TesouConf():
    url: str

    @inject
    def __init__(self, config: Configuration) -> None:
        self.url = config.get("tesou.url")


class MeloConf():
    melotts: str

    @inject
    def __init__(self, config: Configuration) -> None:
        self.melotts = config.get("melotts.url")
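As a standalone illustration of the dotted-path lookup that `config.get("melotts.url")` relies on, the recursion from Configuration.get above can be exercised against a plain dict:

```python
# Mirrors the lookup logic of Configuration.get, shown against an in-memory dict.
cfg = {"melotts": {"url": "http://localhost:18080/convert/tts"}}

def get(path, cfg):
    length = len(path)
    if length == 0 or not isinstance(cfg, dict):
        return None
    if length == 1:
        return cfg.get(path[0])
    return get(path[1:], cfg.get(path[0]))

assert get("melotts.url".split("."), cfg) == "http://localhost:18080/convert/tts"
assert get("melotts.missing".split("."), cfg) is None
```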

View File

@@ -1,11 +1,11 @@
openapi: 3.0.3
info:
  title: J.A.R.V.I.S. Models APIs
  description: |-
    BoardWare J.A.R.V.I.S. Models APIs
  contact:
    email: jarvis-support@boardware.com
  version: 0.1.0
servers:
  - url: http://localhost:8080
    description: Local server
@@ -91,3 +91,8 @@ components:
- audio_chat
- g2e
- text_and_image
- chroma_query
- chroma_chat
- chroma_upsert
- melotts
- vlms
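Once main.py exposes the new blackbox over HTTP, a client call would look roughly like the sketch below; the /melotts route is an assumption (the routing is not part of this diff), while the JSON body and the audio/mp3 response follow fast_api_handler:

```python
# Hypothetical HTTP call against the running jarvis-models service (uvicorn main:app).
import requests

resp = requests.post(
    "http://localhost:8080/melotts",        # route path is an assumption
    json={"text": "Hello from MeloTTS"},    # fast_api_handler expects a "text" field
)
resp.raise_for_status()
with open("audio.mp3", "wb") as f:
    f.write(resp.content)                   # MP3 bytes produced by MeloTTS
```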