Blackbox MeloTTS created

2025-12-13 16:53:24 +00:00 · 2024-05-13 19:17:30 +08:00
parent 4c51fa24da
commit 68d5088552
6 changed files with 95 additions and 18 deletions
--- a/README.md
+++ b/README.md
@ -1,8 +1,13 @@
 # jarvis-models
-## Dependencies
-
+## Conda Environment and Python Library Requirement
+```bash
+conda create -n jarvis-models python==3.10.11
+conda activate jarvis-models
+pip install -r sample/requirement_out_of_pytorch.txt
+pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
+```
+## More Dependencies
 | System | package | web | install command |
-| --- | --- | --- | --- |
+| --- | --- | --- | --- |
 | python | filetype | https://pypi.org/project/filetype/ | pip install filetype |
 | python | fastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
 | python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
@ -13,15 +18,25 @@
 | python | injector | https://github.com/python-injector/injector | pip install injector |
 | python | langchain | https://github.com/langchain-ai/langchain | pip install langchain |
 | python | chromadb | https://docs.trychroma.com/getting-started | pip install chromadb |
+| python | lagent | https://github.com/InternLM/lagent/blob/main/README.md | pip install lagent |
+

 ## Start
-Dev rh 
+Start the jarvis-models service via
 ```bash
 uvicorn main:app --reload
 ```
+or
+```bash
+python main.py
+```

 ## Configuration
+Create a `.env.yaml` file at the root of jarvis-models and copy the following YAML configuration into it:
 ```yaml
+melotts:
+  url: http://{IP running docker melotts-api}:18080/convert/tts
+
 tesou:
  url: http://120.196.116.194:48891/chat/

@ -57,7 +72,3 @@ Model:
      do_copy_in_default_stream: true
  batch_size: 3
 ```
-
-## Python library need to install
-```bash
-```
--- a/install-dependencies.sh
+++ b/install-dependencies.sh
@ -4,6 +4,9 @@ pip install fastapi
 pip install python-multipart
 pip install "uvicorn[standard]"
 pip install SpeechRecognition
+pip install gTTS
 pip install PyYAML
 pip install injector
+pip install langchain
+pip install chromadb
 pip install lagent
--- a/src/blackbox/blackbox_factory.py
+++ b/src/blackbox/blackbox_factory.py
@ -13,6 +13,7 @@ from .text_and_image import TextAndImage
 from .chroma_query import ChromaQuery
 from .chroma_upsert import ChromaUpsert
 from .chroma_chat import ChromaChat
+from .melotts import MeloTTS
 from .vlms import VLMS
 from injector import inject, singleton

@ -36,6 +37,7 @@ class BlackboxFactory:
                 chroma_query: ChromaQuery,
                 chroma_upsert: ChromaUpsert,
                 chroma_chat: ChromaChat,
+                 melotts: MeloTTS,
                 vlms: VLMS) -> None:
        self.models["audio_to_text"] = audio_to_text
        self.models["text_to_audio"] = text_to_audio
@ -51,6 +53,7 @@ class BlackboxFactory:
        self.models["chroma_query"] = chroma_query
        self.models["chroma_upsert"] = chroma_upsert
        self.models["chroma_chat"] = chroma_chat
+        self.models["melotts"] = melotts
        self.models["vlms"] = vlms

    def __call__(self, *args, **kwargs):
@ -59,5 +62,5 @@ class BlackboxFactory:
    def call_blackbox(self, blackbox_name: str) -> Blackbox:
        model = self.models.get(blackbox_name)
        if model is None:
-            raise ValueError("Invalid blockbox type")
+            raise ValueError("Invalid Blackbox Type...")
        return model
--- a/src/blackbox/melotts.py
+++ b/src/blackbox/melotts.py
@ -0,0 +1,47 @@
+import io
+import time
+
+import requests
+from fastapi import Request, Response, status
+from fastapi.responses import JSONResponse
+from injector import inject
+from injector import singleton
+
+from ..configuration import MeloConf
+from .blackbox import Blackbox
+
+
+@singleton
+class MeloTTS(Blackbox):
+    """Blackbox that turns text into speech via a remote MeloTTS HTTP endpoint.
+
+    The endpoint URL is taken from the injected ``MeloConf``.
+    """
+
+    # URL of the MeloTTS conversion endpoint.
+    melotts: str
+
+    @inject
+    def __init__(self, melo_config: MeloConf) -> None:
+        self.melotts = melo_config.melotts
+
+    def __call__(self, *args, **kwargs):
+        """Delegate to :meth:`processing` so the instance is callable."""
+        return self.processing(*args, **kwargs)
+
+    def valid(self, *args, **kwargs) -> bool:
+        """Return True when the first positional argument is a string."""
+        # Guard against an empty call instead of raising IndexError.
+        return bool(args) and isinstance(args[0], str)
+
+    def processing(self, *args, **kwargs) -> io.BytesIO:
+        """Send ``args[0]`` to the MeloTTS service and return the audio.
+
+        Returns:
+            An ``io.BytesIO`` positioned at the start of the response body.
+
+        Raises:
+            requests.HTTPError: if the service answers with an error status.
+        """
+        text = args[0]
+        current_time = time.time()
+        # The original called ``self.tts_service.read(text)``, but no
+        # ``tts_service`` attribute is ever set, so it always failed.
+        # NOTE(review): assumes the endpoint accepts JSON ``{"text": ...}``
+        # and replies with raw audio bytes - confirm against melotts-api.
+        response = requests.post(self.melotts, json={"text": text})
+        response.raise_for_status()
+        audio = io.BytesIO(response.content)
+        print("#### MeloTTS Service consume : ", (time.time()-current_time))
+        return audio
+
+    async def fast_api_handler(self, request: Request) -> Response:
+        """Handle a request whose JSON body carries the text under ``"text"``.
+
+        Responds with HTTP 400 when the body is not valid JSON or ``"text"``
+        is missing; otherwise returns the synthesized audio as an attachment.
+        """
+        try:
+            data = await request.json()
+        except Exception:
+            # Narrower than a bare ``except``: lets task cancellation and
+            # interpreter-exit signals propagate.
+            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
+        text = data.get("text")
+        if text is None:
+            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+        by = self.processing(text)
+        return Response(content=by.read(), media_type="audio/mp3", headers={"Content-Disposition": "attachment; filename=audio.mp3"})
+
+
--- a/src/configuration.py
+++ b/src/configuration.py
@ -31,10 +31,10 @@ class Configuration():
            if cfg is None:
                cfg = self.cfg
            return self.get(path.split("."), cfg)
-        lenght = len(path)
-        if lenght == 0  or not isinstance(cfg, dict):
+        length = len(path)
+        if length == 0  or not isinstance(cfg, dict):
            return None
-        if lenght == 1:
+        if length == 1:
            return cfg.get(path[0])
        return self.get(path[1:], cfg.get(path[0]))
        
@ -43,5 +43,13 @@ class TesouConf():
    url: str
    
    @inject
-    def __init__(self,config: Configuration) -> None:
+    def __init__(self, config: Configuration) -> None:
        self.url = config.get("tesou.url")
+
+
+class MeloConf:
+    """Holds the MeloTTS setting read from the application configuration."""
+
+    # Value of the "melotts.url" configuration entry.
+    melotts: str
+
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        endpoint = config.get("melotts.url")
+        self.melotts = endpoint
--- a/swagger.yml
+++ b/swagger.yml
@ -1,11 +1,11 @@
 openapi: 3.0.3
 info:
-  title: Jarvis models APIs
+  title: J.A.R.V.I.S. Models APIs
  description: |-
-    boardware
+    BoardWare J.A.R.V.I.S. Models APIs
  contact:
-    email: chenyunda218@gmail.com
-  version: 0.0.1
+    email: jarvis-support@boardware.com
+  version: 0.1.0
 servers:
  - url: http://localhost:8080
    description: Local server
@ -91,3 +91,8 @@ components:
        - audio_chat
        - g2e
        - text_and_image
+        - chroma_query
+        - chroma_chat
+        - chroma_upsert
+        - melotts
+        - vlms