feat: text to audio

2025-12-13 16:53:24 +00:00 · 2024-03-18 17:31:26 +08:00
parent 59d257b391
commit 8513270e9e
9 changed files with 63 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -160,4 +160,5 @@ cython_debug/
 #.idea/
 # Macos
-.DS_Store
+.DS_Store
 playground.py
--- a/README.md
+++ b/README.md
@ -7,9 +7,10 @@
 | python | fastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
 | python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
 | python | uvicorn | https://www.uvicorn.org/ | pip install "uvicorn[standard]" | 
-
+| python | SpeechRecognition |  https://pypi.org/project/SpeechRecognition/ |  pip install SpeechRecognition |
 ## Start
 Dev
 ```bash
-src git:(main) pip install "uvicorn[standard]"
+cd src
 uvicorn main:app --reload
 ```
--- a/audio.mp3
+++ b/audio.mp3
--- a/src/blackbox/audio_to_text.py
+++ b/src/blackbox/audio_to_text.py
@ -14,7 +14,7 @@ class AudioToText(Blackbox):
            return False
        return kind.extension == "wav"
-    def processing(self, data: bytes | io.BytesIO):
+    async def processing(self, data: bytes | io.BytesIO):
        if data is None:
            raise ValueError("Data is required")
        if isinstance(data, bytes):
@ -23,9 +23,9 @@ class AudioToText(Blackbox):
            raise ValueError("Invalid data")
        r = sr.Recognizer()
        with sr.AudioFile(data) as source:
-          audio_data = r.record(source)
+            audio_data = r.record(source)
-          text = r.recognize_google(audio_data)
+            text = r.recognize_google(audio_data)
-          return text
+            return text
    async def fast_api_handler(self, request) -> Response:
        data = (await request.form()).get("data")
@ -33,7 +33,7 @@ class AudioToText(Blackbox):
            return JSONResponse(content={"error": "data is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        d = await data.read()
        try:
-            txt = self.processing(d)
+            txt = await self.processing(d)
        except ValueError as e:
            return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST)
        return JSONResponse(content={"txt": txt}, status_code=status.HTTP_200_OK)
--- a/src/blackbox/blackbox.py
+++ b/src/blackbox/blackbox.py
@ -2,20 +2,37 @@ from abc import ABC, abstractmethod
 from fastapi import Request, Response
 class Blackbox(ABC):
-
+    """Blackbox class that provides a standard way to create a blackbox class using
    inheritance. All blackbox classes should inherit from this class and implement
    the methods processing, valid and fast_api_handler.
    If implemented correctly, the blackbox class can be used in the main.py file
    """
    def __init__(self, config: any) -> None:
        pass
    """
    processing method should return the processed data. The data is passed as an argument
    to the method. Processing should not interact with the disk; it should operate only on
    in-memory data such as dict / list / string / bytes / io.BytesIO.
    The output should likewise be an in-memory data type.
    """
    @abstractmethod
-    def processing(self, data: any):
+    async def processing(self, data: any) -> any:
        pass
    """
    valid method should return True if the data is valid and False if the data is invalid
    """
    @abstractmethod
    def valid(self, data: any) -> bool:
        pass
-    
+
    """
    fast_api_handler method should return a fastapi Response object. This method is used
    to handle the request from the fastapi server. The request object is passed as an argument
    to the method.
    """
    @abstractmethod
    def fast_api_handler(self, request: Request) -> Response:
        pass
--- a/src/blackbox/blockbox_factory.py
+++ b/src/blackbox/blockbox_factory.py
@ -1,5 +1,6 @@
 from blackbox.audio_to_text import AudioToText
 from blackbox.blackbox import Blackbox
 from blackbox.text_to_audio import TextToAudio
 class BlockboxFactory:
@ -8,5 +9,6 @@ class BlockboxFactory:
        if blockbox_type == "audio_to_text":
            return AudioToText(blockbox_config)
-        
+        if blockbox_type == "text_to_audio":
            return TextToAudio(blockbox_config)
        raise ValueError("Invalid blockbox type")
--- a/src/blackbox/text_to_audio.py
+++ b/src/blackbox/text_to_audio.py
@ -0,0 +1,29 @@
 from fastapi import Response, status
 from fastapi.responses import FileResponse, JSONResponse
 from blackbox.blackbox import Blackbox
 from gtts import gTTS
 from io import BytesIO
 class TextToAudio(Blackbox):
    """Blackbox that turns a text string into speech audio via gTTS.
    The audio is produced entirely in memory (a BytesIO buffer) and is served
    by fast_api_handler as an "audio/mpeg" attachment named audio.mp3.
    """
    def valid(self, data: any) -> bool:
        """Return True when data is a str, the only input processing accepts."""
        return isinstance(data, str)
    # NOTE(review): Blackbox.processing is declared `async def`, while this
    # override is synchronous. It is kept synchronous here so existing callers
    # keep working -- confirm which form the project wants.
    def processing(self, text: any) -> BytesIO:
        """Synthesize text (English) and return the audio as a rewound BytesIO.
        Raises:
            ValueError: if text does not pass valid().
        """
        if not self.valid(text):
            raise ValueError("Invalid data")
        tts = gTTS(text=text, lang="en")
        fp = BytesIO()
        tts.write_to_fp(fp)
        # Rewind so the caller reads the audio from the first byte.
        fp.seek(0)
        return fp
    async def fast_api_handler(self, request) -> Response:
        """Handle a request whose JSON body looks like {"text": "<string>"}.
        Returns the synthesized audio on success, or a 400 JSON error when the
        body is not valid JSON, is not a JSON object, has no "text" entry, or
        the text is rejected by processing.
        """
        try:
            data = await request.json()
        except Exception:
            # Not a bare `except:` -- task cancellation and KeyboardInterrupt
            # must keep propagating instead of being reported as a parse error.
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
        # Valid JSON may still be a list / string / number, which has no .get().
        if not isinstance(data, dict):
            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        text = data.get("text")
        if text is None:
            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        try:
            by = self.processing(text)
        except ValueError as e:
            # Same mapping as AudioToText.fast_api_handler: bad input is a
            # client error, not a server error.
            return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST)
        return Response(content=by.read(), media_type="audio/mpeg", headers={"Content-Disposition": "attachment; filename=audio.mp3"})
--- a/src/main.py
+++ b/src/main.py
@ -6,7 +6,6 @@ from fastapi.responses import JSONResponse
 from blackbox.blockbox_factory import BlockboxFactory
 app = FastAPI()
 blackbox_factory = BlockboxFactory()
@app.post("/")
--- a/src/my_file.mp3
+++ b/src/my_file.mp3