feat: text to audio

This commit is contained in:
Dan Chen
2024-03-18 17:31:26 +08:00
parent 59d257b391
commit 8513270e9e
9 changed files with 63 additions and 14 deletions

1
.gitignore vendored
View File

@ -161,3 +161,4 @@ cython_debug/
# Macos
.DS_Store
playground.py

View File

@ -7,9 +7,10 @@
| python | fastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
| python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
| python | uvicorn | https://www.uvicorn.org/ | pip install "uvicorn[standard]" |
| python | SpeechRecognition | https://pypi.org/project/SpeechRecognition/ | pip install SpeechRecognition |
| python | gTTS | https://pypi.org/project/gTTS/ | pip install gTTS |
## Start
Dev
```bash
pip install "uvicorn[standard]"
cd src
uvicorn main:app --reload
```

BIN
audio.mp3 Normal file

Binary file not shown.

View File

@ -14,7 +14,7 @@ class AudioToText(Blackbox):
return False
return kind.extension == "wav"
def processing(self, data: bytes | io.BytesIO):
async def processing(self, data: bytes | io.BytesIO):
if data is None:
raise ValueError("Data is required")
if isinstance(data, bytes):
@ -23,9 +23,9 @@ class AudioToText(Blackbox):
raise ValueError("Invalid data")
r = sr.Recognizer()
with sr.AudioFile(data) as source:
audio_data = r.record(source)
text = r.recognize_google(audio_data)
return text
audio_data = r.record(source)
text = r.recognize_google(audio_data)
return text
async def fast_api_handler(self, request) -> Response:
data = (await request.form()).get("data")
@ -33,7 +33,7 @@ class AudioToText(Blackbox):
return JSONResponse(content={"error": "data is required"}, status_code=status.HTTP_400_BAD_REQUEST)
d = await data.read()
try:
txt = self.processing(d)
txt = await self.processing(d)
except ValueError as e:
return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST)
return JSONResponse(content={"txt": txt}, status_code=status.HTTP_200_OK)

View File

@ -2,20 +2,37 @@ from abc import ABC, abstractmethod
from typing import Any

from fastapi import Request, Response
class Blackbox(ABC):
    """Standard interface that every blackbox implements through inheritance.

    All blackbox classes should inherit from this class and implement the
    methods ``processing``, ``valid`` and ``fast_api_handler``. If implemented
    correctly, the blackbox class can be used in the main.py file.
    """

    def __init__(self, config: Any) -> None:
        """Accept the blackbox configuration; the base class does not use it."""
        pass

    @abstractmethod
    async def processing(self, data: Any) -> Any:
        """Return the processed form of ``data``.

        Processing should not interact with the disk: the input is an
        in-memory value such as a dict / list / str / bytes / io.BytesIO,
        and the output is of the same kind.
        """

    @abstractmethod
    def valid(self, data: Any) -> bool:
        """Return True if ``data`` is valid and False if it is invalid."""

    @abstractmethod
    async def fast_api_handler(self, request: Request) -> Response:
        """Handle a request from the FastAPI server.

        The incoming request object is passed as the argument, and the
        method returns a FastAPI ``Response``. Declared ``async`` because
        implementations await the request body.
        """

View File

@ -1,5 +1,6 @@
from blackbox.audio_to_text import AudioToText
from blackbox.blackbox import Blackbox
from blackbox.text_to_audio import TextToAudio
class BlockboxFactory:
@ -8,5 +9,6 @@ class BlockboxFactory:
if blockbox_type == "audio_to_text":
return AudioToText(blockbox_config)
if blockbox_type == "text_to_audio":
return TextToAudio(blockbox_config)
raise ValueError("Invalid blockbox type")

View File

@ -0,0 +1,29 @@
from fastapi import Response, status
from fastapi.responses import FileResponse, JSONResponse
from blackbox.blackbox import Blackbox
from gtts import gTTS
from io import BytesIO
class TextToAudio(Blackbox):
    """Blackbox that converts English text to speech audio with gTTS."""

    def valid(self, data: object) -> bool:
        """Return True when ``data`` is a string, False otherwise."""
        return isinstance(data, str)

    def processing(self, text: str) -> BytesIO:
        """Synthesize ``text`` and return the audio as an in-memory buffer.

        The returned buffer is rewound to the start so it can be read
        immediately.

        Raises:
            ValueError: if ``text`` is not a string.
        """
        # NOTE(review): Blackbox.processing is declared ``async`` while this
        # override is synchronous. It is kept synchronous here so existing
        # callers keep working -- confirm which form is intended.
        if not self.valid(text):
            raise ValueError("Invalid data")
        speech = gTTS(text=text, lang="en")
        buffer = BytesIO()
        speech.write_to_fp(buffer)
        buffer.seek(0)  # rewind: write_to_fp leaves the cursor at the end
        return buffer

    async def fast_api_handler(self, request) -> Response:
        """Turn a JSON request ``{"text": "..."}`` into an MP3 download.

        Responds with 400 and a JSON error body when the body is not valid
        JSON, when ``text`` is missing, or when ``text`` is not a string.
        """
        try:
            payload = await request.json()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that task
            # cancellation and interpreter-exit signals still propagate.
            return JSONResponse(
                content={"error": "json parse error"},
                status_code=status.HTTP_400_BAD_REQUEST,
            )

        # Valid JSON need not be an object (e.g. a list or a number); such a
        # body has no "text" field to read.
        text = payload.get("text") if isinstance(payload, dict) else None
        if text is None:
            return JSONResponse(
                content={"error": "text is required"},
                status_code=status.HTTP_400_BAD_REQUEST,
            )

        try:
            audio = self.processing(text)
        except ValueError as e:
            # Report invalid input as a client error, as the audio-to-text
            # handler does, instead of an unhandled server error.
            return JSONResponse(
                content={"error": str(e)},
                status_code=status.HTTP_400_BAD_REQUEST,
            )

        return Response(
            content=audio.read(),
            media_type="audio/mpeg",
            headers={"Content-Disposition": "attachment; filename=audio.mp3"},
        )

View File

@ -6,7 +6,6 @@ from fastapi.responses import JSONResponse
from blackbox.blockbox_factory import BlockboxFactory
app = FastAPI()
blackbox_factory = BlockboxFactory()
@app.post("/")

0
src/my_file.mp3 Normal file
View File