Files
jarvis-models/src/blackbox/audio_to_text.py
2024-04-30 15:44:14 +08:00

45 lines
1.5 KiB
Python

import asyncio
import io

import filetype
import speech_recognition as sr
from fastapi import Response, status
from fastapi.responses import JSONResponse
from injector import singleton

from .blackbox import Blackbox
@singleton
class AudioToText(Blackbox):
    """Blackbox that transcribes WAV audio into text.

    Transcription is delegated to ``speech_recognition``'s
    ``Recognizer.recognize_google``.
    """

    def __call__(self, *args, **kwargs):
        """Transcribe ``args[0]``; shorthand for :meth:`processing`."""
        return self.processing(*args, **kwargs)

    def valid(self, *args, **kwargs) -> bool:
        """Return ``True`` when ``args[0]`` is detected as WAV by ``filetype``.

        A missing or ``None`` argument is reported as invalid instead of
        raising.
        """
        if not args or args[0] is None:
            return False
        kind = filetype.guess(args[0])
        return kind is not None and kind.extension == "wav"

    def processing(self, *args, **kwargs):
        """Transcribe the audio passed as the first positional argument.

        Args:
            *args: ``args[0]`` is the audio payload: raw ``bytes`` /
                ``bytearray``, or anything ``filetype.guess`` and
                ``sr.AudioFile`` both accept.

        Returns:
            The text returned by ``Recognizer.recognize_google``.

        Raises:
            ValueError: If no payload is given ("Data is required") or the
                payload is not detected as WAV ("Invalid data").

        Errors raised by ``speech_recognition`` itself are propagated
        unchanged.
        """
        data = args[0] if args else None
        if data is None:
            raise ValueError("Data is required")
        # Sniff the payload before wrapping it: checking the raw bytes does
        # not depend on filetype's handling of file-like objects.
        if not self.valid(data):
            raise ValueError("Invalid data")
        if isinstance(data, (bytes, bytearray)):
            # sr.AudioFile needs a path or a readable file object.
            data = io.BytesIO(data)

        recognizer = sr.Recognizer()
        with sr.AudioFile(data) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)

    async def fast_api_handler(self, request) -> Response:
        """Handle a form upload whose ``audio`` field holds the WAV file.

        Responds with ``{"txt": <transcript>}`` and 200 on success,
        ``{"error": ...}`` and 400 when the field is missing, the payload is
        rejected, or no speech could be recognised, and ``{"error": ...}``
        and 502 when the recognition service request fails.
        """
        form = await request.form()
        upload = form.get("audio")
        if upload is None:
            return JSONResponse(
                content={"error": "data is required"},
                status_code=status.HTTP_400_BAD_REQUEST,
            )
        payload = await upload.read()

        # processing() is synchronous and blocks on a network call, so it
        # cannot be awaited directly; run it in the default executor to keep
        # the event loop responsive.
        loop = asyncio.get_running_loop()
        try:
            txt = await loop.run_in_executor(None, self.processing, payload)
        except ValueError as e:
            return JSONResponse(
                content={"error": str(e)},
                status_code=status.HTTP_400_BAD_REQUEST,
            )
        except sr.UnknownValueError:
            # Raised when the recognizer cannot make out any speech; the
            # exception carries no useful message of its own.
            return JSONResponse(
                content={"error": "speech could not be recognized"},
                status_code=status.HTTP_400_BAD_REQUEST,
            )
        except sr.RequestError as e:
            # The upstream recognition API was unreachable or refused the call.
            return JSONResponse(
                content={"error": str(e)},
                status_code=status.HTTP_502_BAD_GATEWAY,
            )
        return JSONResponse(content={"txt": txt}, status_code=status.HTTP_200_OK)