feat: sentiment engine

This commit is contained in:
superobk
2024-03-20 09:42:08 +08:00
parent f2d6b9e526
commit f14c36d77a
4 changed files with 64 additions and 4 deletions

View File

@ -0,0 +1,29 @@
import logging
import onnxruntime
from transformers import BertTokenizer
import numpy as np
class SentimentEngine():
    """Sentiment classifier backed by an ONNX model and a BERT tokenizer.

    The ONNX model is run through onnxruntime on the CPU execution provider,
    and input text is tokenized with the ``bert-base-chinese`` tokenizer.
    """

    def __init__(self, model_path="resources/sentiment_engine/models/paimon_sentiment.onnx"):
        """Load the ONNX inference session and the tokenizer.

        Args:
            model_path: Path of the ONNX model file handed to onnxruntime.
        """
        logging.info('Initializing Sentiment Engine...')
        self.ort_session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

    def infer(self, text):
        """Return the index of the highest-scoring class for ``text``.

        Args:
            text: Input passed straight to the tokenizer.

        Returns:
            The argmax class index of the first row of the model's
            ``logits`` output (a NumPy integer scalar).
        """
        # NOTE(review): no truncation is requested from the tokenizer, so very
        # long inputs are forwarded as-is - confirm the model accepts them.
        tokens = self.tokenizer(text, return_tensors="np")
        # The feed is cast to int64 regardless of what dtype the tokenizer returned.
        input_dict = {
            "input_ids": tokens["input_ids"].astype(np.int64),
            "attention_mask": tokens["attention_mask"].astype(np.int64),
        }
        logits = self.ort_session.run(["logits"], input_dict)[0]
        # Numerically stable softmax: shifting by the row maximum keeps every
        # exponent <= 0, so np.exp cannot overflow to inf and yield NaN
        # probabilities (np.argmax over NaNs would always report index 0).
        shifted = logits - np.max(logits, axis=-1, keepdims=True)
        exp_shifted = np.exp(shifted)
        probabilities = exp_shifted / np.sum(exp_shifted, axis=-1, keepdims=True)
        predicted = np.argmax(probabilities, axis=1)[0]
        logging.info('Sentiment Engine Infer: %s', predicted)
        return predicted

View File

@ -1,3 +1,4 @@
from .sentiment import Sentiment
from .tts import TTS
from ..asr.asr import ASR
from .audio_to_text import AudioToText
@ -11,6 +12,7 @@ class BlackboxFactory:
# Construct one instance of each blackbox up front; create_blackbox hands back
# these same stored instances for the "tts" and "sentiment_engine" names.
def __init__(self) -> None:
self.tts = TTS()
# ASR is the only blackbox here built with an argument: the "./.env.yaml" path.
self.asr = ASR("./.env.yaml")
# Returned by create_blackbox when blackbox_name == "sentiment_engine".
self.sentiment = Sentiment()
def create_blackbox(self, blackbox_name: str, blackbox_config: dict) -> Blackbox:
if blackbox_name == "audio_to_text":
@ -23,4 +25,6 @@ class BlackboxFactory:
return self.asr
if blackbox_name == "tts":
return self.tts
if blackbox_name == "sentiment_engine":
return self.sentiment
raise ValueError("Invalid blockbox type")

31
src/blackbox/sentiment.py Normal file
View File

@ -0,0 +1,31 @@
from typing import Any, Coroutine
from fastapi import Request, Response, status
from fastapi.responses import JSONResponse
from sentiment_engine.sentiment_engine import SentimentEngine
from .blackbox import Blackbox
class Sentiment(Blackbox):
    """Blackbox that serves SentimentEngine predictions through a JSON handler."""

    def __init__(self) -> None:
        """Create the underlying engine from the bundled ONNX model path."""
        self.engine = SentimentEngine('resources/sentiment_engine/models/paimon_sentiment.onnx')

    def valid(self, data: Any) -> bool:
        """Return True when ``data`` is a string, the only accepted input type."""
        return isinstance(data, str)

    def processing(self, text: Any) -> int:
        """Run inference on ``text`` and return the predicted class as a plain int."""
        # int() turns the engine's result into a JSON-serializable Python int.
        return int(self.engine.infer(text))

    async def fast_api_handler(self, request) -> Response:
        """Handle a request whose JSON body is expected to look like ``{"text": "..."}``.

        Returns:
            200 with ``{"sentiment": <int>}`` on success, or 400 with an
            ``{"error": ...}`` payload when the body is not parseable JSON,
            has no ``text`` field, or ``text`` is not a string.
        """
        try:
            data = await request.json()
        except Exception:
            # Not a bare ``except:`` so task cancellation and KeyboardInterrupt
            # still propagate instead of being reported as a parse error.
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)

        # A JSON body may be a list/string/number; only objects can carry "text".
        text = data.get("text") if isinstance(data, dict) else None
        if text is None:
            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        # Reject non-string payloads here instead of letting the tokenizer fail.
        if not self.valid(text):
            return JSONResponse(content={"error": "text must be a string"}, status_code=status.HTTP_400_BAD_REQUEST)

        sentiment = self.processing(text)
        return JSONResponse(content={"sentiment": sentiment}, status_code=status.HTTP_200_OK)

View File

@ -22,10 +22,6 @@ logging.getLogger().setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO)
from pydub import AudioSegment
class TTService():
def __init__(self, cfg, model, char, speed):