feat: sentiment engine

2025-12-13 16:53:24 +00:00 · 2024-03-20 09:42:08 +08:00
parent f2d6b9e526
commit f14c36d77a
4 changed files with 64 additions and 4 deletions
--- a/sentiment_engine/sentiment_engine.py
+++ b/sentiment_engine/sentiment_engine.py
@ -0,0 +1,29 @@
+import logging
+
+import onnxruntime
+from transformers import BertTokenizer
+import numpy as np
+
+
+class SentimentEngine():
+    """Text sentiment classifier backed by an ONNX model executed on CPU.
+
+    The ONNX session and the ``bert-base-chinese`` tokenizer are created once
+    in the constructor and reused by every call to ``infer``.
+    """
+
+    def __init__(self, model_path="resources/sentiment_engine/models/paimon_sentiment.onnx"):
+        """Load the ONNX model and the tokenizer.
+
+        Args:
+            model_path: Path of the ONNX model file handed to onnxruntime.
+        """
+        logging.info('Initializing Sentiment Engine...')
+        self.ort_session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
+
+    def infer(self, text):
+        """Return the index of the highest-scoring class for ``text``.
+
+        Args:
+            text: Input passed unchanged to the tokenizer.
+
+        Returns:
+            The argmax over the class axis for the first row of the model's
+            ``logits`` output (a NumPy integer). What each index means is
+            defined by the model and is not visible here.
+        """
+        tokens = self.tokenizer(text, return_tensors="np")
+        # The session is always fed int64 tensors, whatever dtype the
+        # tokenizer produced.
+        input_dict = {
+            "input_ids": tokens["input_ids"].astype(np.int64),
+            "attention_mask": tokens["attention_mask"].astype(np.int64),
+        }
+        logits = self.ort_session.run(["logits"], input_dict)[0]
+        # Softmax with the row maximum subtracted first: the largest exponent
+        # is exp(0) == 1, so large logits can no longer overflow to inf and
+        # turn the probabilities into NaN (which would make argmax pick the
+        # first NaN position instead of the real maximum).
+        exp_shifted = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
+        probabilities = exp_shifted / np.sum(exp_shifted, axis=-1, keepdims=True)
+        predicted = np.argmax(probabilities, axis=1)[0]
+        logging.info(f'Sentiment Engine Infer: {predicted}')
+        return predicted
--- a/src/blackbox/blackbox_factory.py
+++ b/src/blackbox/blackbox_factory.py
@ -1,3 +1,4 @@
+from .sentiment import Sentiment
 from .tts import TTS
 from ..asr.asr import ASR
 from .audio_to_text import AudioToText
@ -11,6 +12,7 @@ class BlackboxFactory:
    def __init__(self) -> None:
        self.tts = TTS()
        self.asr = ASR("./.env.yaml")
+        self.sentiment = Sentiment()

    def create_blackbox(self, blackbox_name: str, blackbox_config: dict) -> Blackbox:
        if blackbox_name == "audio_to_text":
@ -23,4 +25,6 @@ class BlackboxFactory:
            return self.asr
        if blackbox_name == "tts":
            return self.tts
+        if blackbox_name == "sentiment_engine":
+            return self.sentiment
        raise ValueError("Invalid blockbox type")
--- a/src/blackbox/sentiment.py
+++ b/src/blackbox/sentiment.py
@ -0,0 +1,31 @@
+from typing import Any, Coroutine
+
+from fastapi import Request, Response, status
+from fastapi.responses import JSONResponse
+
+from sentiment_engine.sentiment_engine import SentimentEngine
+from .blackbox import Blackbox
+
+
+class Sentiment(Blackbox):
+    """Blackbox that serves sentiment predictions through a FastAPI handler."""
+
+    def __init__(self) -> None:
+        """Create the underlying ``SentimentEngine`` with a fixed model path."""
+        self.engine = SentimentEngine('resources/sentiment_engine/models/paimon_sentiment.onnx')
+
+    def valid(self, data: Any) -> bool:
+        """Return True when ``data`` is a string."""
+        return isinstance(data, str)
+
+    def processing(self, text: Any) -> int:
+        """Run the engine on ``text`` and return the predicted class index."""
+        # int() turns the engine's result into a plain Python int so it can
+        # be placed in a JSON response.
+        return int(self.engine.infer(text))
+
+    async def fast_api_handler(self, request: Request) -> Response:
+        """Handle a request whose JSON body carries a ``text`` field.
+
+        Args:
+            request: Incoming request; its body is read with ``request.json()``.
+
+        Returns:
+            200 with ``{"sentiment": <int>}`` on success, or 400 with an
+            ``error`` message when the body cannot be parsed, ``text`` is
+            missing, or ``text`` is not a string.
+        """
+        try:
+            data = await request.json()
+        except Exception:
+            # Exception instead of a bare except: task cancellation and
+            # interpreter shutdown must keep propagating.
+            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
+        # Valid JSON is not necessarily an object; only a dict offers .get().
+        text = data.get("text") if isinstance(data, dict) else None
+        if text is None:
+            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+        if not self.valid(text):
+            # Reject non-string input here instead of letting it fail inside
+            # the engine.
+            return JSONResponse(content={"error": "text must be a string"}, status_code=status.HTTP_400_BAD_REQUEST)
+        sentiment = self.processing(text)
+        return JSONResponse(content={"sentiment": sentiment }, status_code=status.HTTP_200_OK)
+    
--- a/tts/tts_service.py
+++ b/tts/tts_service.py
@ -22,10 +22,6 @@ logging.getLogger().setLevel(logging.INFO)
 logging.basicConfig(level=logging.INFO)


-from pydub import AudioSegment
-
-
-
 class TTService():
    
    def __init__(self, cfg, model, char, speed):