From 37343ac0fe2107f5037c858a9ee8f90f6220f628 Mon Sep 17 00:00:00 2001
From: 0Xiao0 <511201264@qq.com>
Date: Wed, 27 May 2026 17:16:11 +0800
Subject: [PATCH] feat: icon first commit

---
 main/bridge_server.py | 65 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/main/bridge_server.py b/main/bridge_server.py
index dea6cbf..352f186 100644
--- a/main/bridge_server.py
+++ b/main/bridge_server.py
@@ -2,6 +2,7 @@ import asyncio
 import base64
 import json
 import os
+import re
 import shutil
 import struct
 import sys
@@ -55,6 +56,16 @@ TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
 TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
 TTS_DISPLAY_SCROLL_GAP = "   "
 TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8"))
+# Leading "emotion=<name>" tag, "<...>" optional: group 1 is the name, group 2 the remaining text.
+EMOTION_TEXT_PATTERN = re.compile(
+    r"^\s*<?\s*emotion\s*=\s*([^\s>,，;；]+)\s*>?[\s,，;；]*(.*)$", re.DOTALL
+)
+EMOTION_TEST_SEQUENCE = [  # comma-separated BRIDGE_EMOTION_TEST_SEQUENCE; blank entries dropped
+    name
+    for name in map(str.strip, os.getenv("BRIDGE_EMOTION_TEST_SEQUENCE", "").split(","))
+    if name
+]
+EMOTION_TEST_INTERVAL_SECONDS = float(os.getenv("BRIDGE_EMOTION_TEST_INTERVAL_SECONDS", "2.0"))
 
 
 @dataclass
@@ -75,6 +86,7 @@ class DeviceSession:
     tts_transcript_text: str = ""
     tts_display_text: str = ""
     tts_display_final: bool = False
+    tts_emotion: str = ""
     tts_suppressed_until: float = 0.0
     agent_dispatch_task: Optional[asyncio.Task] = None
     closed: bool = False
@@ -415,9 +427,40 @@ class ESP32LiveKitBridge:
         await session.websocket.send(json.dumps({"type": "tts", "state": state}))
         print(f"已发送 tts {state}: device={session.device_id}")
 
+    async def _send_emotion(self, session: DeviceSession, emotion: str) -> None:  # send one emotion to the device
+        if session.websocket is None:  # no device websocket attached: log the skip and send nothing
+            print(f"跳过 emotion {emotion}，ESP32 尚未连接")
+            return
+        await session.websocket.send(json.dumps({"type": "llm", "emotion": emotion}))  # JSON-encoded message
+        print(f"已发送 emotion: device={session.device_id} emotion={emotion}")
+
+    def _parse_emotion_text(self, text: str) -> tuple[Optional[str], str]:
+        """Split a leading emotion tag off *text*; return (emotion or None, stripped remainder)."""
+        found = EMOTION_TEXT_PATTERN.match(text)
+        if found is None:
+            return None, text.strip()
+        return found.group(1).strip(), found.group(2).strip()
+
+    async def _run_emotion_test_sequence(self, session: DeviceSession) -> None:
+        """Send each emotion in EMOTION_TEST_SEQUENCE to the device, pausing between sends."""
+        for index, emotion in enumerate(EMOTION_TEST_SEQUENCE):
+            if index > 0:
+                await asyncio.sleep(EMOTION_TEST_INTERVAL_SECONDS)
+            # Test liveness right before sending, not before the pause: the session
+            # can be closed while this task sleeps. An empty sequence never gets here.
+            if session.websocket is None or session.closed:
+                return
+            await self._send_emotion(session, emotion)
+
     async def _send_tts_text(self, session: DeviceSession, text: str, final: bool) -> None:
         if session.websocket is None:
             return
+        raw_text = text
+        _emotion, text = self._parse_emotion_text(text)
+        if not text:
+            print(f"[tts->esp32] skip empty text: raw={raw_text!r} final={final}")
+            return
+        print(f"[tts->esp32] text={text!r} final={final}")
         await session.websocket.send(
             json.dumps(
                 {
@@ -493,6 +536,7 @@ class ESP32LiveKitBridge:
         if not session.tts_display_text:
             session.tts_transcript_text = ""
             session.tts_display_final = False
+            session.tts_emotion = ""
             self._cancel_tts_display_task(session)
         await self._send_tts_state(session, "start")
         session.tts_active = True
@@ -507,6 +551,7 @@ class ESP32LiveKitBridge:
         session.tts_transcript_text = ""
         session.tts_display_text = ""
         session.tts_display_final = False
+        session.tts_emotion = ""
 
     async def _force_stop_tts(self, session: DeviceSession, reason: str) -> None:
         self._cancel_tts_display_task(session)
@@ -517,6 +562,7 @@ class ESP32LiveKitBridge:
         session.tts_transcript_text = ""
         session.tts_display_text = ""
         session.tts_display_final = False
+        session.tts_emotion = ""
         await self._send_tts_state(session, "stop")
         print(f"已强制停止本地 TTS: device={session.device_id} reason={reason}")
 
@@ -730,7 +776,17 @@ class ESP32LiveKitBridge:
                 if is_agent:
                     if time.monotonic() < session.tts_suppressed_until:
                         continue
-                    display_text = self._current_tts_display_text(segment.text)
+                    print(f"[livekit-llm] raw={segment.text!r} final={segment.final}")
+                    emotion, tts_text = self._parse_emotion_text(segment.text)
+                    print(
+                        f"[livekit-llm] parsed emotion={emotion!r} "
+                        f"tts_text={tts_text!r} final={segment.final}"
+                    )
+                    if emotion and emotion != session.tts_emotion:
+                        session.tts_emotion = emotion
+                        asyncio.create_task(self._send_emotion(session, emotion))
+                    display_text = self._current_tts_display_text(tts_text)
+                    print(f"[livekit-llm] display_text={display_text!r} final={segment.final}")
                     if not display_text or display_text == session.tts_transcript_text:
                         continue
                     session.tts_transcript_text = display_text
@@ -839,6 +895,12 @@ class ESP32LiveKitBridge:
         print(f"[config] livekit_ws_url={LIVEKIT_WS_URL}")
         print(f"[config] token_url={TOKEN_URL}")
         print(f"[config] agent_dispatch_mode={AGENT_DISPATCH_MODE}")
+        if EMOTION_TEST_SEQUENCE:
+            print(
+                "[config] emotion_test_sequence="
+                f"{','.join(EMOTION_TEST_SEQUENCE)} "
+                f"interval={EMOTION_TEST_INTERVAL_SECONDS}s"
+            )
 
     async def close(self) -> None:
         for session in list(self.device_sessions.values()):
@@ -1033,6 +1095,7 @@ class ESP32LiveKitBridge:
             }
             await websocket.send(json.dumps(hello_msg))
             print(f"已发送 server hello: device={device_id} room={session.room_name}")
+            asyncio.create_task(self._run_emotion_test_sequence(session))
 
             await self._connect_session_room(session)