From 37343ac0fe2107f5037c858a9ee8f90f6220f628 Mon Sep 17 00:00:00 2001 From: 0Xiao0 <511201264@qq.com> Date: Wed, 27 May 2026 17:16:11 +0800 Subject: [PATCH] feat: icon first commit --- main/bridge_server.py | 65 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/main/bridge_server.py b/main/bridge_server.py index dea6cbf..352f186 100644 --- a/main/bridge_server.py +++ b/main/bridge_server.py @@ -2,6 +2,7 @@ import asyncio import base64 import json import os +import re import shutil import struct import sys @@ -55,6 +56,16 @@ TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18")) TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18")) TTS_DISPLAY_SCROLL_GAP = " " TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8")) +EMOTION_TEXT_PATTERN = re.compile( + r"^\s*,,;;]+)\s*>?[\s,,;;]*(.*)$", + re.DOTALL, +) +EMOTION_TEST_SEQUENCE = [ + emotion.strip() + for emotion in os.getenv("BRIDGE_EMOTION_TEST_SEQUENCE", "").split(",") + if emotion.strip() +] +EMOTION_TEST_INTERVAL_SECONDS = float(os.getenv("BRIDGE_EMOTION_TEST_INTERVAL_SECONDS", "2.0")) @dataclass @@ -75,6 +86,7 @@ class DeviceSession: tts_transcript_text: str = "" tts_display_text: str = "" tts_display_final: bool = False + tts_emotion: str = "" tts_suppressed_until: float = 0.0 agent_dispatch_task: Optional[asyncio.Task] = None closed: bool = False @@ -415,9 +427,40 @@ class ESP32LiveKitBridge: await session.websocket.send(json.dumps({"type": "tts", "state": state})) print(f"已发送 tts {state}: device={session.device_id}") + async def _send_emotion(self, session: DeviceSession, emotion: str) -> None: + if session.websocket is None: + print(f"跳过 emotion {emotion},ESP32 尚未连接") + return + await session.websocket.send(json.dumps({"type": "llm", "emotion": emotion})) + print(f"已发送 emotion: device={session.device_id} emotion={emotion}") + + def _parse_emotion_text(self, text: str) -> tuple[Optional[str], str]: + match = EMOTION_TEXT_PATTERN.match(text) + if match is None: + return None, text.strip() + emotion, tts_text = match.groups() + return emotion.strip(), tts_text.strip() + + async def _run_emotion_test_sequence(self, session: DeviceSession) -> None: + if not EMOTION_TEST_SEQUENCE: + return + + for index, emotion in enumerate(EMOTION_TEST_SEQUENCE): + if session.websocket is None or session.closed: + return + if index > 0: + await asyncio.sleep(EMOTION_TEST_INTERVAL_SECONDS) + await self._send_emotion(session, emotion) + async def _send_tts_text(self, session: DeviceSession, text: str, final: bool) -> None: if session.websocket is None: return + raw_text = text + _emotion, text = self._parse_emotion_text(text) + if not text: + print(f"[tts->esp32] skip empty text: raw={raw_text!r} final={final}") + return + print(f"[tts->esp32] text={text!r} final={final}") await session.websocket.send( json.dumps( { @@ -493,6 +536,7 @@ class ESP32LiveKitBridge: if not session.tts_display_text: session.tts_transcript_text = "" session.tts_display_final = False + session.tts_emotion = "" self._cancel_tts_display_task(session) await self._send_tts_state(session, "start") session.tts_active = True @@ -507,6 +551,7 @@ class ESP32LiveKitBridge: session.tts_transcript_text = "" session.tts_display_text = "" session.tts_display_final = False + session.tts_emotion = "" async def _force_stop_tts(self, session: DeviceSession, reason: str) -> None: self._cancel_tts_display_task(session) @@ -517,6 +562,7 @@ class ESP32LiveKitBridge: session.tts_transcript_text = "" session.tts_display_text = "" session.tts_display_final = False + session.tts_emotion = "" await self._send_tts_state(session, "stop") print(f"已强制停止本地 TTS: device={session.device_id} reason={reason}") @@ -730,7 +776,17 @@ class ESP32LiveKitBridge: if is_agent: if time.monotonic() < session.tts_suppressed_until: continue - display_text = self._current_tts_display_text(segment.text) + print(f"[livekit-llm] raw={segment.text!r} final={segment.final}") + emotion, tts_text = self._parse_emotion_text(segment.text) + print( + f"[livekit-llm] parsed emotion={emotion!r} " + f"tts_text={tts_text!r} final={segment.final}" + ) + if emotion and emotion != session.tts_emotion: + session.tts_emotion = emotion + asyncio.create_task(self._send_emotion(session, emotion)) + display_text = self._current_tts_display_text(tts_text) + print(f"[livekit-llm] display_text={display_text!r} final={segment.final}") if not display_text or display_text == session.tts_transcript_text: continue session.tts_transcript_text = display_text @@ -839,6 +895,12 @@ class ESP32LiveKitBridge: print(f"[config] livekit_ws_url={LIVEKIT_WS_URL}") print(f"[config] token_url={TOKEN_URL}") print(f"[config] agent_dispatch_mode={AGENT_DISPATCH_MODE}") + if EMOTION_TEST_SEQUENCE: + print( + "[config] emotion_test_sequence=" + f"{','.join(EMOTION_TEST_SEQUENCE)} " + f"interval={EMOTION_TEST_INTERVAL_SECONDS}s" + ) async def close(self) -> None: for session in list(self.device_sessions.values()): @@ -1033,6 +1095,7 @@ class ESP32LiveKitBridge: } await websocket.send(json.dumps(hello_msg)) print(f"已发送 server hello: device={device_id} room={session.room_name}") + asyncio.create_task(self._run_emotion_test_sequence(session)) await self._connect_session_room(session)