feat: icon first commit
This commit is contained in:
@ -2,6 +2,7 @@ import asyncio
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import struct
|
||||
import sys
|
||||
@ -55,6 +56,16 @@ TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
|
||||
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
|
||||
TTS_DISPLAY_SCROLL_GAP = " "
|
||||
TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8"))
|
||||
# Matches an optional leading emotion tag followed by the text to speak, e.g.
# "<emotion=happy> hello" or "emotion = happy, hello".
#   group 1: the emotion name (stops at whitespace, ">" or a separator)
#   group 2: the remaining text (re.DOTALL lets it span multiple lines)
# Separators are listed in both ASCII (",", ";") and full-width
# (U+FF0C, U+FF1B) form so that a tag directly followed by CJK punctuation
# is still split into emotion + text instead of swallowing the text into the
# emotion name. The full-width characters are written as escapes so they
# survive editors/tools that normalise punctuation.
EMOTION_TEXT_PATTERN = re.compile(
    r"^\s*<?\s*emotion\s*=\s*([^\s>,;\uFF0C\uFF1B]+)\s*>?[\s,;\uFF0C\uFF1B]*(.*)$",
    re.DOTALL,
)
|
||||
# Comma-separated emotion names read from BRIDGE_EMOTION_TEST_SEQUENCE.
# Each entry is whitespace-stripped and empty entries are dropped, so an
# unset or blank variable yields an empty list.
EMOTION_TEST_SEQUENCE = list(
    filter(
        None,
        map(str.strip, os.getenv("BRIDGE_EMOTION_TEST_SEQUENCE", "").split(",")),
    )
)
# Delay, in seconds, between consecutive emotions of the test sequence.
EMOTION_TEST_INTERVAL_SECONDS = float(
    os.getenv("BRIDGE_EMOTION_TEST_INTERVAL_SECONDS", "2.0")
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -75,6 +86,7 @@ class DeviceSession:
|
||||
tts_transcript_text: str = ""
|
||||
tts_display_text: str = ""
|
||||
tts_display_final: bool = False
|
||||
tts_emotion: str = ""
|
||||
tts_suppressed_until: float = 0.0
|
||||
agent_dispatch_task: Optional[asyncio.Task] = None
|
||||
closed: bool = False
|
||||
@ -415,9 +427,40 @@ class ESP32LiveKitBridge:
|
||||
await session.websocket.send(json.dumps({"type": "tts", "state": state}))
|
||||
print(f"已发送 tts {state}: device={session.device_id}")
|
||||
|
||||
async def _send_emotion(self, session: DeviceSession, emotion: str) -> None:
    """Send an emotion message to the device attached to *session*.

    The message is the JSON object ``{"type": "llm", "emotion": emotion}``
    written to ``session.websocket``. When the session has no websocket the
    emotion is skipped and only a log line is printed.
    """
    ws = session.websocket
    if ws is None:
        print(f"跳过 emotion {emotion},ESP32 尚未连接")
        return

    payload = json.dumps({"type": "llm", "emotion": emotion})
    await ws.send(payload)
    print(f"已发送 emotion: device={session.device_id} emotion={emotion}")
|
||||
|
||||
def _parse_emotion_text(self, text: str) -> tuple[Optional[str], str]:
    """Split *text* into an optional emotion tag and the remaining text.

    Returns ``(emotion, rest)`` when EMOTION_TEXT_PATTERN matches, otherwise
    ``(None, text)``. Both returned strings are whitespace-stripped.
    """
    found = EMOTION_TEXT_PATTERN.match(text)
    if found is not None:
        return found.group(1).strip(), found.group(2).strip()
    return None, text.strip()
|
||||
|
||||
async def _run_emotion_test_sequence(self, session: DeviceSession) -> None:
    """Send each emotion in EMOTION_TEST_SEQUENCE to the device in order.

    Does nothing when the sequence is empty. Consecutive emotions are
    separated by EMOTION_TEST_INTERVAL_SECONDS. The run stops as soon as the
    session has no websocket or is marked closed.
    """
    if not EMOTION_TEST_SEQUENCE:
        return

    for index, emotion in enumerate(EMOTION_TEST_SEQUENCE):
        # Checked before sleeping so a dead session does not wait needlessly.
        if session.websocket is None or session.closed:
            return
        if index > 0:
            await asyncio.sleep(EMOTION_TEST_INTERVAL_SECONDS)
            # The session can be closed while this coroutine is suspended
            # in the sleep above; re-check so we never send on a session
            # that went away during the wait.
            if session.websocket is None or session.closed:
                return
        await self._send_emotion(session, emotion)
|
||||
|
||||
async def _send_tts_text(self, session: DeviceSession, text: str, final: bool) -> None:
|
||||
if session.websocket is None:
|
||||
return
|
||||
raw_text = text
|
||||
_emotion, text = self._parse_emotion_text(text)
|
||||
if not text:
|
||||
print(f"[tts->esp32] skip empty text: raw={raw_text!r} final={final}")
|
||||
return
|
||||
print(f"[tts->esp32] text={text!r} final={final}")
|
||||
await session.websocket.send(
|
||||
json.dumps(
|
||||
{
|
||||
@ -493,6 +536,7 @@ class ESP32LiveKitBridge:
|
||||
if not session.tts_display_text:
|
||||
session.tts_transcript_text = ""
|
||||
session.tts_display_final = False
|
||||
session.tts_emotion = ""
|
||||
self._cancel_tts_display_task(session)
|
||||
await self._send_tts_state(session, "start")
|
||||
session.tts_active = True
|
||||
@ -507,6 +551,7 @@ class ESP32LiveKitBridge:
|
||||
session.tts_transcript_text = ""
|
||||
session.tts_display_text = ""
|
||||
session.tts_display_final = False
|
||||
session.tts_emotion = ""
|
||||
|
||||
async def _force_stop_tts(self, session: DeviceSession, reason: str) -> None:
|
||||
self._cancel_tts_display_task(session)
|
||||
@ -517,6 +562,7 @@ class ESP32LiveKitBridge:
|
||||
session.tts_transcript_text = ""
|
||||
session.tts_display_text = ""
|
||||
session.tts_display_final = False
|
||||
session.tts_emotion = ""
|
||||
await self._send_tts_state(session, "stop")
|
||||
print(f"已强制停止本地 TTS: device={session.device_id} reason={reason}")
|
||||
|
||||
@ -730,7 +776,17 @@ class ESP32LiveKitBridge:
|
||||
if is_agent:
|
||||
if time.monotonic() < session.tts_suppressed_until:
|
||||
continue
|
||||
display_text = self._current_tts_display_text(segment.text)
|
||||
print(f"[livekit-llm] raw={segment.text!r} final={segment.final}")
|
||||
emotion, tts_text = self._parse_emotion_text(segment.text)
|
||||
print(
|
||||
f"[livekit-llm] parsed emotion={emotion!r} "
|
||||
f"tts_text={tts_text!r} final={segment.final}"
|
||||
)
|
||||
if emotion and emotion != session.tts_emotion:
|
||||
session.tts_emotion = emotion
|
||||
asyncio.create_task(self._send_emotion(session, emotion))
|
||||
display_text = self._current_tts_display_text(tts_text)
|
||||
print(f"[livekit-llm] display_text={display_text!r} final={segment.final}")
|
||||
if not display_text or display_text == session.tts_transcript_text:
|
||||
continue
|
||||
session.tts_transcript_text = display_text
|
||||
@ -839,6 +895,12 @@ class ESP32LiveKitBridge:
|
||||
print(f"[config] livekit_ws_url={LIVEKIT_WS_URL}")
|
||||
print(f"[config] token_url={TOKEN_URL}")
|
||||
print(f"[config] agent_dispatch_mode={AGENT_DISPATCH_MODE}")
|
||||
if EMOTION_TEST_SEQUENCE:
|
||||
print(
|
||||
"[config] emotion_test_sequence="
|
||||
f"{','.join(EMOTION_TEST_SEQUENCE)} "
|
||||
f"interval={EMOTION_TEST_INTERVAL_SECONDS}s"
|
||||
)
|
||||
|
||||
async def close(self) -> None:
|
||||
for session in list(self.device_sessions.values()):
|
||||
@ -1033,6 +1095,7 @@ class ESP32LiveKitBridge:
|
||||
}
|
||||
await websocket.send(json.dumps(hello_msg))
|
||||
print(f"已发送 server hello: device={device_id} room={session.room_name}")
|
||||
asyncio.create_task(self._run_emotion_test_sequence(session))
|
||||
|
||||
await self._connect_session_room(session)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user