feat: support camera capture to livekit
This commit is contained in:
@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
@ -8,6 +9,7 @@ import time
|
||||
import traceback
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
@ -31,6 +33,8 @@ CONNECT_TIMEOUT_SECONDS = float(os.getenv("LIVEKIT_CONNECT_TIMEOUT_SECONDS", "20
|
||||
AGENT_READY_TIMEOUT_SECONDS = float(os.getenv("LIVEKIT_AGENT_READY_TIMEOUT_SECONDS", "10.0"))
|
||||
WS_PORT = 8080
|
||||
AGENT_DISPATCH_MODE = os.getenv("AGENT_DISPATCH_MODE", "token").lower()
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||
VISION_FRAME_SAVE_DIR = Path(os.getenv("VISION_FRAME_SAVE_DIR", str(PROJECT_ROOT / "vision_frames")))
|
||||
|
||||
INPUT_SAMPLE_RATE = 16000
|
||||
OUTPUT_SAMPLE_RATE = 24000
|
||||
@ -45,6 +49,7 @@ TTS_PRE_ROLL_MS = 80
|
||||
TTS_START_CONSECUTIVE_AUDIBLE_FRAMES = 1
|
||||
TTS_INTERRUPT_SILENCE_FRAMES = 3
|
||||
INTERRUPT_TOPIC = "lk.interrupt"
|
||||
VISION_FRAME_TOPIC = "vision.frame"
|
||||
TTS_DISPLAY_SENTENCE_BREAKS = "。!?!?;;"
|
||||
TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
|
||||
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
|
||||
@ -336,6 +341,73 @@ class ESP32LiveKitBridge:
|
||||
if not ok:
|
||||
print("警告: bridge 已停止 TTS,但 agent 侧 interrupt 未确认送出")
|
||||
|
||||
def _save_vision_frame(self, session: DeviceSession, image: str) -> Optional[Path]:
    """Decode a base64 camera frame and write it under VISION_FRAME_SAVE_DIR.

    The file is named ``<unix-ms>_<sanitized device id>.jpg``; the ``.jpg``
    suffix is used unconditionally.

    Args:
        session: Device session; only ``session.device_id`` is read, to
            build the file name.
        image: Base64 text of the frame. Decoding is strict
            (``validate=True``), so any character outside the base64
            alphabet is rejected.

    Returns:
        The path of the written file, or ``None`` when the payload is not
        valid base64 or the file could not be written. Both failures are
        logged and never raised, so a bad frame or a full disk cannot
        propagate out of the caller.
    """
    try:
        image_bytes = base64.b64decode(image, validate=True)
    except (ValueError, TypeError) as exc:
        # binascii.Error (bad alphabet / padding) is a ValueError subclass;
        # TypeError covers a payload that is not str/bytes at all.
        print(f"vision frame base64 解码失败: {exc}")
        return None

    # Keep only alphanumerics, "-" and "_" so the device id cannot inject
    # path separators or dots into the file name.
    safe_device_id = "".join(
        char if char.isalnum() or char in ("-", "_") else "_"
        for char in session.device_id
    )
    timestamp_ms = int(time.time() * 1000)
    path = VISION_FRAME_SAVE_DIR / f"{timestamp_ms}_{safe_device_id}.jpg"

    try:
        VISION_FRAME_SAVE_DIR.mkdir(parents=True, exist_ok=True)
        path.write_bytes(image_bytes)
    except OSError as exc:
        # Disk full, permission denied, read-only volume, ...: report it the
        # same way as a decode failure instead of raising into the caller.
        print(f"vision frame 保存失败: {exc}")
        return None
    return path
|
||||
|
||||
async def _publish_vision_frame(self, session: DeviceSession, message: dict[str, Any]) -> None:
    """Archive a camera frame locally and forward it over the room data channel.

    Args:
        session: Device session; ``room``, ``room_name``, ``identity`` and
            ``device_id`` are read from it.
        message: Parsed message from the device. ``image`` must be a
            non-empty base64 string; ``mime_type`` is optional and falls
            back to ``"image/jpeg"``.

    The frame is dropped (with a log line) when ``image`` is missing, when
    saving it fails, or when the room has no local participant yet. Publish
    errors are logged and never raised.
    """
    image = message.get("image")
    if not isinstance(image, str) or not image:
        print("收到 vision frame,但 image 字段为空")
        return

    # Base64 decoding and the file write are synchronous; run them in a
    # worker thread so they do not stall the event loop.
    saved_path = await asyncio.to_thread(self._save_vision_frame, session, image)
    if saved_path is None:
        return
    print(f"已保存 vision frame: {saved_path}")

    # getattr with a default also covers session.room being None.
    participant = getattr(session.room, "local_participant", None)
    if participant is None:
        print("跳过发送 vision frame,local participant 尚未就绪")
        return

    payload = {
        "type": "vision_frame",
        "topic": VISION_FRAME_TOPIC,
        "room": session.room_name,
        "identity": session.identity,
        "device_id": session.device_id,
        # A present-but-empty/null mime_type also falls back to JPEG.
        "mime_type": message.get("mime_type") or "image/jpeg",
        "image": image,
        "saved_path": str(saved_path),
    }
    # NOTE(review): the whole base64 image travels in one data packet; confirm
    # this stays within the transport's data-packet size limit for the frame
    # sizes the camera produces.
    data = json.dumps(payload).encode("utf-8")

    agent_identities = self._get_agent_identities(session)
    kwargs: dict[str, Any] = {}
    if agent_identities:
        # Without explicit destinations the packet goes out with no target
        # filter (logged below as "broadcast").
        kwargs["destination_identities"] = agent_identities

    last_error: Optional[Exception] = None
    # First try with the topic keyword; on TypeError retry without it, on the
    # assumption that the publish_data signature does not accept ``topic``.
    for attempt in ({"topic": VISION_FRAME_TOPIC, **kwargs}, kwargs):
        try:
            await participant.publish_data(data, **attempt)
            print(
                f"已发送 vision frame: bytes={len(data)} "
                f"targets={agent_identities or 'broadcast'}"
            )
            return
        except TypeError as exc:
            last_error = exc
        except Exception as exc:
            # Any non-signature failure is final: log it and give up.
            print(f"发送 vision frame 失败: {exc}")
            return

    if last_error is not None:
        print(f"发送 vision frame 失败,publish_data 签名不兼容: {last_error}")
|
||||
|
||||
async def _send_tts_state(self, session: DeviceSession, state: str) -> None:
|
||||
if session.websocket is None:
|
||||
print(f"跳过 tts {state},ESP32 尚未连接")
|
||||
@ -939,14 +1011,12 @@ class ESP32LiveKitBridge:
|
||||
)
|
||||
self.device_sessions[device_id] = session
|
||||
|
||||
# print(f"ESP32 已连接: device={device_id}")
|
||||
# print(f"ESP32 协议版本: {session.protocol_version}")
|
||||
print(f"ESP32 已连接: device={device_id}")
|
||||
print(f"ESP32 协议版本: {session.protocol_version}")
|
||||
session.tts_stream_id += 1
|
||||
opus_decoder = None
|
||||
|
||||
try:
|
||||
await self._connect_session_room(session)
|
||||
|
||||
hello_msg = {
|
||||
"type": "hello",
|
||||
"transport": "websocket",
|
||||
@ -962,6 +1032,9 @@ class ESP32LiveKitBridge:
|
||||
},
|
||||
}
|
||||
await websocket.send(json.dumps(hello_msg))
|
||||
print(f"已发送 server hello: device={device_id} room={session.room_name}")
|
||||
|
||||
await self._connect_session_room(session)
|
||||
|
||||
async for message in websocket:
|
||||
if isinstance(message, bytes):
|
||||
@ -1017,6 +1090,8 @@ class ESP32LiveKitBridge:
|
||||
abort_reason = reason if isinstance(reason, str) and reason else "button_abort"
|
||||
print(f"处理 ESP32 打断请求: reason={abort_reason}")
|
||||
await self._abort_tts(session, abort_reason)
|
||||
elif msg_type == "vision" and data.get("state") == "frame":
|
||||
await self._publish_vision_frame(session, data)
|
||||
except json.JSONDecodeError:
|
||||
print(f"收到未知的字符消息: {message}")
|
||||
except ConnectionClosedError as exc:
|
||||
|
||||
Reference in New Issue
Block a user