Files
xiaozhi-esp32/main/bridge_server_bak.py
2026-04-27 10:39:21 +08:00

276 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import websockets
import os
import sys
import httpx
import json
import time
import queue
import threading
from typing import Any, Optional
from livekit import rtc
from livekit.rtc import AudioSource, AudioFrame
from websockets.exceptions import ConnectionClosedError
import http.server
import multipart
from urllib.parse import parse_qs
# Configuration
# Commented-out assignments below are alternative deployments kept for reference.
# TOKEN_URL = "http://10.6.80.130:8000/v1/token"
# LIVEKIT_WS_URL = "ws://10.6.80.130:8000/"
# ROOM = "vera-room"
# IDENTITY = "vera-1"
# TOKEN_URL = "https://omnichat.bwgdi.com/v1/token"
# HTTP endpoint queried by fetch_token() to obtain the LiveKit access token.
TOKEN_URL = "http://10.6.80.130:8000/getToken"
# LiveKit server URL passed to Room.connect().
LIVEKIT_WS_URL = "wss://test-b2zm4kva.livekit.cloud"
# LIVEKIT_WS_URL = "wss://rtc.bwgdi.com/"
# Room name and participant identity sent to the token endpoint as query parameters.
ROOM = "test-livekit-room2"
IDENTITY = "uv-livekit-hardcoded"
# uuid is only needed by the commented-out random IDENTITY variant below.
import uuid
# IDENTITY = f"uv-{uuid.uuid4().hex[:6]}"
# LiveKit connect timeout; start() also wraps connect() in wait_for with this value + 2 s.
CONNECT_TIMEOUT_SECONDS = 10.0
# TCP port on which the WebSocket server for the ESP32 listens.
WS_PORT = 8080
# Sample rate (Hz) shared by the LiveKit audio source, the Opus codec and the debug WAV file.
SAMPLE_RATE = 16000
async def fetch_token() -> str:
    """Fetch a LiveKit access token (JWT) from the token service.

    Sends a GET request to ``TOKEN_URL`` with the configured ``ROOM``,
    ``IDENTITY`` and a fixed ``agent_name`` as query parameters and extracts
    the ``token`` field from the JSON response.

    Returns:
        The non-empty JWT string.

    Raises:
        httpx.HTTPStatusError: If the service answers with an error status.
        ValueError: If the body is not valid JSON, is not a JSON object, or
            does not contain a non-empty string ``token`` field.
    """
    async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
        response = await client.get(
            TOKEN_URL,
            params={"room": ROOM, "identity": IDENTITY, "agent_name": "my-agent"},
        )
        response.raise_for_status()

    payload: Any = response.json()
    # A JSON array/string/number has no ``.get``; treat it like a missing token
    # so callers always see ValueError for a malformed response.
    token = payload.get("token") if isinstance(payload, dict) else None
    if not isinstance(token, str) or not token:
        raise ValueError(f"token response missing token field: {payload}")

    print(f"[token] room={payload.get('room')} identity={payload.get('identity')}")
    # Only a short prefix and the length are logged: the full JWT is a bearer
    # credential and must not end up in logs.
    print(f"[token] jwt_prefix={token[:16]}... len={len(token)}")
    return token
class ESP32LiveKitBridge:
    """Bridge audio between one ESP32 WebSocket client and a LiveKit room.

    Uplink: Opus packets received from the ESP32 are decoded to 16-bit mono
    PCM, optionally dumped to ``bridge_debug.wav`` and pushed into a LiveKit
    audio source published as a microphone track.

    Downlink: every audio track subscribed in the room is resampled to
    ``SAMPLE_RATE`` mono, Opus-encoded and sent back over the WebSocket,
    bracketed by ``tts`` ``start``/``stop`` JSON messages.
    """

    def __init__(self):
        self.room = rtc.Room()
        # Audio source used to push the ESP32's audio into LiveKit.
        # The sample rate must match what the ESP32 sends.
        self.mic_source = AudioSource(sample_rate=SAMPLE_RATE, num_channels=1)
        self.esp_ws = None  # currently active ESP32 WebSocket connection, if any
        self.audio_queue = queue.Queue()
        self.wav_writer_thread: Optional[threading.Thread] = None
        self.stop_event = threading.Event()
        # Created in start(); set once a participant whose identity contains
        # "agent" is seen in the room.
        self.agent_ready = None
        # Strong references to running forwarding tasks: the event loop only
        # keeps weak references, so an unreferenced task may be collected.
        self._forward_tasks = set()

    def _wav_writer_loop(self):
        """Thread target: drain ``audio_queue`` into ``bridge_debug.wav``.

        Runs until ``stop_event`` is set and the queue is empty. Errors are
        printed, never raised.
        """
        import wave

        print("启动音频保存线程...")
        try:
            with wave.open("bridge_debug.wav", "wb") as wav_file:
                wav_file.setnchannels(1)
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(SAMPLE_RATE)
                while not self.stop_event.is_set() or not self.audio_queue.empty():
                    try:
                        # Use a timeout so stop_event is re-checked regularly
                        # instead of blocking forever on an empty queue.
                        pcm_bytes = self.audio_queue.get(timeout=0.5)
                    except queue.Empty:
                        continue
                    wav_file.writeframes(pcm_bytes)
        except Exception as e:
            print(f"音频保存线程错误: {e}")
        finally:
            print("音频保存线程退出")

    def _ensure_wav_writer(self):
        """Make sure exactly one WAV writer thread is running."""
        self.stop_event.clear()
        writer = self.wav_writer_thread
        if writer is not None and writer.is_alive():
            # A second writer would reopen the same file in "wb" mode and
            # truncate what the first one is still writing.
            return
        writer = threading.Thread(target=self._wav_writer_loop, daemon=True)
        self.wav_writer_thread = writer
        writer.start()

    def _on_forward_task_done(self, task):
        """Drop a finished forwarding task and report its failure, if any."""
        self._forward_tasks.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            print(f"[livekit] audio forwarding task failed: {error!r}")

    async def start(self):
        """Connect to LiveKit, publish the microphone track and wait for the agent.

        Raises:
            Whatever ``fetch_token`` or ``Room.connect`` raise, and
            ``asyncio.TimeoutError`` if connecting exceeds the timeout. A
            missing agent is only reported, not raised.
        """
        self.agent_ready = asyncio.Event()

        # All handlers are registered BEFORE connecting so that events emitted
        # while connecting / publishing are not lost.
        @self.room.on("connection_state_changed")
        def on_connection_state_changed(state: int) -> None:
            print(f"[livekit] state={rtc.ConnectionState.Name(state)}")

        # LiveKit -> ESP32: fired when another participant's track is subscribed.
        @self.room.on("track_subscribed")
        def on_track_subscribed(track, publication, participant):
            if track.kind != rtc.TrackKind.KIND_AUDIO:
                return
            print(f"收到音频流: {participant.identity}")
            # Request SAMPLE_RATE mono explicitly so the stream is resampled
            # to what the Opus encoder expects.
            stream = rtc.AudioStream(track, sample_rate=SAMPLE_RATE, num_channels=1)
            task = asyncio.create_task(self.forward_audio_to_esp32(stream))
            self._forward_tasks.add(task)
            task.add_done_callback(self._on_forward_task_done)

        @self.room.on("participant_connected")
        def on_participant_connected(p):
            print(f"👤 participant joined: {p.identity}")
            if "agent" in p.identity:
                self.agent_ready.set()

        # 1. Fetch a token and connect to LiveKit.
        print(f"[config] livekit_ws_url={LIVEKIT_WS_URL}")
        print(f"[config] token_url={TOKEN_URL}")
        print(f"[config] room={ROOM} identity={IDENTITY}")
        token = await fetch_token()
        await asyncio.wait_for(
            self.room.connect(
                LIVEKIT_WS_URL,
                token,
                options=rtc.RoomOptions(connect_timeout=CONNECT_TIMEOUT_SECONDS),
            ),
            timeout=CONNECT_TIMEOUT_SECONDS + 2.0,
        )
        print(f"已连接到 LiveKit 房间: {self.room.name}")
        print(f"[livekit] local_identity={self.room.local_participant.identity}")
        print(f"[livekit] local_sid={self.room.local_participant.sid}")

        # 2. Publish the microphone track (ESP32 -> LiveKit).
        track = rtc.LocalAudioTrack.create_audio_track("esp32-mic", self.mic_source)
        options = rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)
        await self.room.local_participant.publish_track(track, options)

        # 3. An agent that was already in the room does not trigger
        #    participant_connected, so check the current participants too.
        if any("agent" in p.identity for p in self.room.remote_participants.values()):
            self.agent_ready.set()

        print("等待 agent 加入...")
        try:
            await asyncio.wait_for(self.agent_ready.wait(), timeout=10)
            print("✅ agent 已加入")
        except asyncio.TimeoutError:
            print("⚠️ agent 未加入(后续可能收不到音频)")

    async def close(self):
        """Gracefully release all connections and resources."""
        self.stop_event.set()

        # Stop downlink forwarding before tearing the room down.
        pending = list(self._forward_tasks)
        for task in pending:
            task.cancel()
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)

        try:
            if self.room:
                await self.room.disconnect()
        finally:
            # The writer is a daemon thread; without waiting for it the WAV
            # file may be left without a finalised header at interpreter exit.
            writer = self.wav_writer_thread
            if writer is not None and writer.is_alive():
                await asyncio.to_thread(writer.join, 2.0)

    async def _send_tts_state(self, state):
        """Best-effort send of a ``{"type": "tts", "state": ...}`` message."""
        ws = self.esp_ws
        if ws is None:
            return
        try:
            await ws.send(json.dumps({"type": "tts", "state": state}))
        except Exception as e:
            # The socket may already be closed; this must not kill the caller.
            print(f"发送回 ESP32 失败: {e}")

    async def forward_audio_to_esp32(self, audio_stream):
        """Receive audio from LiveKit and send it to the ESP32 as Opus packets.

        Args:
            audio_stream: Async iterable of audio frame events (each exposing
                ``.frame``) at ``SAMPLE_RATE`` mono.
        """
        import opuslib

        # Downlink Opus encoder.
        encoder = opuslib.Encoder(SAMPLE_RATE, 1, 'voip')
        # 1. Tell the ESP32 that speech starts so it can switch UI and decode.
        await self._send_tts_state("start")
        try:
            async for event in audio_stream:
                ws = self.esp_ws
                if ws is None:
                    continue
                try:
                    # The stream yields frame events; the PCM is in .frame.
                    frame = event.frame
                    pcm_data = frame.data.tobytes()
                    # Encode using the actual number of samples in this frame.
                    opus_packet = encoder.encode(pcm_data, frame.samples_per_channel)
                    await ws.send(opus_packet)
                except Exception as e:
                    print(f"发送回 ESP32 失败: {e}")
        finally:
            # 2. Stream ended (or task cancelled): tell the ESP32 to stop
            #    playing and release the stream.
            await self._send_tts_state("stop")
            try:
                await audio_stream.aclose()
            except Exception as e:
                print(f"[livekit] audio stream close failed: {e!r}")

    async def _publish_pcm(self, pcm_bytes):
        """Push decoded 16-bit mono PCM into the LiveKit microphone source."""
        num_samples = len(pcm_bytes) // 2
        if num_samples <= 0:
            return
        frame = AudioFrame.create(
            sample_rate=SAMPLE_RATE,
            num_channels=1,
            samples_per_channel=num_samples,
        )
        # Copy the bytes into the frame buffer through a byte-typed view.
        memoryview(frame.data).cast('B')[:] = pcm_bytes
        await self.mic_source.capture_frame(frame)

    async def handle_websocket(self, websocket):
        """Serve one ESP32 WebSocket connection until it closes.

        Sends the ``hello`` handshake, then decodes every binary message as a
        raw Opus packet and forwards it to LiveKit; text messages are parsed
        as JSON and logged.

        Args:
            websocket: The connection object handed over by ``websockets.serve``.
        """
        self.esp_ws = websocket
        print("ESP32 已连接")
        opus_decoder = None
        # Maximum samples per decoded packet: 60 ms, matching frame_duration
        # announced in the hello message.
        frame_size = int(SAMPLE_RATE * 0.06)
        try:
            # Handshake: tell the ESP32 the connection is accepted.
            hello_msg = {
                "type": "hello",
                "transport": "websocket",
                "audio_params": {
                    "format": "opus",  # explicitly ask the ESP32 for Opus
                    "sample_rate": SAMPLE_RATE,
                    "channels": 1,
                    "frame_duration": 60
                }
            }
            await websocket.send(json.dumps(hello_msg))

            async for message in websocket:
                if isinstance(message, bytes):
                    # Very short payloads cannot be audio (e.g. keep-alives).
                    if len(message) < 4:
                        print(f"收到过短的字节消息 ({len(message)} bytes),跳过")
                        continue
                    # Binary messages are treated as raw Opus packets with no
                    # extra header and handed straight to the decoder.
                    print(f"收到音频包长度: {len(message)}")
                    try:
                        if opus_decoder is None:
                            # Imported lazily so text-only sessions do not
                            # require the Opus library.
                            import opuslib
                            print(f"初始化 Opus 解码器: {SAMPLE_RATE}Hz, mono")
                            opus_decoder = opuslib.Decoder(SAMPLE_RATE, 1)
                            self._ensure_wav_writer()
                        pcm_bytes = opus_decoder.decode(message, frame_size)
                        # Only queue for the debug dump while a writer is
                        # alive, otherwise the queue would grow without bound.
                        writer = self.wav_writer_thread
                        if writer is not None and writer.is_alive():
                            self.audio_queue.put(pcm_bytes)
                        await self._publish_pcm(pcm_bytes)
                    except Exception as e:
                        print(f"Opus audio decode error ({len(message)} bytes): {e}")
                elif isinstance(message, str):
                    try:
                        data = json.loads(message)
                        print(f"收到 ESP32 JSON 消息: {data}")
                    except json.JSONDecodeError:
                        print(f"收到未知的字符消息: {message}")
        except ConnectionClosedError as e:
            print(f"ESP32 异常断开: {e}")
        except Exception as e:
            print(f"WebSocket 其他错误: {e}")
        finally:
            print("ESP32 断开连接")
            # A reconnecting ESP32 may already have replaced this connection;
            # only tear down shared state if this one is still the active one.
            if self.esp_ws is websocket:
                self.esp_ws = None
                if self.wav_writer_thread:
                    # Lets the writer thread exit once the queue is drained.
                    self.stop_event.set()
async def main():
    """Run the bridge: join LiveKit, then serve ESP32 WebSocket clients forever.

    The bridge is always closed on the way out, whether startup failed or the
    server was interrupted.
    """
    gateway = ESP32LiveKitBridge()
    try:
        await gateway.start()
        # Start the WebSocket server on all interfaces.
        server = websockets.serve(gateway.handle_websocket, "0.0.0.0", WS_PORT)
        async with server:
            print(f"WebSocket 服务器运行在端口 {WS_PORT},等待 ESP32 连接...")
            # A future that is never resolved keeps the server running.
            await asyncio.Future()
    finally:
        await gateway.close()
if __name__ == "__main__":
    # Script entry point: run the bridge and turn any error into a one-line
    # message on stderr plus exit status 1.
    try:
        asyncio.run(main())
    except Exception as err:
        print(f"[error] {err}", file=sys.stderr)
        raise SystemExit(1)