fix: voice interrupt

This commit is contained in:
0Xiao0
2026-05-22 10:10:16 +08:00
parent 61ad9dafd9
commit 5223333418

View File

@@ -49,6 +49,7 @@ TTS_DISPLAY_SENTENCE_BREAKS = "。!?!?;"
TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
TTS_DISPLAY_SCROLL_GAP = " "
TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8"))
@dataclass
@@ -69,6 +70,7 @@ class DeviceSession:
tts_transcript_text: str = ""
tts_display_text: str = ""
tts_display_final: bool = False
tts_suppressed_until: float = 0.0
agent_dispatch_task: Optional[asyncio.Task] = None
closed: bool = False
captured_frame_count: int = 0
@@ -431,15 +433,24 @@ class ESP32LiveKitBridge:
session.tts_display_text = ""
session.tts_display_final = False
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
session.tts_stream_id += 1
async def _force_stop_tts(self, session: DeviceSession, reason: str) -> None:
self._cancel_tts_display_task(session)
if session.tts_idle_task is not None:
session.tts_idle_task.cancel()
session.tts_idle_task = None
self._cancel_tts_display_task(session)
await self._send_agent_interrupt(session, reason)
await self._stop_tts(session)
session.tts_active = False
session.tts_transcript_text = ""
session.tts_display_text = ""
session.tts_display_final = False
await self._send_tts_state(session, "stop")
print(f"已强制停止本地 TTS: device={session.device_id} reason={reason}")
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
session.tts_stream_id += 1
session.tts_suppressed_until = time.monotonic() + TTS_INTERRUPT_SUPPRESS_SECONDS
await self._force_stop_tts(session, reason)
asyncio.create_task(self._send_agent_interrupt(session, reason))
def _reset_tts_idle_timer(self, session: DeviceSession) -> None:
if session.tts_idle_task is not None:
@@ -642,6 +653,8 @@ class ESP32LiveKitBridge:
status = "✅ 最终结果" if segment.final else "⏳ 正在思考/中间结果"
print(f"🗣️ [{status} | room={session.room_name} | {identity}]: {segment.text}")
if is_agent:
if time.monotonic() < session.tts_suppressed_until:
continue
display_text = self._current_tts_display_text(segment.text)
if not display_text or display_text == session.tts_transcript_text:
continue
@@ -799,7 +812,9 @@ class ESP32LiveKitBridge:
pre_roll_pcm.clear()
audible_frame_streak = 0
silence_frame_streak = 0
waiting_for_post_interrupt_silence = True
waiting_for_post_interrupt_silence = (
time.monotonic() >= session.tts_suppressed_until
)
stream_id = session.tts_stream_id
if session.tts_active:
await self._stop_tts(session)
@@ -809,6 +824,14 @@ class ESP32LiveKitBridge:
pcm_data = frame.data.tobytes()
has_audible_audio = self._has_audible_audio(pcm_data)
if time.monotonic() < session.tts_suppressed_until:
pending_pcm.clear()
pre_roll_pcm.clear()
audible_frame_streak = 0
silence_frame_streak = 0
waiting_for_post_interrupt_silence = False
continue
if waiting_for_post_interrupt_silence:
if has_audible_audio:
silence_frame_streak = 0