fix: voice interrupt
This commit is contained in:
@ -49,6 +49,7 @@ TTS_DISPLAY_SENTENCE_BREAKS = "。!?!?;;"
|
||||
# Display-scrolling settings. Each value is read once at import time from the
# environment variable of the same name, falling back to the default shown.
# presumably the scrolling window width in characters — TODO confirm
TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
|
||||
# presumably the delay between scroll steps, in seconds — TODO confirm
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
|
||||
# NOTE(review): this literal may have had its whitespace collapsed when the
# diff page was rendered — verify the gap width against the repository.
TTS_DISPLAY_SCROLL_GAP = " "
|
||||
# Length, in seconds, of the suppression window opened by _abort_tts:
# it is added to time.monotonic() to produce session.tts_suppressed_until, and
# agent transcript segments and audio frames are skipped until that deadline.
TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8"))
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -69,6 +70,7 @@ class DeviceSession:
|
||||
tts_transcript_text: str = ""
|
||||
tts_display_text: str = ""
|
||||
tts_display_final: bool = False
|
||||
tts_suppressed_until: float = 0.0
|
||||
agent_dispatch_task: Optional[asyncio.Task] = None
|
||||
closed: bool = False
|
||||
captured_frame_count: int = 0
|
||||
@ -431,15 +433,24 @@ class ESP32LiveKitBridge:
|
||||
session.tts_display_text = ""
|
||||
session.tts_display_final = False
|
||||
|
||||
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
|
||||
print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
|
||||
session.tts_stream_id += 1
|
||||
# Tear down the TTS state held for one device session: cancel the display
# task and any pending idle task, clear the transcript/display fields, call
# _send_tts_state(session, "stop"), and log the reason.
#
# NOTE(review): this text comes from a rendered diff whose +/- markers and
# indentation were lost, so removed and added lines are interleaved. The hunk
# header (-431,15 +433,24) against the 30 lines displayed implies six removed
# lines; the second _cancel_tts_display_task call, the awaited
# _send_agent_interrupt and the awaited _stop_tts below look like the removed
# side of the change — confirm against the repository before relying on them.
async def _force_stop_tts(self, session: DeviceSession, reason: str) -> None:
|
||||
self._cancel_tts_display_task(session)
|
||||
# Cancel the idle task, if one exists, and drop the reference to it.
if session.tts_idle_task is not None:
|
||||
session.tts_idle_task.cancel()
|
||||
session.tts_idle_task = None
|
||||
# NOTE(review): repeats the call made at the top of the function — likely a
# removed diff line.
self._cancel_tts_display_task(session)
|
||||
# NOTE(review): _abort_tts also dispatches _send_agent_interrupt after calling
# this function; if this line is live the interrupt is sent twice — confirm.
await self._send_agent_interrupt(session, reason)
|
||||
await self._stop_tts(session)
|
||||
# Reset the per-session TTS fields to their idle values.
session.tts_active = False
|
||||
session.tts_transcript_text = ""
|
||||
session.tts_display_text = ""
|
||||
session.tts_display_final = False
|
||||
await self._send_tts_state(session, "stop")
|
||||
print(f"已强制停止本地 TTS: device={session.device_id} reason={reason}")
|
||||
|
||||
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
    """Handle an interrupt request for the TTS currently playing on a session.

    Steps, in order:
      1. Log the request.
      2. Increment ``session.tts_stream_id``.
      3. Set ``session.tts_suppressed_until`` to now plus
         ``TTS_INTERRUPT_SUPPRESS_SECONDS``; other code in this file skips
         agent transcript segments and audio frames until that deadline.
      4. Await ``_force_stop_tts`` so the local stop completes first.
      5. Dispatch ``_send_agent_interrupt`` as a background task so this
         coroutine does not wait on it.

    Args:
        session: Device session whose TTS is being interrupted.
        reason: Label passed through to the log line, ``_force_stop_tts``
            and ``_send_agent_interrupt``.
    """
    print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
    session.tts_stream_id += 1
    session.tts_suppressed_until = time.monotonic() + TTS_INTERRUPT_SUPPRESS_SECONDS
    await self._force_stop_tts(session, reason)

    # The event loop only keeps a weak reference to tasks, so a task whose
    # handle is discarded can be garbage-collected before it finishes. Hold a
    # strong reference until the interrupt has been delivered.
    interrupt_task = asyncio.create_task(self._send_agent_interrupt(session, reason))
    pending_tasks = getattr(self, "_pending_interrupt_tasks", None)
    if pending_tasks is None:
        pending_tasks = set()
        self._pending_interrupt_tasks = pending_tasks
    pending_tasks.add(interrupt_task)
    interrupt_task.add_done_callback(pending_tasks.discard)
|
||||
|
||||
def _reset_tts_idle_timer(self, session: DeviceSession) -> None:
|
||||
if session.tts_idle_task is not None:
|
||||
@ -642,6 +653,8 @@ class ESP32LiveKitBridge:
|
||||
status = "✅ 最终结果" if segment.final else "⏳ 正在思考/中间结果"
|
||||
print(f"🗣️ [{status} | room={session.room_name} | {identity}]: {segment.text}")
|
||||
if is_agent:
|
||||
if time.monotonic() < session.tts_suppressed_until:
|
||||
continue
|
||||
display_text = self._current_tts_display_text(segment.text)
|
||||
if not display_text or display_text == session.tts_transcript_text:
|
||||
continue
|
||||
@ -799,7 +812,9 @@ class ESP32LiveKitBridge:
|
||||
pre_roll_pcm.clear()
|
||||
audible_frame_streak = 0
|
||||
silence_frame_streak = 0
|
||||
waiting_for_post_interrupt_silence = True
|
||||
waiting_for_post_interrupt_silence = (
|
||||
time.monotonic() >= session.tts_suppressed_until
|
||||
)
|
||||
stream_id = session.tts_stream_id
|
||||
if session.tts_active:
|
||||
await self._stop_tts(session)
|
||||
@ -809,6 +824,14 @@ class ESP32LiveKitBridge:
|
||||
pcm_data = frame.data.tobytes()
|
||||
has_audible_audio = self._has_audible_audio(pcm_data)
|
||||
|
||||
if time.monotonic() < session.tts_suppressed_until:
|
||||
pending_pcm.clear()
|
||||
pre_roll_pcm.clear()
|
||||
audible_frame_streak = 0
|
||||
silence_frame_streak = 0
|
||||
waiting_for_post_interrupt_silence = False
|
||||
continue
|
||||
|
||||
if waiting_for_post_interrupt_silence:
|
||||
if has_audible_audio:
|
||||
silence_frame_streak = 0
|
||||
|
||||
Reference in New Issue
Block a user