Compare commits
2 Commits
61ad9dafd9
...
ws
| Author | SHA1 | Date | |
|---|---|---|---|
| 4953244c7c | |||
| 5223333418 |
@ -49,6 +49,7 @@ TTS_DISPLAY_SENTENCE_BREAKS = "。!?!?;;"
|
|||||||
TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
|
TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
|
||||||
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
|
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
|
||||||
TTS_DISPLAY_SCROLL_GAP = " "
|
TTS_DISPLAY_SCROLL_GAP = " "
|
||||||
|
TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8"))
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -69,6 +70,7 @@ class DeviceSession:
|
|||||||
tts_transcript_text: str = ""
|
tts_transcript_text: str = ""
|
||||||
tts_display_text: str = ""
|
tts_display_text: str = ""
|
||||||
tts_display_final: bool = False
|
tts_display_final: bool = False
|
||||||
|
tts_suppressed_until: float = 0.0
|
||||||
agent_dispatch_task: Optional[asyncio.Task] = None
|
agent_dispatch_task: Optional[asyncio.Task] = None
|
||||||
closed: bool = False
|
closed: bool = False
|
||||||
captured_frame_count: int = 0
|
captured_frame_count: int = 0
|
||||||
@ -413,6 +415,9 @@ class ESP32LiveKitBridge:
|
|||||||
if session.tts_active:
|
if session.tts_active:
|
||||||
print("跳过 tts start,当前已处于激活状态")
|
print("跳过 tts start,当前已处于激活状态")
|
||||||
return
|
return
|
||||||
|
if time.monotonic() < session.tts_suppressed_until:
|
||||||
|
print("跳过 tts start,中断后的残留音频仍在抑制窗口内")
|
||||||
|
return
|
||||||
if not session.tts_display_text:
|
if not session.tts_display_text:
|
||||||
session.tts_transcript_text = ""
|
session.tts_transcript_text = ""
|
||||||
session.tts_display_final = False
|
session.tts_display_final = False
|
||||||
@ -431,15 +436,24 @@ class ESP32LiveKitBridge:
|
|||||||
session.tts_display_text = ""
|
session.tts_display_text = ""
|
||||||
session.tts_display_final = False
|
session.tts_display_final = False
|
||||||
|
|
||||||
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
|
async def _force_stop_tts(self, session: DeviceSession, reason: str) -> None:
|
||||||
print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
|
self._cancel_tts_display_task(session)
|
||||||
session.tts_stream_id += 1
|
|
||||||
if session.tts_idle_task is not None:
|
if session.tts_idle_task is not None:
|
||||||
session.tts_idle_task.cancel()
|
session.tts_idle_task.cancel()
|
||||||
session.tts_idle_task = None
|
session.tts_idle_task = None
|
||||||
self._cancel_tts_display_task(session)
|
session.tts_active = False
|
||||||
await self._send_agent_interrupt(session, reason)
|
session.tts_transcript_text = ""
|
||||||
await self._stop_tts(session)
|
session.tts_display_text = ""
|
||||||
|
session.tts_display_final = False
|
||||||
|
await self._send_tts_state(session, "stop")
|
||||||
|
print(f"已强制停止本地 TTS: device={session.device_id} reason={reason}")
|
||||||
|
|
||||||
|
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
|
||||||
|
print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
|
||||||
|
session.tts_stream_id += 1
|
||||||
|
session.tts_suppressed_until = time.monotonic() + TTS_INTERRUPT_SUPPRESS_SECONDS
|
||||||
|
await self._force_stop_tts(session, reason)
|
||||||
|
asyncio.create_task(self._send_agent_interrupt(session, reason))
|
||||||
|
|
||||||
def _reset_tts_idle_timer(self, session: DeviceSession) -> None:
|
def _reset_tts_idle_timer(self, session: DeviceSession) -> None:
|
||||||
if session.tts_idle_task is not None:
|
if session.tts_idle_task is not None:
|
||||||
@ -642,6 +656,8 @@ class ESP32LiveKitBridge:
|
|||||||
status = "✅ 最终结果" if segment.final else "⏳ 正在思考/中间结果"
|
status = "✅ 最终结果" if segment.final else "⏳ 正在思考/中间结果"
|
||||||
print(f"🗣️ [{status} | room={session.room_name} | {identity}]: {segment.text}")
|
print(f"🗣️ [{status} | room={session.room_name} | {identity}]: {segment.text}")
|
||||||
if is_agent:
|
if is_agent:
|
||||||
|
if time.monotonic() < session.tts_suppressed_until:
|
||||||
|
continue
|
||||||
display_text = self._current_tts_display_text(segment.text)
|
display_text = self._current_tts_display_text(segment.text)
|
||||||
if not display_text or display_text == session.tts_transcript_text:
|
if not display_text or display_text == session.tts_transcript_text:
|
||||||
continue
|
continue
|
||||||
@ -809,6 +825,14 @@ class ESP32LiveKitBridge:
|
|||||||
pcm_data = frame.data.tobytes()
|
pcm_data = frame.data.tobytes()
|
||||||
has_audible_audio = self._has_audible_audio(pcm_data)
|
has_audible_audio = self._has_audible_audio(pcm_data)
|
||||||
|
|
||||||
|
if time.monotonic() < session.tts_suppressed_until:
|
||||||
|
pending_pcm.clear()
|
||||||
|
pre_roll_pcm.clear()
|
||||||
|
audible_frame_streak = 0
|
||||||
|
silence_frame_streak = 0
|
||||||
|
waiting_for_post_interrupt_silence = True
|
||||||
|
continue
|
||||||
|
|
||||||
if waiting_for_post_interrupt_silence:
|
if waiting_for_post_interrupt_silence:
|
||||||
if has_audible_audio:
|
if has_audible_audio:
|
||||||
silence_frame_streak = 0
|
silence_frame_streak = 0
|
||||||
|
|||||||
Reference in New Issue
Block a user