fix: voice interupt

This commit is contained in:
0Xiao0
2026-06-12 11:38:47 +08:00
parent 9637e09aef
commit 2c4329fd84
22 changed files with 816 additions and 240 deletions

1
.gitignore vendored
View File

@ -19,3 +19,4 @@ main/mmap_generate_emoji.h
*.bin
mmap_generate_*.h
.clangd
background_frames/

View File

@ -1,25 +1,24 @@
#include "application.h"
#include "assets.h"
#include "assets/lang_config.h"
#include "audio_codec.h"
#include "board.h"
#include "display.h"
#include "system_info.h"
#include "audio_codec.h"
#include "mqtt_protocol.h"
#include "websocket_protocol.h"
#include "assets/lang_config.h"
#include "mcp_server.h"
#include "assets.h"
#include "mqtt_protocol.h"
#include "settings.h"
#include "system_info.h"
#include "websocket_protocol.h"
#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
#include <driver/gpio.h>
#include <esp_log.h>
#include <arpa/inet.h>
#include <cJSON.h>
#include <font_awesome.h>
#include <cstring>
#define TAG "Application"
Application::Application() {
event_group_ = xEventGroupCreate();
@ -33,16 +32,16 @@ Application::Application() {
aec_mode_ = kAecOff;
#endif
esp_timer_create_args_t clock_timer_args = {
.callback = [](void* arg) {
Application* app = (Application*)arg;
xEventGroupSetBits(app->event_group_, MAIN_EVENT_CLOCK_TICK);
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
.name = "clock_timer",
.skip_unhandled_events = true
};
esp_timer_create_args_t clock_timer_args = {.callback =
[](void* arg) {
Application* app = (Application*)arg;
xEventGroupSetBits(app->event_group_,
MAIN_EVENT_CLOCK_TICK);
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
.name = "clock_timer",
.skip_unhandled_events = true};
esp_timer_create(&clock_timer_args, &clock_timer_handle_);
}
@ -54,9 +53,7 @@ Application::~Application() {
vEventGroupDelete(event_group_);
}
bool Application::SetDeviceState(DeviceState state) {
return state_machine_.TransitionTo(state);
}
bool Application::SetDeviceState(DeviceState state) { return state_machine_.TransitionTo(state); }
void Application::Initialize() {
auto& board = Board::GetInstance();
@ -142,13 +139,16 @@ void Application::Initialize() {
display->SetStatus(Lang::Strings::DETECTING_MODULE);
break;
case NetworkEvent::ModemErrorNoSim:
Alert(Lang::Strings::ERROR, Lang::Strings::PIN_ERROR, "triangle_exclamation", Lang::Sounds::OGG_ERR_PIN);
Alert(Lang::Strings::ERROR, Lang::Strings::PIN_ERROR, "triangle_exclamation",
Lang::Sounds::OGG_ERR_PIN);
break;
case NetworkEvent::ModemErrorRegDenied:
Alert(Lang::Strings::ERROR, Lang::Strings::REG_ERROR, "triangle_exclamation", Lang::Sounds::OGG_ERR_REG);
Alert(Lang::Strings::ERROR, Lang::Strings::REG_ERROR, "triangle_exclamation",
Lang::Sounds::OGG_ERR_REG);
break;
case NetworkEvent::ModemErrorInitFailed:
Alert(Lang::Strings::ERROR, Lang::Strings::MODEM_INIT_ERROR, "triangle_exclamation", Lang::Sounds::OGG_EXCLAMATION);
Alert(Lang::Strings::ERROR, Lang::Strings::MODEM_INIT_ERROR, "triangle_exclamation",
Lang::Sounds::OGG_EXCLAMATION);
break;
case NetworkEvent::ModemErrorTimeout:
display->SetStatus(Lang::Strings::REGISTERING_NETWORK);
@ -168,18 +168,10 @@ void Application::Run() {
vTaskPrioritySet(nullptr, 10);
const EventBits_t ALL_EVENTS =
MAIN_EVENT_SCHEDULE |
MAIN_EVENT_SEND_AUDIO |
MAIN_EVENT_WAKE_WORD_DETECTED |
MAIN_EVENT_VAD_CHANGE |
MAIN_EVENT_CLOCK_TICK |
MAIN_EVENT_ERROR |
MAIN_EVENT_NETWORK_CONNECTED |
MAIN_EVENT_NETWORK_DISCONNECTED |
MAIN_EVENT_TOGGLE_CHAT |
MAIN_EVENT_START_LISTENING |
MAIN_EVENT_STOP_LISTENING |
MAIN_EVENT_ACTIVATION_DONE |
MAIN_EVENT_SCHEDULE | MAIN_EVENT_SEND_AUDIO | MAIN_EVENT_WAKE_WORD_DETECTED |
MAIN_EVENT_VAD_CHANGE | MAIN_EVENT_CLOCK_TICK | MAIN_EVENT_ERROR |
MAIN_EVENT_NETWORK_CONNECTED | MAIN_EVENT_NETWORK_DISCONNECTED | MAIN_EVENT_TOGGLE_CHAT |
MAIN_EVENT_START_LISTENING | MAIN_EVENT_STOP_LISTENING | MAIN_EVENT_ACTIVATION_DONE |
MAIN_EVENT_STATE_CHANGED;
while (true) {
@ -187,7 +179,8 @@ void Application::Run() {
if (bits & MAIN_EVENT_ERROR) {
SetDeviceState(kDeviceStateIdle);
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark",
Lang::Sounds::OGG_EXCLAMATION);
}
if (bits & MAIN_EVENT_NETWORK_CONNECTED) {
@ -278,12 +271,14 @@ void Application::HandleNetworkConnectedEvent() {
return;
}
xTaskCreate([](void* arg) {
Application* app = static_cast<Application*>(arg);
app->ActivationTask();
app->activation_task_handle_ = nullptr;
vTaskDelete(NULL);
}, "activation", 4096 * 2, this, 2, &activation_task_handle_);
xTaskCreate(
[](void* arg) {
Application* app = static_cast<Application*>(arg);
app->ActivationTask();
app->activation_task_handle_ = nullptr;
vTaskDelete(NULL);
},
"activation", 4096 * 2, this, 2, &activation_task_handle_);
}
// Update the status bar immediately to show the network state
@ -294,7 +289,8 @@ void Application::HandleNetworkConnectedEvent() {
void Application::HandleNetworkDisconnectedEvent() {
// Close current conversation when network disconnected
auto state = GetDeviceState();
if (state == kDeviceStateConnecting || state == kDeviceStateListening || state == kDeviceStateSpeaking) {
if (state == kDeviceStateConnecting || state == kDeviceStateListening ||
state == kDeviceStateThinking || state == kDeviceStateSpeaking) {
ESP_LOGI(TAG, "Closing audio channel due to network disconnection");
protocol_->CloseAudioChannel();
}
@ -379,7 +375,8 @@ void Application::CheckAssetsVersion() {
char message[256];
snprintf(message, sizeof(message), Lang::Strings::FOUND_NEW_ASSETS, download_url.c_str());
Alert(Lang::Strings::LOADING_ASSETS, message, "cloud_arrow_down", Lang::Sounds::OGG_UPGRADE);
Alert(Lang::Strings::LOADING_ASSETS, message, "cloud_arrow_down",
Lang::Sounds::OGG_UPGRADE);
// Wait for the audio service to be idle for 3 seconds
vTaskDelay(pdMS_TO_TICKS(3000));
@ -387,19 +384,21 @@ void Application::CheckAssetsVersion() {
board.SetPowerSaveLevel(PowerSaveLevel::PERFORMANCE);
display->SetChatMessage("system", Lang::Strings::PLEASE_WAIT);
bool success = assets.Download(download_url, [this, display](int progress, size_t speed) -> void {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
Schedule([display, message = std::string(buffer)]() {
display->SetChatMessage("system", message.c_str());
bool success =
assets.Download(download_url, [this, display](int progress, size_t speed) -> void {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
Schedule([display, message = std::string(buffer)]() {
display->SetChatMessage("system", message.c_str());
});
});
});
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER);
vTaskDelay(pdMS_TO_TICKS(1000));
if (!success) {
Alert(Lang::Strings::ERROR, Lang::Strings::DOWNLOAD_ASSETS_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
Alert(Lang::Strings::ERROR, Lang::Strings::DOWNLOAD_ASSETS_FAILED, "circle_xmark",
Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(2000));
SetDeviceState(kDeviceStateActivating);
return;
@ -415,7 +414,7 @@ void Application::CheckAssetsVersion() {
void Application::CheckNewVersion() {
const int MAX_RETRY = 10;
int retry_count = 0;
int retry_delay = 10; // Initial retry delay in seconds
int retry_delay = 10; // Initial retry delay in seconds
auto& board = Board::GetInstance();
while (true) {
@ -431,27 +430,30 @@ void Application::CheckNewVersion() {
}
char error_message[128];
snprintf(error_message, sizeof(error_message), "code=%d, url=%s", err, ota_->GetCheckVersionUrl().c_str());
snprintf(error_message, sizeof(error_message), "code=%d, url=%s", err,
ota_->GetCheckVersionUrl().c_str());
char buffer[256];
snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay, error_message);
snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay,
error_message);
Alert(Lang::Strings::ERROR, buffer, "cloud_slash", Lang::Sounds::OGG_EXCLAMATION);
ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay, retry_count, MAX_RETRY);
ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay,
retry_count, MAX_RETRY);
for (int i = 0; i < retry_delay; i++) {
vTaskDelay(pdMS_TO_TICKS(1000));
if (GetDeviceState() == kDeviceStateIdle) {
break;
}
}
retry_delay *= 2; // Double the retry delay
retry_delay *= 2; // Double the retry delay
continue;
}
retry_count = 0;
retry_delay = 10; // Reset retry delay
retry_delay = 10; // Reset retry delay
if (ota_->HasNewVersion()) {
if (UpgradeFirmware(ota_->GetFirmwareUrl(), ota_->GetFirmwareVersion())) {
return; // This line will never be reached after reboot
return; // This line will never be reached after reboot
}
// If upgrade failed, continue to normal operation
}
@ -507,9 +509,7 @@ void Application::InitializeProtocol() {
}
#endif
protocol_->OnConnected([this]() {
DismissAlert();
});
protocol_->OnConnected([this]() { DismissAlert(); });
protocol_->OnNetworkError([this](const std::string& message) {
last_error_message_ = message;
@ -517,7 +517,7 @@ void Application::InitializeProtocol() {
});
protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
if (GetDeviceState() == kDeviceStateSpeaking) {
if (accepting_tts_audio_.load() || GetDeviceState() == kDeviceStateSpeaking) {
audio_service_.PushPacketToDecodeQueue(std::move(packet));
}
});
@ -525,13 +525,16 @@ void Application::InitializeProtocol() {
protocol_->OnAudioChannelOpened([this, codec, &board]() {
board.SetPowerSaveLevel(PowerSaveLevel::PERFORMANCE);
if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
protocol_->server_sample_rate(), codec->output_sample_rate());
ESP_LOGW(TAG,
"Server sample rate %d does not match device output sample rate %d, "
"resampling may cause distortion",
protocol_->server_sample_rate(), codec->output_sample_rate());
}
});
protocol_->OnAudioChannelClosed([this, &board]() {
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER);
accepting_tts_audio_.store(false);
Schedule([this]() {
if (GetDeviceState() == kDeviceStateConnecting) {
return;
@ -547,14 +550,20 @@ void Application::InitializeProtocol() {
auto type = cJSON_GetObjectItem(root, "type");
if (strcmp(type->valuestring, "tts") == 0) {
auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) {
if (strcmp(state->valuestring, "thinking") == 0) {
Schedule([this]() { SetDeviceState(kDeviceStateThinking); });
} else if (strcmp(state->valuestring, "start") == 0) {
audio_service_.ResetDecoder();
accepting_tts_audio_.store(true);
Schedule([this]() {
aborted_ = false;
SetDeviceState(kDeviceStateSpeaking);
});
} else if (strcmp(state->valuestring, "stop") == 0) {
accepting_tts_audio_.store(false);
Schedule([this]() {
if (GetDeviceState() == kDeviceStateSpeaking) {
auto state = GetDeviceState();
if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
if (listening_mode_ == kListeningModeManualStop) {
SetDeviceState(kDeviceStateIdle);
} else {
@ -597,9 +606,7 @@ void Application::InitializeProtocol() {
ESP_LOGI(TAG, "System command: %s", command->valuestring);
if (strcmp(command->valuestring, "reboot") == 0) {
// Do a reboot if user requests a OTA update
Schedule([this]() {
Reboot();
});
Schedule([this]() { Reboot(); });
} else {
ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring);
}
@ -609,7 +616,8 @@ void Application::InitializeProtocol() {
auto message = cJSON_GetObjectItem(root, "message");
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) {
Alert(status->valuestring, message->valuestring, emotion->valuestring, Lang::Sounds::OGG_VIBRATION);
Alert(status->valuestring, message->valuestring, emotion->valuestring,
Lang::Sounds::OGG_VIBRATION);
} else {
ESP_LOGW(TAG, "Alert command requires status, message and emotion");
}
@ -618,9 +626,10 @@ void Application::InitializeProtocol() {
auto payload = cJSON_GetObjectItem(root, "payload");
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
if (cJSON_IsObject(payload)) {
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
display->SetChatMessage("system", payload_str.c_str());
});
Schedule(
[this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
display->SetChatMessage("system", payload_str.c_str());
});
} else {
ESP_LOGW(TAG, "Invalid custom message format: missing payload");
}
@ -638,32 +647,27 @@ void Application::ShowActivationCode(const std::string& code, const std::string&
char digit;
const std::string_view& sound;
};
static const std::array<digit_sound, 10> digit_sounds{{
digit_sound{'0', Lang::Sounds::OGG_0},
digit_sound{'1', Lang::Sounds::OGG_1},
digit_sound{'2', Lang::Sounds::OGG_2},
digit_sound{'3', Lang::Sounds::OGG_3},
digit_sound{'4', Lang::Sounds::OGG_4},
digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'6', Lang::Sounds::OGG_6},
digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'8', Lang::Sounds::OGG_8},
digit_sound{'9', Lang::Sounds::OGG_9}
}};
static const std::array<digit_sound, 10> digit_sounds{
{digit_sound{'0', Lang::Sounds::OGG_0}, digit_sound{'1', Lang::Sounds::OGG_1},
digit_sound{'2', Lang::Sounds::OGG_2}, digit_sound{'3', Lang::Sounds::OGG_3},
digit_sound{'4', Lang::Sounds::OGG_4}, digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'6', Lang::Sounds::OGG_6}, digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'8', Lang::Sounds::OGG_8}, digit_sound{'9', Lang::Sounds::OGG_9}}};
// This sentence uses 9KB of SRAM, so we need to wait for it to finish
Alert(Lang::Strings::ACTIVATION, message.c_str(), "link", Lang::Sounds::OGG_ACTIVATION);
for (const auto& digit : code) {
auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
[digit](const digit_sound& ds) { return ds.digit == digit; });
[digit](const digit_sound& ds) { return ds.digit == digit; });
if (it != digit_sounds.end()) {
audio_service_.PlaySound(it->sound);
}
}
}
void Application::Alert(const char* status, const char* message, const char* emotion, const std::string_view& sound) {
void Application::Alert(const char* status, const char* message, const char* emotion,
const std::string_view& sound) {
ESP_LOGW(TAG, "Alert [%s] %s: %s", emotion, status, message);
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(status);
@ -683,9 +687,7 @@ void Application::DismissAlert() {
}
}
void Application::ToggleChatState() {
ToggleChatStateForMode(kChatAgentModeNormal, false);
}
void Application::ToggleChatState() { ToggleChatStateForMode(kChatAgentModeNormal, false); }
void Application::ToggleChatStateWithVision() {
ToggleChatStateForMode(kChatAgentModeNormal, true);
@ -698,9 +700,7 @@ void Application::ToggleChatStateForMode(ChatAgentMode agent_mode, bool vision_e
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
}
bool Application::IsVisionTextModeEnabled() const {
return vision_text_mode_enabled_.load();
}
bool Application::IsVisionTextModeEnabled() const { return vision_text_mode_enabled_.load(); }
const char* Application::GetChatAgentModeName() const {
return chat_agent_mode_.load() == kChatAgentModeBeaver ? "beaver" : "normal";
@ -720,9 +720,7 @@ void Application::StartListening() {
xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING);
}
void Application::StopListening() {
xEventGroupSetBits(event_group_, MAIN_EVENT_STOP_LISTENING);
}
void Application::StopListening() { xEventGroupSetBits(event_group_, MAIN_EVENT_STOP_LISTENING); }
void Application::HandleToggleChatEvent() {
auto state = GetDeviceState();
@ -748,21 +746,21 @@ void Application::HandleToggleChatEvent() {
if (state == kDeviceStateIdle) {
ListeningMode mode = GetDefaultListeningMode();
bool agent_mode_changed = chat_agent_mode_.load() != active_chat_agent_mode_.load();
bool vision_mode_changed = vision_text_mode_enabled_.load() != active_vision_text_mode_enabled_.load();
bool vision_mode_changed =
vision_text_mode_enabled_.load() != active_vision_text_mode_enabled_.load();
if (!protocol_->IsAudioChannelOpened() || agent_mode_changed || vision_mode_changed) {
if (protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update)
Schedule([this, mode]() {
ContinueOpenAudioChannel(mode);
});
Schedule([this, mode]() { ContinueOpenAudioChannel(mode); });
return;
}
SetListeningMode(mode);
} else if (state == kDeviceStateSpeaking) {
} else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
AbortSpeaking(kAbortReasonNone);
SetListeningMode(GetDefaultListeningMode());
} else if (state == kDeviceStateListening) {
protocol_->CloseAudioChannel();
}
@ -810,13 +808,11 @@ void Application::HandleStartListeningEvent() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update)
Schedule([this]() {
ContinueOpenAudioChannel(kListeningModeManualStop);
});
Schedule([this]() { ContinueOpenAudioChannel(kListeningModeManualStop); });
return;
}
SetListeningMode(kListeningModeManualStop);
} else if (state == kDeviceStateSpeaking) {
} else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
AbortSpeaking(kAbortReasonNone);
SetListeningMode(kListeningModeManualStop);
}
@ -854,17 +850,14 @@ void Application::HandleWakeWordDetectedEvent() {
SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update),
// then continue with OpenAudioChannel which may block for ~1 second
Schedule([this, wake_word]() {
ContinueWakeWordInvoke(wake_word);
});
Schedule([this, wake_word]() { ContinueWakeWordInvoke(wake_word); });
return;
}
// Channel already opened, continue directly
ContinueWakeWordInvoke(wake_word);
} else if (state == kDeviceStateSpeaking || state == kDeviceStateListening) {
} else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking ||
state == kDeviceStateListening) {
AbortSpeaking(kAbortReasonWakeWordDetected);
// Clear send queue to avoid sending residues to server
while (audio_service_.PopPacketFromSendQueue());
if (state == kDeviceStateListening) {
protocol_->SendStartListening(GetDefaultListeningMode());
@ -931,8 +924,8 @@ void Application::HandleStateChangedEvent() {
case kDeviceStateIdle:
vision_frame_sent_for_current_listen_.store(false);
display->SetStatus(Lang::Strings::STANDBY);
display->ClearChatMessages(); // Clear messages first
display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count)
display->ClearChatMessages(); // Clear messages first
display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count)
audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(true);
break;
@ -947,18 +940,14 @@ void Application::HandleStateChangedEvent() {
display->SetStatus(Lang::Strings::LISTENING);
display->SetEmotion("neutral");
// Make sure the audio processor is running
if (play_popup_on_listening_ || !audio_service_.IsAudioProcessorRunning()) {
// For auto mode, wait for playback queue to be empty before enabling voice processing
// This prevents audio truncation when STOP arrives late due to network jitter
if (listening_mode_ == kListeningModeAutoStop) {
audio_service_.WaitForPlaybackQueueEmpty();
}
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
// Re-entering listening after an interrupt must restart the capture path even if the
// processor task is still marked running, otherwise realtime mode can show Listening
// while no fresh mic frames are sent.
if (listening_mode_ == kListeningModeAutoStop) {
audio_service_.WaitForPlaybackQueueEmpty();
}
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
#ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING
// Enable wake word detection in listening mode (configured via Kconfig)
@ -974,6 +963,16 @@ void Application::HandleStateChangedEvent() {
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
}
break;
case kDeviceStateThinking:
vad_speaking_.store(false);
display->SetStatus(Lang::Strings::THINKING);
display->SetEmotion("thinking");
if (listening_mode_ != kListeningModeRealtime) {
audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
}
break;
case kDeviceStateSpeaking:
display->SetStatus(Lang::Strings::SPEAKING);
@ -982,7 +981,9 @@ void Application::HandleStateChangedEvent() {
// Only AFE wake word can be detected in speaking mode
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
}
audio_service_.ResetDecoder();
if (!accepting_tts_audio_.load()) {
audio_service_.ResetDecoder();
}
break;
case kDeviceStateWifiConfiguring:
audio_service_.EnableVoiceProcessing(false);
@ -1026,6 +1027,8 @@ void Application::Schedule(std::function<void()>&& callback) {
void Application::AbortSpeaking(AbortReason reason) {
ESP_LOGI(TAG, "Abort speaking");
aborted_ = true;
accepting_tts_audio_.store(false);
audio_service_.ResetDecoder();
if (protocol_) {
protocol_->SendAbortSpeaking(reason);
}
@ -1069,7 +1072,8 @@ bool Application::UpgradeFirmware(const std::string& url, const std::string& ver
}
ESP_LOGI(TAG, "Starting firmware upgrade from URL: %s", upgrade_url.c_str());
Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download", Lang::Sounds::OGG_UPGRADE);
Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download",
Lang::Sounds::OGG_UPGRADE);
vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading);
@ -1091,17 +1095,19 @@ bool Application::UpgradeFirmware(const std::string& url, const std::string& ver
if (!upgrade_success) {
// Upgrade failed, restart audio service and continue running
ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation...");
audio_service_.Start(); // Restart audio service
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER); // Restore power save level
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
ESP_LOGE(TAG,
"Firmware upgrade failed, restarting audio service and continuing operation...");
audio_service_.Start(); // Restart audio service
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER); // Restore power save level
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark",
Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(3000));
return false;
} else {
// Upgrade success, reboot immediately
ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
display->SetChatMessage("system", "Upgrade successful, rebooting...");
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
Reboot();
return true;
}
@ -1120,17 +1126,13 @@ void Application::WakeWordInvoke(const std::string& wake_word) {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update)
Schedule([this, wake_word]() {
ContinueWakeWordInvoke(wake_word);
});
Schedule([this, wake_word]() { ContinueWakeWordInvoke(wake_word); });
return;
}
// Channel already opened, continue directly
ContinueWakeWordInvoke(wake_word);
} else if (state == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
Schedule([this]() { AbortSpeaking(kAbortReasonNone); });
} else if (state == kDeviceStateListening) {
Schedule([this]() {
if (protocol_) {
@ -1163,7 +1165,7 @@ void Application::RegisterMcpBroadcastCallback(std::function<void(const std::str
void Application::SendMcpMessage(const std::string& payload) {
// Always schedule to run in main task for thread safety
Schedule([this, payload](){
Schedule([this, payload]() {
if (protocol_) {
protocol_->SendMcpMessage(payload);
}
@ -1179,18 +1181,18 @@ void Application::SetAecMode(AecMode mode) {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
switch (aec_mode_) {
case kAecOff:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
break;
case kAecOnServerSide:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
case kAecOnDeviceSide:
audio_service_.EnableDeviceAec(true);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
case kAecOff:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
break;
case kAecOnServerSide:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
case kAecOnDeviceSide:
audio_service_.EnableDeviceAec(true);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
}
// If the AEC mode is changed, close the audio channel
@ -1200,9 +1202,7 @@ void Application::SetAecMode(AecMode mode) {
});
}
void Application::PlaySound(const std::string_view& sound) {
audio_service_.PlaySound(sound);
}
void Application::PlaySound(const std::string_view& sound) { audio_service_.PlaySound(sound); }
void Application::ResetProtocol() {
Schedule([this]() {

View File

@ -162,6 +162,7 @@ private:
std::atomic<bool> active_vision_text_mode_enabled_ = false;
std::atomic<bool> vad_speaking_ = false;
std::atomic<bool> vision_frame_sent_for_current_listen_ = false;
std::atomic<bool> accepting_tts_audio_ = false;
int clock_ticks_ = 0;
TaskHandle_t activation_task_handle_ = nullptr;

View File

@ -26,6 +26,7 @@
"CONNECTION_SUCCESSFUL": "Connection Successful",
"CONNECTED_TO": "Connected to ",
"LISTENING": "Listening...",
"THINKING": "Thinking...",
"SPEAKING": "Speaking...",
"SERVER_NOT_FOUND": "Looking for available service",
"SERVER_NOT_CONNECTED": "Unable to connect to service, please try again later",

View File

@ -23,6 +23,7 @@
"CONNECTING": "连接中...",
"CONNECTED_TO": "已连接 ",
"LISTENING": "聆听中...",
"THINKING": "思考中...",
"SPEAKING": "说话中...",
"SERVER_NOT_FOUND": "正在寻找可用服务",
"SERVER_NOT_CONNECTED": "无法连接服务,请稍后再试",

View File

@ -579,6 +579,7 @@ void AudioService::EnableWakeWordDetection(bool enable) {
void AudioService::EnableVoiceProcessing(bool enable) {
ESP_LOGD(TAG, "%s voice processing", enable ? "Enabling" : "Disabling");
if (enable) {
bool was_running = IsAudioProcessorRunning();
if (!audio_processor_initialized_) {
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
audio_processor_initialized_ = true;
@ -586,7 +587,7 @@ void AudioService::EnableVoiceProcessing(bool enable) {
/* We should make sure no audio is playing */
ResetDecoder();
audio_input_need_warmup_ = true;
audio_input_need_warmup_ = !was_running;
// Reset input resampler to clear cached data from previous mode (e.g. WakeWord)
// This prevents buffer overflow when switching between different feed sizes
{

View File

@ -0,0 +1,177 @@
#include "background_capture_service.h"
#include "board.h"
#include "camera.h"
#include <algorithm>
#include <esp_heap_caps.h>
#include <esp_log.h>
#define TAG "BgCapture"
BackgroundCaptureService::BackgroundCaptureService() = default;
BackgroundCaptureService::~BackgroundCaptureService() {
Stop();
}
void BackgroundCaptureService::Start() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
if (running_.exchange(true)) {
return;
}
auto result = xTaskCreate(
&BackgroundCaptureService::TaskEntry,
"bg_capture",
CONFIG_BACKGROUND_CAPTURE_TASK_STACK_SIZE,
this,
CONFIG_BACKGROUND_CAPTURE_TASK_PRIORITY,
&task_handle_);
if (result != pdPASS) {
running_.store(false);
task_handle_ = nullptr;
ESP_LOGE(TAG, "Failed to create background capture task");
}
#endif
}
void BackgroundCaptureService::Stop() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
if (!running_.exchange(false)) {
return;
}
while (task_handle_ != nullptr) {
vTaskDelay(pdMS_TO_TICKS(20));
}
#endif
}
void BackgroundCaptureService::TaskEntry(void* arg) {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
auto* service = static_cast<BackgroundCaptureService*>(arg);
service->Run();
service->task_handle_ = nullptr;
#else
(void)arg;
#endif
vTaskDelete(nullptr);
}
void BackgroundCaptureService::Run() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
ESP_LOGI(TAG, "Background capture task started");
while (running_.load()) {
if (!CaptureAndSendFrame()) {
consecutive_failures_++;
auto delay_ms = GetFailureDelayMs();
ESP_LOGW(TAG, "Background capture retry in %u ms, failures=%u",
delay_ms, consecutive_failures_);
vTaskDelay(pdMS_TO_TICKS(delay_ms));
continue;
}
consecutive_failures_ = 0;
vTaskDelay(pdMS_TO_TICKS(CONFIG_BACKGROUND_CAPTURE_FRAME_INTERVAL_MS));
}
ESP_LOGI(TAG, "Background capture task stopped");
#endif
}
bool BackgroundCaptureService::CaptureAndSendFrame() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
const size_t free_internal_heap = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
if (free_internal_heap < CONFIG_BACKGROUND_CAPTURE_MIN_FREE_INTERNAL_HEAP) {
ESP_LOGW(TAG, "Skip background capture, low internal heap: free=%u threshold=%u",
static_cast<unsigned>(free_internal_heap),
static_cast<unsigned>(CONFIG_BACKGROUND_CAPTURE_MIN_FREE_INTERNAL_HEAP));
return false;
}
auto camera = Board::GetInstance().GetCamera();
if (camera == nullptr) {
ESP_LOGW(TAG, "No camera available for background capture");
return false;
}
std::string jpeg_data;
if (!camera->CaptureToJpeg(jpeg_data, false)) {
ESP_LOGW(TAG, "Failed to capture background frame");
return false;
}
if (jpeg_data.empty()) {
ESP_LOGW(TAG, "Captured empty background frame");
return false;
}
return UploadJpegFrame(jpeg_data);
#else
return false;
#endif
}
uint32_t BackgroundCaptureService::GetFailureDelayMs() const {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
const uint32_t base_delay_ms = CONFIG_BACKGROUND_CAPTURE_RETRY_INTERVAL_MS;
const uint32_t max_delay_ms = CONFIG_BACKGROUND_CAPTURE_MAX_BACKOFF_MS;
const uint32_t shift = std::min<uint32_t>(consecutive_failures_ - 1, 4);
return std::min<uint32_t>(base_delay_ms << shift, max_delay_ms);
#else
return 0;
#endif
}
bool BackgroundCaptureService::UploadJpegFrame(const std::string& jpeg_data) {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
const std::string url = CONFIG_BACKGROUND_CAPTURE_UPLOAD_URL;
if (url.empty()) {
ESP_LOGI(TAG, "Captured background frame: %u bytes", static_cast<unsigned>(jpeg_data.size()));
return true;
}
auto network = Board::GetInstance().GetNetwork();
if (network == nullptr) {
ESP_LOGW(TAG, "No network available for background upload");
return false;
}
const std::string boundary = "----XIAOZHI_BACKGROUND_CAPTURE_BOUNDARY";
auto http = network->CreateHttp(3);
http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
if (!http->Open("POST", url)) {
ESP_LOGW(TAG, "Failed to open background upload URL: %s", url.c_str());
return false;
}
std::string file_header;
file_header += "--" + boundary + "\r\n";
file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"frame.jpg\"\r\n";
file_header += "Content-Type: image/jpeg\r\n\r\n";
http->Write(file_header.c_str(), file_header.size());
http->Write(jpeg_data.data(), jpeg_data.size());
std::string footer;
footer += "\r\n--" + boundary + "--\r\n";
http->Write(footer.c_str(), footer.size());
http->Write("", 0);
const int status_code = http->GetStatusCode();
http->Close();
if (status_code < 200 || status_code >= 300) {
ESP_LOGW(TAG, "Background upload failed, status=%d", status_code);
return false;
}
ESP_LOGI(TAG, "Uploaded background frame: %u bytes", static_cast<unsigned>(jpeg_data.size()));
return true;
#else
(void)jpeg_data;
return false;
#endif
}

View File

@ -0,0 +1,32 @@
#ifndef BACKGROUND_CAPTURE_SERVICE_H
#define BACKGROUND_CAPTURE_SERVICE_H
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <atomic>
#include <cstdint>
#include <string>
class BackgroundCaptureService {
public:
BackgroundCaptureService();
~BackgroundCaptureService();
void Start();
void Stop();
bool IsRunning() const { return running_.load(); }
private:
TaskHandle_t task_handle_ = nullptr;
std::atomic<bool> running_ = false;
uint32_t consecutive_failures_ = 0;
static void TaskEntry(void* arg);
void Run();
bool CaptureAndSendFrame();
bool UploadJpegFrame(const std::string& jpeg_data);
uint32_t GetFailureDelayMs() const;
};
#endif // BACKGROUND_CAPTURE_SERVICE_H

View File

@ -214,6 +214,9 @@ public:
case kDeviceStateSpeaking:
ctrl_->SetStatusColor(64, 0, 0); // red
break;
case kDeviceStateThinking:
ctrl_->SetStatusColor(0, 0, 64); // blue
break;
default:
ctrl_->SetStatusColor(0, 0, 64); // blue
break;

View File

@ -203,7 +203,10 @@ void WifiBoard::EnterWifiConfigMode() {
auto& app = Application::GetInstance();
auto state = app.GetDeviceState();
if (state == kDeviceStateSpeaking || state == kDeviceStateListening || state == kDeviceStateIdle) {
if (state == kDeviceStateSpeaking ||
state == kDeviceStateThinking ||
state == kDeviceStateListening ||
state == kDeviceStateIdle) {
// Reset protocol (close audio channel, reset protocol)
Application::GetInstance().ResetProtocol();

View File

@ -85,6 +85,13 @@ void ElectronEmojiDisplay::SetStatus(const char* status) {
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return;
} else if (strcmp(status, Lang::Strings::THINKING) == 0) {
lv_obj_set_style_text_font(status_label_, text_font, 0);
lv_label_set_text(status_label_, status);
lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return;
} else if (strcmp(status, Lang::Strings::CONNECTING) == 0) {
lv_obj_set_style_text_font(status_label_, &OTTO_ICON_FONT, 0);
lv_label_set_text(status_label_, "\xEF\x83\x81"); // U+F0c1 连接图标

View File

@ -155,6 +155,8 @@ void EmojiWidget::SetStatus(const char* status)
if (player_) {
if (strcmp(status, Lang::Strings::LISTENING) == 0) {
player_->StartPlayer("asking", true, 15);
} else if (strcmp(status, Lang::Strings::THINKING) == 0) {
player_->StartPlayer("thinking", true, 15);
} else if (strcmp(status, Lang::Strings::STANDBY) == 0) {
player_->StartPlayer("wake", true, 15);
}

View File

@ -231,9 +231,9 @@ private:
// 如果当前是聆听状态,切换到待命状态
ESP_LOGI(TAG, "从聆听状态切换到待命状态");
app.ToggleChatState(); // 切换到待命状态
} else if (current_state == kDeviceStateSpeaking) {
// 如果当前是说话状态,终止说话并切换到待命状态
ESP_LOGI(TAG, "从说话状态切换到待命状态");
} else if (current_state == kDeviceStateSpeaking || current_state == kDeviceStateThinking) {
// 如果当前是说话或思考状态,终止并切换到待命状态
ESP_LOGI(TAG, "从说话/思考状态切换到待命状态");
app.ToggleChatState(); // 终止说话
} else {
// 其他状态下只唤醒设备

View File

@ -77,6 +77,13 @@ void OttoEmojiDisplay::SetStatus(const char* status) {
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return;
} else if (strcmp(status, Lang::Strings::THINKING) == 0) {
lv_obj_set_style_text_font(status_label_, text_font, 0);
lv_label_set_text(status_label_, status);
lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return;
} else if (strcmp(status, Lang::Strings::CONNECTING) == 0) {
lv_obj_set_style_text_font(status_label_, &OTTO_ICON_FONT, 0);
lv_label_set_text(status_label_, "\xEF\x83\x81"); // U+F0c1 连接图标

View File

@ -1,5 +1,6 @@
import asyncio
import base64
import contextlib
import json
import os
import re
@ -44,29 +45,40 @@ CHAT_MODE_AGENT_NAMES = {
CONNECT_TIMEOUT_SECONDS = float(os.getenv("LIVEKIT_CONNECT_TIMEOUT_SECONDS", "20.0"))
AGENT_READY_TIMEOUT_SECONDS = float(os.getenv("LIVEKIT_AGENT_READY_TIMEOUT_SECONDS", "10.0"))
WS_PORT = 8080
WS_MAX_QUEUE = int(os.getenv("BRIDGE_WS_MAX_QUEUE", "128"))
WS_MAX_SIZE = int(os.getenv("BRIDGE_WS_MAX_SIZE", str(8 * 1024 * 1024)))
AGENT_DISPATCH_MODE = os.getenv("AGENT_DISPATCH_MODE", "token").lower()
PROJECT_ROOT = Path(__file__).resolve().parent.parent
VISION_FRAME_SAVE_DIR = Path(os.getenv("VISION_FRAME_SAVE_DIR", str(PROJECT_ROOT / "vision_frames")))
INPUT_SAMPLE_RATE = 16000
OUTPUT_SAMPLE_RATE = 24000
INPUT_FRAME_DURATION_MS = 20
INPUT_SAMPLES_PER_OPUS_FRAME = INPUT_SAMPLE_RATE * INPUT_FRAME_DURATION_MS // 1000
INPUT_MAX_SAMPLES_PER_OPUS_FRAME = INPUT_SAMPLE_RATE * 60 // 1000
OUTPUT_FRAME_DURATION_MS = 20
OUTPUT_SAMPLES_PER_OPUS_FRAME = OUTPUT_SAMPLE_RATE * OUTPUT_FRAME_DURATION_MS // 1000
TTS_IDLE_TIMEOUT_SECONDS = 0.25
INPUT_SAMPLE_RATE = int(os.getenv("BRIDGE_INPUT_SAMPLE_RATE", "16000"))
OUTPUT_SAMPLE_RATE = int(os.getenv("BRIDGE_OUTPUT_SAMPLE_RATE", "24000"))
INPUT_FRAME_DURATION_MS = int(os.getenv("BRIDGE_INPUT_FRAME_DURATION_MS", "60"))
INPUT_MAX_SAMPLES_PER_OPUS_FRAME = INPUT_SAMPLE_RATE * 120 // 1000
OUTPUT_FRAME_DURATION_MS = int(os.getenv("BRIDGE_OUTPUT_FRAME_DURATION_MS", "60"))
AUDIO_STATS_INTERVAL_SECONDS = float(os.getenv("BRIDGE_AUDIO_STATS_INTERVAL_SECONDS", "5.0"))
DOWNLINK_SEND_GAP_WARN_MS = float(os.getenv("BRIDGE_DOWNLINK_SEND_GAP_WARN_MS", "180.0"))
UPLINK_CAPTURE_TIMEOUT_SECONDS = float(os.getenv("BRIDGE_UPLINK_CAPTURE_TIMEOUT_SECONDS", "0.25"))
TTS_IDLE_TIMEOUT_SECONDS = float(os.getenv("TTS_IDLE_TIMEOUT_SECONDS", "1.2"))
TTS_MIN_ACTIVE_SECONDS = float(os.getenv("TTS_MIN_ACTIVE_SECONDS", "1.0"))
TTS_SILENCE_PEAK_THRESHOLD = 96
TTS_PRE_ROLL_MS = 80
TTS_START_CONSECUTIVE_AUDIBLE_FRAMES = 1
TTS_PRE_ROLL_MS = int(os.getenv("TTS_PRE_ROLL_MS", "480"))
TTS_START_CONSECUTIVE_AUDIBLE_FRAMES = int(os.getenv("TTS_START_CONSECUTIVE_AUDIBLE_FRAMES", "1"))
TTS_INTERRUPT_SILENCE_FRAMES = 3
INTERRUPT_TOPIC = "lk.interrupt"
VISION_FRAME_TOPIC = "vision.frame"
AGENT_STATE_ATTRIBUTE = "lk.agent.state"
TTS_DISPLAY_SENTENCE_BREAKS = "。!?!?;"
TTS_DISPLAY_SCROLL_WIDTH = int(os.getenv("TTS_DISPLAY_SCROLL_WIDTH", "18"))
TTS_DISPLAY_SCROLL_INTERVAL_SECONDS = float(os.getenv("TTS_DISPLAY_SCROLL_INTERVAL_SECONDS", "0.18"))
TTS_DISPLAY_SCROLL_GAP = " "
TTS_INTERRUPT_SUPPRESS_SECONDS = float(os.getenv("TTS_INTERRUPT_SUPPRESS_SECONDS", "0.8"))
TTS_POST_INTERRUPT_USER_AUDIO_GRACE_SECONDS = float(
os.getenv("TTS_POST_INTERRUPT_USER_AUDIO_GRACE_SECONDS", "0.25")
)
TTS_POST_INTERRUPT_LISTEN_WINDOW_SECONDS = float(
os.getenv("TTS_POST_INTERRUPT_LISTEN_WINDOW_SECONDS", "8.0")
)
EMOTION_TEXT_PATTERN = re.compile(
r"^\s*<?\s*emotion\s*=\s*([^\s>,;]+)\s*>?[\s,;]*(.*)$",
re.DOTALL,
@ -95,6 +107,7 @@ class DeviceSession:
agent_ready: asyncio.Event
forwarding_tracks: dict[str, asyncio.Task[Any]] = field(default_factory=dict)
tts_active: bool = False
tts_thinking: bool = False
tts_idle_task: Optional[asyncio.Task] = None
tts_display_task: Optional[asyncio.Task] = None
tts_stream_id: int = 0
@ -103,6 +116,11 @@ class DeviceSession:
tts_display_final: bool = False
tts_emotion: str = ""
tts_suppressed_until: float = 0.0
tts_started_at: float = 0.0
tts_last_audible_at: float = 0.0
tts_waiting_for_user_audio_after_interrupt: bool = False
last_interrupt_time: float = 0.0
last_uplink_audible_time: float = 0.0
agent_dispatch_task: Optional[asyncio.Task] = None
closed: bool = False
captured_frame_count: int = 0
@ -138,6 +156,100 @@ class ESP32LiveKitBridge:
if formatted_tb.strip():
print(formatted_tb.rstrip())
def _audio_duration_ms(self, sample_count: int, sample_rate: int) -> float:
if sample_rate <= 0:
return 0.0
return sample_count * 1000.0 / sample_rate
def _build_server_hello(self, session: DeviceSession) -> dict[str, Any]:
return {
"type": "hello",
"transport": "websocket",
"session": {
"room": session.room_name,
"identity": session.identity,
},
"audio_params": {
"format": "opus",
"sample_rate": OUTPUT_SAMPLE_RATE,
"channels": 1,
"frame_duration": OUTPUT_FRAME_DURATION_MS,
},
}
def _log_client_hello(self, session: DeviceSession, message: dict[str, Any]) -> None:
audio_params = message.get("audio_params")
if not isinstance(audio_params, dict):
return
sample_rate = audio_params.get("sample_rate")
frame_duration = audio_params.get("frame_duration")
channels = audio_params.get("channels")
fmt = audio_params.get("format")
print(
"[client-audio] "
f"device={session.device_id} format={fmt} sample_rate={sample_rate} "
f"channels={channels} frame_duration={frame_duration}"
)
if sample_rate != INPUT_SAMPLE_RATE or channels != 1:
print(
"[client-audio] warning: bridge uplink decode expects "
f"{INPUT_SAMPLE_RATE}Hz mono, got {sample_rate}Hz channels={channels}"
)
if frame_duration != INPUT_FRAME_DURATION_MS:
print(
"[client-audio] warning: bridge expects "
f"{INPUT_FRAME_DURATION_MS}ms uplink frames, got {frame_duration}ms"
)
def _track_room_connect_task(
self,
session: DeviceSession,
task: asyncio.Task[Any],
) -> None:
if task.cancelled():
return
try:
task.result()
except Exception as exc:
self._log_exception(
f"LiveKit 房间连接后台任务失败: room={session.room_name}",
exc,
)
websocket = session.websocket
if websocket is not None:
asyncio.create_task(websocket.close(code=1011, reason="livekit connect failed"))
async def _capture_mic_frame(
self,
session: DeviceSession,
pcm_bytes: bytes,
num_samples: int,
) -> bool:
try:
frame = AudioFrame(pcm_bytes, INPUT_SAMPLE_RATE, 1, num_samples)
except TypeError:
frame = AudioFrame.create(
sample_rate=INPUT_SAMPLE_RATE,
num_channels=1,
samples_per_channel=num_samples,
)
memoryview(frame.data).cast("B")[:] = pcm_bytes
try:
await asyncio.wait_for(
session.mic_source.capture_frame(frame),
timeout=UPLINK_CAPTURE_TIMEOUT_SECONDS,
)
return True
except asyncio.TimeoutError:
print(
"[uplink] warning: capture_frame timeout, dropping frame "
f"device={session.device_id} samples={num_samples}"
)
return False
def _build_device_id(self, websocket: Any) -> str:
headers = websocket.request.headers
requested_id = headers.get("X-Device-Id") or headers.get("Device-Id")
@ -445,7 +557,7 @@ class ESP32LiveKitBridge:
print("收到 vision frame但 image 字段为空")
return
saved_path = self._save_vision_frame(session, image)
saved_path = await asyncio.to_thread(self._save_vision_frame, session, image)
if saved_path is None:
return
print(f"已保存 vision frame: {saved_path}")
@ -599,6 +711,10 @@ class ESP32LiveKitBridge:
if session.tts_active:
print("跳过 tts start当前已处于激活状态")
return
block_reason = self._tts_resume_block_reason(session, include_user_quiet=False)
if block_reason is not None:
print(f"跳过 tts start打断后仍在等待稳定聆听: {block_reason}")
return
if time.monotonic() < session.tts_suppressed_until:
print("跳过 tts start中断后的残留音频仍在抑制窗口内")
return
@ -607,16 +723,83 @@ class ESP32LiveKitBridge:
session.tts_display_final = False
session.tts_emotion = ""
self._cancel_tts_display_task(session)
now = time.monotonic()
session.tts_started_at = now
session.tts_last_audible_at = now
await self._send_tts_state(session, "start")
session.tts_active = True
session.tts_thinking = False
async def _start_thinking(self, session: DeviceSession) -> None:
if session.tts_active:
print("跳过 tts thinking当前已处于 TTS 播放状态")
return
if session.tts_thinking:
print("跳过 tts thinking当前已处于思考状态")
return
block_reason = self._tts_resume_block_reason(session, include_user_quiet=False)
if block_reason is not None:
print(f"跳过 tts thinking打断后仍在等待稳定聆听: {block_reason}")
return
if time.monotonic() < session.tts_suppressed_until:
print("跳过 tts thinking中断后的残留音频仍在抑制窗口内")
return
await self._send_tts_state(session, "thinking")
session.tts_thinking = True
def _tts_resume_block_reason(
self,
session: DeviceSession,
now: Optional[float] = None,
*,
include_user_quiet: bool = True,
) -> Optional[str]:
if now is None:
now = time.monotonic()
if session.tts_waiting_for_user_audio_after_interrupt:
return "waiting_for_user_audio_after_interrupt"
if session.last_interrupt_time <= 0.0:
return None
since_interrupt = now - session.last_interrupt_time
if since_interrupt > TTS_POST_INTERRUPT_LISTEN_WINDOW_SECONDS:
return None
if session.last_uplink_audible_time < session.last_interrupt_time:
return None
if not include_user_quiet:
return None
quiet_for = now - session.last_uplink_audible_time
if quiet_for < TTS_POST_INTERRUPT_USER_AUDIO_GRACE_SECONDS:
return f"user_audio_quiet_for={quiet_for:.2f}s"
return None
def _handle_agent_state(self, session: DeviceSession, participant: rtc.Participant) -> None:
state = participant.attributes.get(AGENT_STATE_ATTRIBUTE)
if not isinstance(state, str) or not state:
return
print(
f"[agent-state] room={session.room_name} identity={participant.identity} state={state}"
)
if state == "thinking":
asyncio.create_task(self._start_thinking(session))
async def _stop_tts(self, session: DeviceSession) -> None:
if not session.tts_active:
if not session.tts_active and not session.tts_thinking:
print("跳过 tts stop当前未激活")
return
self._cancel_tts_display_task(session)
await self._send_tts_state(session, "stop")
session.tts_active = False
session.tts_thinking = False
session.tts_started_at = 0.0
session.tts_last_audible_at = 0.0
session.tts_transcript_text = ""
session.tts_display_text = ""
session.tts_display_final = False
@ -628,6 +811,9 @@ class ESP32LiveKitBridge:
session.tts_idle_task.cancel()
session.tts_idle_task = None
session.tts_active = False
session.tts_thinking = False
session.tts_started_at = 0.0
session.tts_last_audible_at = 0.0
session.tts_transcript_text = ""
session.tts_display_text = ""
session.tts_display_final = False
@ -637,12 +823,16 @@ class ESP32LiveKitBridge:
async def _abort_tts(self, session: DeviceSession, reason: str = "client_abort") -> None:
print(f"收到打断请求,停止当前 TTS: device={session.device_id} reason={reason}")
now = time.monotonic()
session.tts_stream_id += 1
session.tts_suppressed_until = time.monotonic() + TTS_INTERRUPT_SUPPRESS_SECONDS
session.last_interrupt_time = now
session.tts_suppressed_until = now + TTS_INTERRUPT_SUPPRESS_SECONDS
session.tts_waiting_for_user_audio_after_interrupt = True
await self._force_stop_tts(session, reason)
asyncio.create_task(self._send_agent_interrupt(session, reason))
def _reset_tts_idle_timer(self, session: DeviceSession) -> None:
session.tts_last_audible_at = time.monotonic()
if session.tts_idle_task is not None:
session.tts_idle_task.cancel()
session.tts_idle_task = asyncio.create_task(
@ -651,11 +841,28 @@ class ESP32LiveKitBridge:
async def _tts_idle_watchdog(self, session: DeviceSession, stream_id: int) -> None:
try:
await asyncio.sleep(TTS_IDLE_TIMEOUT_SECONDS)
if stream_id != session.tts_stream_id:
while True:
await asyncio.sleep(TTS_IDLE_TIMEOUT_SECONDS)
if stream_id != session.tts_stream_id or not session.tts_active:
return
now = time.monotonic()
idle_for = now - session.tts_last_audible_at
active_for = now - session.tts_started_at
remaining = max(
TTS_IDLE_TIMEOUT_SECONDS - idle_for,
TTS_MIN_ACTIVE_SECONDS - active_for,
)
if remaining > 0:
await asyncio.sleep(remaining)
continue
print(
"TTS 静音达到阈值,切回聆听状态: "
f"idle={idle_for:.2f}s active={active_for:.2f}s"
)
await self._stop_tts(session)
return
print(f"TTS 空闲超过 {TTS_IDLE_TIMEOUT_SECONDS}s切回聆听状态")
await self._stop_tts(session)
except asyncio.CancelledError:
pass
@ -799,6 +1006,7 @@ class ESP32LiveKitBridge:
if self._is_agent_participant(participant, session.agent_name):
session.agent_ready.set()
self._scan_participant_audio_tracks(session, participant, "connected_scan")
self._handle_agent_state(session, participant)
@session.room.on("participant_connected")
def on_participant_connected(participant: rtc.RemoteParticipant) -> None:
@ -810,6 +1018,7 @@ class ESP32LiveKitBridge:
self._scan_participant_audio_tracks(
session, participant, "participant_connected_scan"
)
self._handle_agent_state(session, participant)
@session.room.on("participant_disconnected")
def on_participant_disconnected(participant: rtc.RemoteParticipant) -> None:
@ -820,6 +1029,16 @@ class ESP32LiveKitBridge:
if not track_sid.endswith(f":{participant.identity}")
}
@session.room.on("participant_attributes_changed")
def on_participant_attributes_changed(changed: list[str], participant: rtc.Participant) -> None:
if AGENT_STATE_ATTRIBUTE not in changed:
return
if not isinstance(participant, rtc.RemoteParticipant):
return
if not self._is_agent_participant(participant, session.agent_name):
return
self._handle_agent_state(session, participant)
@session.room.on("data_received")
def on_data_received(data_packet: rtc.DataPacket) -> None:
identity = data_packet.participant.identity if data_packet.participant else "未知"
@ -865,6 +1084,7 @@ class ESP32LiveKitBridge:
if not segment.final:
continue
display_text = segment.text
asyncio.create_task(self._start_thinking(session))
if session.websocket is not None:
ws = session.websocket
@ -967,9 +1187,21 @@ class ESP32LiveKitBridge:
async def start(self) -> None:
print(f"[config] websocket_port={WS_PORT}")
print(f"[config] websocket_max_queue={WS_MAX_QUEUE} websocket_max_size={WS_MAX_SIZE}")
print(f"[config] livekit_ws_url={LIVEKIT_WS_URL}")
print(f"[config] token_url={TOKEN_URL}")
print(f"[config] agent_dispatch_mode={AGENT_DISPATCH_MODE}")
print(
"[config] audio="
f"uplink_decode:{INPUT_SAMPLE_RATE}Hz/{INPUT_FRAME_DURATION_MS}ms "
f"downlink_encode:{OUTPUT_SAMPLE_RATE}Hz/{OUTPUT_FRAME_DURATION_MS}ms "
f"stats_interval:{AUDIO_STATS_INTERVAL_SECONDS}s "
f"capture_timeout:{UPLINK_CAPTURE_TIMEOUT_SECONDS}s "
f"tts_idle:{TTS_IDLE_TIMEOUT_SECONDS}s "
f"tts_min_active:{TTS_MIN_ACTIVE_SECONDS}s "
f"tts_start_frames:{TTS_START_CONSECUTIVE_AUDIBLE_FRAMES} "
f"tts_pre_roll:{TTS_PRE_ROLL_MS}ms"
)
print(
"[config] agents="
f"normal:{CHAT_MODE_AGENT_NAMES['normal']} "
@ -996,6 +1228,7 @@ class ESP32LiveKitBridge:
session.websocket = None
session.agent_ready.set()
session.tts_active = False
session.tts_thinking = False
session.tts_stream_id += 1
if session.tts_idle_task is not None:
session.tts_idle_task.cancel()
@ -1016,13 +1249,20 @@ class ESP32LiveKitBridge:
pending_pcm = bytearray()
pre_roll_pcm = bytearray()
pre_roll_max_bytes = OUTPUT_SAMPLE_RATE * TTS_PRE_ROLL_MS // 1000 * 2
output_samples_per_opus_frame = OUTPUT_SAMPLE_RATE * OUTPUT_FRAME_DURATION_MS // 1000
output_frame_bytes = output_samples_per_opus_frame * 2
audible_frame_streak = 0
silence_frame_streak = 0
waiting_for_post_interrupt_silence = False
downlink_packets = 0
downlink_audio_ms = 0.0
last_downlink_stats_time = time.monotonic()
last_send_time: Optional[float] = None
stream_id = session.tts_stream_id
print(
f"启动 TTS 转发: device={session.device_id} room={session.room_name} "
f"track_sid={track_sid} stream_id={stream_id}"
f"track_sid={track_sid} stream_id={stream_id} "
f"opus={OUTPUT_SAMPLE_RATE}Hz/{OUTPUT_FRAME_DURATION_MS}ms"
)
try:
@ -1043,7 +1283,8 @@ class ESP32LiveKitBridge:
pcm_data = frame.data.tobytes()
has_audible_audio = self._has_audible_audio(pcm_data)
if time.monotonic() < session.tts_suppressed_until:
now = time.monotonic()
if now < session.tts_suppressed_until:
pending_pcm.clear()
pre_roll_pcm.clear()
audible_frame_streak = 0
@ -1051,6 +1292,31 @@ class ESP32LiveKitBridge:
waiting_for_post_interrupt_silence = True
continue
block_reason = self._tts_resume_block_reason(
session,
now,
include_user_quiet=False,
)
if block_reason is not None:
pending_pcm.clear()
pre_roll_pcm.clear()
audible_frame_streak = 0
silence_frame_streak = 0
if block_reason == "waiting_for_user_audio_after_interrupt":
waiting_for_post_interrupt_silence = True
continue
if (
waiting_for_post_interrupt_silence
and session.last_interrupt_time > 0.0
and session.last_uplink_audible_time >= session.last_interrupt_time
and now - session.last_uplink_audible_time
>= TTS_POST_INTERRUPT_USER_AUDIO_GRACE_SECONDS
):
print("检测到用户打断后语音已结束,允许新 TTS 直接起播")
waiting_for_post_interrupt_silence = False
silence_frame_streak = 0
if waiting_for_post_interrupt_silence:
if has_audible_audio:
silence_frame_streak = 0
@ -1098,20 +1364,42 @@ class ESP32LiveKitBridge:
if not current_frame_buffered:
pending_pcm.extend(pcm_data)
frame_bytes = OUTPUT_SAMPLES_PER_OPUS_FRAME * 2
while (
len(pending_pcm) >= frame_bytes
len(pending_pcm) >= output_frame_bytes
and stream_id == session.tts_stream_id
and session.websocket is not None
):
try:
now = time.monotonic()
if last_send_time is not None:
send_gap_ms = (now - last_send_time) * 1000.0
if send_gap_ms > DOWNLINK_SEND_GAP_WARN_MS:
print(
"[downlink] warning: send gap "
f"{send_gap_ms:.1f}ms device={session.device_id} "
f"pending_ms={self._audio_duration_ms(len(pending_pcm) // 2, OUTPUT_SAMPLE_RATE):.1f}"
)
last_send_time = now
opus_packet = encoder.encode(
bytes(pending_pcm[:frame_bytes]),
OUTPUT_SAMPLES_PER_OPUS_FRAME,
bytes(pending_pcm[:output_frame_bytes]),
output_samples_per_opus_frame,
)
del pending_pcm[:frame_bytes]
del pending_pcm[:output_frame_bytes]
await session.websocket.send(self._wrap_opus_payload(session, opus_packet))
downlink_packets += 1
downlink_audio_ms += OUTPUT_FRAME_DURATION_MS
if now - last_downlink_stats_time >= AUDIO_STATS_INTERVAL_SECONDS:
print(
"[downlink] "
f"device={session.device_id} packets={downlink_packets} "
f"audio_ms={downlink_audio_ms:.0f} "
f"pending_ms={self._audio_duration_ms(len(pending_pcm) // 2, OUTPUT_SAMPLE_RATE):.1f}"
)
downlink_packets = 0
downlink_audio_ms = 0.0
last_downlink_stats_time = now
except Exception as exc:
print(f"发送回 ESP32 失败: {exc}")
break
@ -1171,27 +1459,26 @@ class ESP32LiveKitBridge:
)
session.tts_stream_id += 1
opus_decoder = None
uplink_packets = 0
uplink_audio_ms = 0.0
uplink_decode_errors = 0
uplink_dropped_frames = 0
last_uplink_stats_time = time.monotonic()
room_connect_task: Optional[asyncio.Task[Any]] = None
try:
hello_msg = {
"type": "hello",
"transport": "websocket",
"session": {
"room": session.room_name,
"identity": session.identity,
},
"audio_params": {
"format": "opus",
"sample_rate": OUTPUT_SAMPLE_RATE,
"channels": 1,
"frame_duration": OUTPUT_FRAME_DURATION_MS,
},
}
hello_msg = self._build_server_hello(session)
await websocket.send(json.dumps(hello_msg))
print(f"已发送 server hello: device={device_id} room={session.room_name}")
print(
f"已发送 server hello: device={device_id} room={session.room_name} "
f"audio={OUTPUT_SAMPLE_RATE}Hz/{OUTPUT_FRAME_DURATION_MS}ms"
)
asyncio.create_task(self._run_emotion_test_sequence(session))
await self._connect_session_room(session)
room_connect_task = asyncio.create_task(self._connect_session_room(session))
room_connect_task.add_done_callback(
lambda task: self._track_room_connect_task(session, task)
)
async for message in websocket:
if isinstance(message, bytes):
@ -1214,6 +1501,16 @@ class ESP32LiveKitBridge:
if num_samples > 0:
session.captured_frame_count += 1
now = time.monotonic()
uplink_packets += 1
uplink_audio_ms += self._audio_duration_ms(num_samples, INPUT_SAMPLE_RATE)
if self._has_audible_audio(pcm_bytes):
session.last_uplink_audible_time = now
if session.tts_waiting_for_user_audio_after_interrupt:
session.tts_waiting_for_user_audio_after_interrupt = False
print(
f"[uplink] detected user audio after interrupt: "
f"device={session.device_id}"
)
if (
session.captured_frame_count <= 5
or now - session.first_capture_log_time >= 5.0
@ -1224,25 +1521,32 @@ class ESP32LiveKitBridge:
# f"bytes={len(pcm_bytes)} samples={num_samples} "
# f"room={session.room_name}"
# )
try:
frame = AudioFrame(pcm_bytes, INPUT_SAMPLE_RATE, 1, num_samples)
await session.mic_source.capture_frame(frame)
except TypeError:
frame = AudioFrame.create(
sample_rate=INPUT_SAMPLE_RATE,
num_channels=1,
samples_per_channel=num_samples,
if now - last_uplink_stats_time >= AUDIO_STATS_INTERVAL_SECONDS:
print(
"[uplink] "
f"device={session.device_id} packets={uplink_packets} "
f"audio_ms={uplink_audio_ms:.0f} "
f"decode_errors={uplink_decode_errors} "
f"dropped_frames={uplink_dropped_frames}"
)
memoryview(frame.data).cast("B")[:] = pcm_bytes
await session.mic_source.capture_frame(frame)
uplink_packets = 0
uplink_audio_ms = 0.0
uplink_decode_errors = 0
uplink_dropped_frames = 0
last_uplink_stats_time = now
if not await self._capture_mic_frame(session, pcm_bytes, num_samples):
uplink_dropped_frames += 1
except Exception as exc:
uplink_decode_errors += 1
print(f"Opus audio decode error ({len(message)} bytes): {exc}")
elif isinstance(message, str):
try:
data = json.loads(message)
# print(f"收到 ESP32 JSON 消息: {data}")
msg_type = data.get("type")
if msg_type == "abort":
if msg_type == "hello":
self._log_client_hello(session, data)
elif msg_type == "abort":
reason = data.get("reason")
abort_reason = reason if isinstance(reason, str) and reason else "button_abort"
print(f"处理 ESP32 打断请求: reason={abort_reason}")
@ -1257,6 +1561,10 @@ class ESP32LiveKitBridge:
self._log_exception("WebSocket 其他错误", exc)
finally:
print(f"ESP32 断开连接: device={device_id} room={session.room_name}")
if room_connect_task is not None and not room_connect_task.done():
room_connect_task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await room_connect_task
await self._close_session(session)
self.device_sessions.pop(device_id, None)
@ -1265,7 +1573,13 @@ async def main() -> None:
bridge = ESP32LiveKitBridge()
try:
await bridge.start()
async with websockets.serve(bridge.handle_websocket, "0.0.0.0", WS_PORT):
async with websockets.serve(
bridge.handle_websocket,
"0.0.0.0",
WS_PORT,
max_queue=WS_MAX_QUEUE,
max_size=WS_MAX_SIZE,
):
print(f"WebSocket 服务器运行在端口 {WS_PORT},等待 ESP32 连接...")
await asyncio.Future()
finally:

View File

@ -8,6 +8,7 @@ enum DeviceState {
kDeviceStateIdle,
kDeviceStateConnecting,
kDeviceStateListening,
kDeviceStateThinking,
kDeviceStateSpeaking,
kDeviceStateUpgrading,
kDeviceStateActivating,

View File

@ -13,6 +13,7 @@ static const char* const STATE_STRINGS[] = {
"idle",
"connecting",
"listening",
"thinking",
"speaking",
"upgrading",
"activating",
@ -69,9 +70,10 @@ bool DeviceStateMachine::IsValidTransition(DeviceState from, DeviceState to) con
to == kDeviceStateActivating;
case kDeviceStateIdle:
// Can go to connecting, listening (manual mode), speaking, activating, upgrading, or wifi configuring
// Can go to connecting, listening (manual mode), thinking, speaking, activating, upgrading, or wifi configuring
return to == kDeviceStateConnecting ||
to == kDeviceStateListening ||
to == kDeviceStateThinking ||
to == kDeviceStateSpeaking ||
to == kDeviceStateActivating ||
to == kDeviceStateUpgrading ||
@ -83,8 +85,15 @@ bool DeviceStateMachine::IsValidTransition(DeviceState from, DeviceState to) con
to == kDeviceStateListening;
case kDeviceStateListening:
// Can go to speaking or idle
// Can go to thinking, speaking, or idle
return to == kDeviceStateThinking ||
to == kDeviceStateSpeaking ||
to == kDeviceStateIdle;
case kDeviceStateThinking:
// Can go to speaking, listening, or idle
return to == kDeviceStateSpeaking ||
to == kDeviceStateListening ||
to == kDeviceStateIdle;
case kDeviceStateSpeaking:

View File

@ -167,6 +167,8 @@ void EmoteDisplay::SetStatus(const char* const status)
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_LISTEN, NULL);
} else if (std::strcmp(status, Lang::Strings::STANDBY) == 0) {
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_IDLE, NULL);
} else if (std::strcmp(status, Lang::Strings::THINKING) == 0) {
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_LISTEN, NULL);
} else if (std::strcmp(status, Lang::Strings::SPEAKING) == 0) {
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_SPEAK, NULL);
} else if (std::strcmp(status, Lang::Strings::ERROR) == 0) {

View File

@ -203,6 +203,7 @@ void LvglDisplay::UpdateStatusBar(bool update_all) {
kDeviceStateStarting,
kDeviceStateWifiConfiguring,
kDeviceStateListening,
kDeviceStateThinking,
kDeviceStateActivating,
};
if (std::find(allowed_states.begin(), allowed_states.end(), device_state) != allowed_states.end()) {

View File

@ -228,6 +228,11 @@ void CircularStrip::OnStateChanged() {
SetAllColor(color);
break;
}
case kDeviceStateThinking: {
StripColor color = { low_brightness_, low_brightness_, default_brightness_ };
Blink(color, 300);
break;
}
case kDeviceStateUpgrading: {
StripColor color = { low_brightness_, default_brightness_, low_brightness_ };
Blink(color, 100);

View File

@ -235,6 +235,10 @@ void GpioLed::OnStateChanged() {
// TurnOn();
StartFadeTask();
break;
case kDeviceStateThinking:
SetBrightness(DEFAULT_BRIGHTNESS);
StartContinuousBlink(300);
break;
case kDeviceStateSpeaking:
SetBrightness(SPEAKING_BRIGHTNESS);
TurnOn();

View File

@ -152,6 +152,10 @@ void SingleLed::OnStateChanged() {
SetColor(0, DEFAULT_BRIGHTNESS, 0);
TurnOn();
break;
case kDeviceStateThinking:
SetColor(0, 0, DEFAULT_BRIGHTNESS);
StartContinuousBlink(300);
break;
case kDeviceStateUpgrading:
SetColor(0, DEFAULT_BRIGHTNESS, 0);
StartContinuousBlink(100);