12 Commits
main ... icon

Author SHA1 Message Date
66d318774a fix: server error to disconnect 2026-06-17 15:19:31 +08:00
60df0fe196 feat: add tools to normal agent 2026-06-12 14:23:41 +08:00
2c4329fd84 fix: voice interupt 2026-06-12 11:38:47 +08:00
9637e09aef feat: beaver 2026-06-04 15:48:10 +08:00
b92e6e1b07 feat: remove background cam every time 2026-05-29 14:53:58 +08:00
33ee598c21 feat: add icon beaver 2026-05-29 11:22:31 +08:00
37343ac0fe feat: icon first commit 2026-05-27 17:16:11 +08:00
fc6302661d feat: support camera capture to livekit 2026-05-25 17:21:11 +08:00
4953244c7c fix: voice interrupt 2026-05-22 10:20:00 +08:00
5223333418 fix: voice interrupt 2026-05-22 10:10:16 +08:00
61ad9dafd9 fix: text display 2026-05-21 17:05:09 +08:00
928d40826f feat: ws connect 2026-05-18 15:56:50 +08:00
31 changed files with 2733 additions and 260 deletions

2
.gitignore vendored
View File

@ -10,6 +10,7 @@ sdkconfig
dependencies.lock dependencies.lock
.env .env
releases/ releases/
vision_frames/
main/assets/lang_config.h main/assets/lang_config.h
main/mmap_generate_emoji.h main/mmap_generate_emoji.h
.DS_Store .DS_Store
@ -18,3 +19,4 @@ main/mmap_generate_emoji.h
*.bin *.bin
mmap_generate_*.h mmap_generate_*.h
.clangd .clangd
background_frames/

View File

@ -6,6 +6,34 @@ config OTA_URL
help help
The application will access this URL to check for new firmwares and server address. The application will access this URL to check for new firmwares and server address.
# Direct WebSocket mode.
# When enabled, the application code guarded by CONFIG_USE_DIRECT_WEBSOCKET
# skips creating the OTA object during activation and always instantiates the
# WebSocket protocol instead of choosing between MQTT and WebSocket.
config USE_DIRECT_WEBSOCKET
bool "Use direct WebSocket without OTA"
default n
help
Skip the OTA server check and use the WebSocket settings below directly.
# Server endpoint for direct mode.
# NOTE(review): the default is an address in the 172.16.0.0/12 private range,
# presumably a developer's LAN test server -- confirm before shipping.
config WEBSOCKET_URL
string "Default WebSocket URL"
depends on USE_DIRECT_WEBSOCKET
default "ws://172.19.0.240:8080"
help
The WebSocket server URL used when direct WebSocket mode is enabled.
# Authorization token for direct mode; empty by default.
config WEBSOCKET_TOKEN
string "Default WebSocket token"
depends on USE_DIRECT_WEBSOCKET
default ""
help
Optional Authorization token for the direct WebSocket server.
# Protocol version for direct mode; accepted values are 1 through 3.
config WEBSOCKET_PROTOCOL_VERSION
int "Default WebSocket protocol version"
depends on USE_DIRECT_WEBSOCKET
range 1 3
default 1
help
Protocol-Version header and hello version used by the WebSocket protocol.
choice choice
prompt "Flash Assets" prompt "Flash Assets"
default FLASH_DEFAULT_ASSETS if !USE_EMOTE_MESSAGE_STYLE default FLASH_DEFAULT_ASSETS if !USE_EMOTE_MESSAGE_STYLE

View File

@ -1,25 +1,24 @@
#include "application.h" #include "application.h"
#include "assets.h"
#include "assets/lang_config.h"
#include "audio_codec.h"
#include "board.h" #include "board.h"
#include "display.h" #include "display.h"
#include "system_info.h"
#include "audio_codec.h"
#include "mqtt_protocol.h"
#include "websocket_protocol.h"
#include "assets/lang_config.h"
#include "mcp_server.h" #include "mcp_server.h"
#include "assets.h" #include "mqtt_protocol.h"
#include "settings.h" #include "settings.h"
#include "system_info.h"
#include "websocket_protocol.h"
#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
#include <driver/gpio.h> #include <driver/gpio.h>
#include <esp_log.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <cJSON.h>
#include <font_awesome.h> #include <font_awesome.h>
#include <cstring>
#define TAG "Application" #define TAG "Application"
Application::Application() { Application::Application() {
event_group_ = xEventGroupCreate(); event_group_ = xEventGroupCreate();
@ -33,16 +32,16 @@ Application::Application() {
aec_mode_ = kAecOff; aec_mode_ = kAecOff;
#endif #endif
esp_timer_create_args_t clock_timer_args = { esp_timer_create_args_t clock_timer_args = {.callback =
.callback = [](void* arg) { [](void* arg) {
Application* app = (Application*)arg; Application* app = (Application*)arg;
xEventGroupSetBits(app->event_group_, MAIN_EVENT_CLOCK_TICK); xEventGroupSetBits(app->event_group_,
}, MAIN_EVENT_CLOCK_TICK);
.arg = this, },
.dispatch_method = ESP_TIMER_TASK, .arg = this,
.name = "clock_timer", .dispatch_method = ESP_TIMER_TASK,
.skip_unhandled_events = true .name = "clock_timer",
}; .skip_unhandled_events = true};
esp_timer_create(&clock_timer_args, &clock_timer_handle_); esp_timer_create(&clock_timer_args, &clock_timer_handle_);
} }
@ -54,9 +53,7 @@ Application::~Application() {
vEventGroupDelete(event_group_); vEventGroupDelete(event_group_);
} }
bool Application::SetDeviceState(DeviceState state) { bool Application::SetDeviceState(DeviceState state) { return state_machine_.TransitionTo(state); }
return state_machine_.TransitionTo(state);
}
void Application::Initialize() { void Application::Initialize() {
auto& board = Board::GetInstance(); auto& board = Board::GetInstance();
@ -81,6 +78,7 @@ void Application::Initialize() {
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED); xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
}; };
callbacks.on_vad_change = [this](bool speaking) { callbacks.on_vad_change = [this](bool speaking) {
vad_speaking_.store(speaking);
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE); xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
}; };
audio_service_.SetCallbacks(callbacks); audio_service_.SetCallbacks(callbacks);
@ -101,7 +99,7 @@ void Application::Initialize() {
// Set network event callback for UI updates and network state handling // Set network event callback for UI updates and network state handling
board.SetNetworkEventCallback([this](NetworkEvent event, const std::string& data) { board.SetNetworkEventCallback([this](NetworkEvent event, const std::string& data) {
auto display = Board::GetInstance().GetDisplay(); auto display = Board::GetInstance().GetDisplay();
switch (event) { switch (event) {
case NetworkEvent::Scanning: case NetworkEvent::Scanning:
display->ShowNotification(Lang::Strings::SCANNING_WIFI, 30000); display->ShowNotification(Lang::Strings::SCANNING_WIFI, 30000);
@ -141,13 +139,16 @@ void Application::Initialize() {
display->SetStatus(Lang::Strings::DETECTING_MODULE); display->SetStatus(Lang::Strings::DETECTING_MODULE);
break; break;
case NetworkEvent::ModemErrorNoSim: case NetworkEvent::ModemErrorNoSim:
Alert(Lang::Strings::ERROR, Lang::Strings::PIN_ERROR, "triangle_exclamation", Lang::Sounds::OGG_ERR_PIN); Alert(Lang::Strings::ERROR, Lang::Strings::PIN_ERROR, "triangle_exclamation",
Lang::Sounds::OGG_ERR_PIN);
break; break;
case NetworkEvent::ModemErrorRegDenied: case NetworkEvent::ModemErrorRegDenied:
Alert(Lang::Strings::ERROR, Lang::Strings::REG_ERROR, "triangle_exclamation", Lang::Sounds::OGG_ERR_REG); Alert(Lang::Strings::ERROR, Lang::Strings::REG_ERROR, "triangle_exclamation",
Lang::Sounds::OGG_ERR_REG);
break; break;
case NetworkEvent::ModemErrorInitFailed: case NetworkEvent::ModemErrorInitFailed:
Alert(Lang::Strings::ERROR, Lang::Strings::MODEM_INIT_ERROR, "triangle_exclamation", Lang::Sounds::OGG_EXCLAMATION); Alert(Lang::Strings::ERROR, Lang::Strings::MODEM_INIT_ERROR, "triangle_exclamation",
Lang::Sounds::OGG_EXCLAMATION);
break; break;
case NetworkEvent::ModemErrorTimeout: case NetworkEvent::ModemErrorTimeout:
display->SetStatus(Lang::Strings::REGISTERING_NETWORK); display->SetStatus(Lang::Strings::REGISTERING_NETWORK);
@ -166,19 +167,11 @@ void Application::Run() {
// Set the priority of the main task to 10 // Set the priority of the main task to 10
vTaskPrioritySet(nullptr, 10); vTaskPrioritySet(nullptr, 10);
const EventBits_t ALL_EVENTS = const EventBits_t ALL_EVENTS =
MAIN_EVENT_SCHEDULE | MAIN_EVENT_SCHEDULE | MAIN_EVENT_SEND_AUDIO | MAIN_EVENT_WAKE_WORD_DETECTED |
MAIN_EVENT_SEND_AUDIO | MAIN_EVENT_VAD_CHANGE | MAIN_EVENT_CLOCK_TICK | MAIN_EVENT_ERROR |
MAIN_EVENT_WAKE_WORD_DETECTED | MAIN_EVENT_NETWORK_CONNECTED | MAIN_EVENT_NETWORK_DISCONNECTED | MAIN_EVENT_TOGGLE_CHAT |
MAIN_EVENT_VAD_CHANGE | MAIN_EVENT_START_LISTENING | MAIN_EVENT_STOP_LISTENING | MAIN_EVENT_ACTIVATION_DONE |
MAIN_EVENT_CLOCK_TICK |
MAIN_EVENT_ERROR |
MAIN_EVENT_NETWORK_CONNECTED |
MAIN_EVENT_NETWORK_DISCONNECTED |
MAIN_EVENT_TOGGLE_CHAT |
MAIN_EVENT_START_LISTENING |
MAIN_EVENT_STOP_LISTENING |
MAIN_EVENT_ACTIVATION_DONE |
MAIN_EVENT_STATE_CHANGED; MAIN_EVENT_STATE_CHANGED;
while (true) { while (true) {
@ -186,7 +179,8 @@ void Application::Run() {
if (bits & MAIN_EVENT_ERROR) { if (bits & MAIN_EVENT_ERROR) {
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark", Lang::Sounds::OGG_EXCLAMATION); Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark",
Lang::Sounds::OGG_EXCLAMATION);
} }
if (bits & MAIN_EVENT_NETWORK_CONNECTED) { if (bits & MAIN_EVENT_NETWORK_CONNECTED) {
@ -233,6 +227,13 @@ void Application::Run() {
if (GetDeviceState() == kDeviceStateListening) { if (GetDeviceState() == kDeviceStateListening) {
auto led = Board::GetInstance().GetLed(); auto led = Board::GetInstance().GetLed();
led->OnStateChanged(); led->OnStateChanged();
if (vad_speaking_.load() && vision_text_mode_enabled_.load() &&
!vision_frame_sent_for_current_listen_.exchange(true)) {
if (!SendCurrentVisionFrame()) {
vision_frame_sent_for_current_listen_.store(false);
}
}
} }
} }
@ -249,7 +250,7 @@ void Application::Run() {
clock_ticks_++; clock_ticks_++;
auto display = Board::GetInstance().GetDisplay(); auto display = Board::GetInstance().GetDisplay();
display->UpdateStatusBar(); display->UpdateStatusBar();
// Print debug info every 10 seconds // Print debug info every 10 seconds
if (clock_ticks_ % 10 == 0) { if (clock_ticks_ % 10 == 0) {
SystemInfo::PrintHeapStats(); SystemInfo::PrintHeapStats();
@ -270,12 +271,14 @@ void Application::HandleNetworkConnectedEvent() {
return; return;
} }
xTaskCreate([](void* arg) { xTaskCreate(
Application* app = static_cast<Application*>(arg); [](void* arg) {
app->ActivationTask(); Application* app = static_cast<Application*>(arg);
app->activation_task_handle_ = nullptr; app->ActivationTask();
vTaskDelete(NULL); app->activation_task_handle_ = nullptr;
}, "activation", 4096 * 2, this, 2, &activation_task_handle_); vTaskDelete(NULL);
},
"activation", 4096 * 2, this, 2, &activation_task_handle_);
} }
// Update the status bar immediately to show the network state // Update the status bar immediately to show the network state
@ -286,7 +289,8 @@ void Application::HandleNetworkConnectedEvent() {
void Application::HandleNetworkDisconnectedEvent() { void Application::HandleNetworkDisconnectedEvent() {
// Close current conversation when network disconnected // Close current conversation when network disconnected
auto state = GetDeviceState(); auto state = GetDeviceState();
if (state == kDeviceStateConnecting || state == kDeviceStateListening || state == kDeviceStateSpeaking) { if (state == kDeviceStateConnecting || state == kDeviceStateListening ||
state == kDeviceStateThinking || state == kDeviceStateSpeaking) {
ESP_LOGI(TAG, "Closing audio channel due to network disconnection"); ESP_LOGI(TAG, "Closing audio channel due to network disconnection");
protocol_->CloseAudioChannel(); protocol_->CloseAudioChannel();
} }
@ -302,11 +306,15 @@ void Application::HandleActivationDoneEvent() {
SystemInfo::PrintHeapStats(); SystemInfo::PrintHeapStats();
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
has_server_time_ = ota_->HasServerTime(); if (ota_ != nullptr) {
has_server_time_ = ota_->HasServerTime();
}
auto display = Board::GetInstance().GetDisplay(); auto display = Board::GetInstance().GetDisplay();
std::string message = std::string(Lang::Strings::VERSION) + ota_->GetCurrentVersion(); if (ota_ != nullptr) {
display->ShowNotification(message.c_str()); std::string message = std::string(Lang::Strings::VERSION) + ota_->GetCurrentVersion();
display->ShowNotification(message.c_str());
}
display->SetChatMessage("system", ""); display->SetChatMessage("system", "");
// Release OTA object after activation is complete // Release OTA object after activation is complete
@ -321,6 +329,10 @@ void Application::HandleActivationDoneEvent() {
} }
void Application::ActivationTask() { void Application::ActivationTask() {
#if CONFIG_USE_DIRECT_WEBSOCKET
CheckAssetsVersion();
InitializeProtocol();
#else
// Create OTA object for activation process // Create OTA object for activation process
ota_ = std::make_unique<Ota>(); ota_ = std::make_unique<Ota>();
@ -332,6 +344,7 @@ void Application::ActivationTask() {
// Initialize the protocol // Initialize the protocol
InitializeProtocol(); InitializeProtocol();
#endif
// Signal completion to main loop // Signal completion to main loop
xEventGroupSetBits(event_group_, MAIN_EVENT_ACTIVATION_DONE); xEventGroupSetBits(event_group_, MAIN_EVENT_ACTIVATION_DONE);
@ -352,7 +365,7 @@ void Application::CheckAssetsVersion() {
ESP_LOGW(TAG, "Assets partition is disabled for board %s", BOARD_NAME); ESP_LOGW(TAG, "Assets partition is disabled for board %s", BOARD_NAME);
return; return;
} }
Settings settings("assets", true); Settings settings("assets", true);
// Check if there is a new assets need to be downloaded // Check if there is a new assets need to be downloaded
std::string download_url = settings.GetString("download_url"); std::string download_url = settings.GetString("download_url");
@ -362,27 +375,30 @@ void Application::CheckAssetsVersion() {
char message[256]; char message[256];
snprintf(message, sizeof(message), Lang::Strings::FOUND_NEW_ASSETS, download_url.c_str()); snprintf(message, sizeof(message), Lang::Strings::FOUND_NEW_ASSETS, download_url.c_str());
Alert(Lang::Strings::LOADING_ASSETS, message, "cloud_arrow_down", Lang::Sounds::OGG_UPGRADE); Alert(Lang::Strings::LOADING_ASSETS, message, "cloud_arrow_down",
Lang::Sounds::OGG_UPGRADE);
// Wait for the audio service to be idle for 3 seconds // Wait for the audio service to be idle for 3 seconds
vTaskDelay(pdMS_TO_TICKS(3000)); vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading); SetDeviceState(kDeviceStateUpgrading);
board.SetPowerSaveLevel(PowerSaveLevel::PERFORMANCE); board.SetPowerSaveLevel(PowerSaveLevel::PERFORMANCE);
display->SetChatMessage("system", Lang::Strings::PLEASE_WAIT); display->SetChatMessage("system", Lang::Strings::PLEASE_WAIT);
bool success = assets.Download(download_url, [this, display](int progress, size_t speed) -> void { bool success =
char buffer[32]; assets.Download(download_url, [this, display](int progress, size_t speed) -> void {
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024); char buffer[32];
Schedule([display, message = std::string(buffer)]() { snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
display->SetChatMessage("system", message.c_str()); Schedule([display, message = std::string(buffer)]() {
display->SetChatMessage("system", message.c_str());
});
}); });
});
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER); board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER);
vTaskDelay(pdMS_TO_TICKS(1000)); vTaskDelay(pdMS_TO_TICKS(1000));
if (!success) { if (!success) {
Alert(Lang::Strings::ERROR, Lang::Strings::DOWNLOAD_ASSETS_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION); Alert(Lang::Strings::ERROR, Lang::Strings::DOWNLOAD_ASSETS_FAILED, "circle_xmark",
Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(2000)); vTaskDelay(pdMS_TO_TICKS(2000));
SetDeviceState(kDeviceStateActivating); SetDeviceState(kDeviceStateActivating);
return; return;
@ -398,7 +414,7 @@ void Application::CheckAssetsVersion() {
void Application::CheckNewVersion() { void Application::CheckNewVersion() {
const int MAX_RETRY = 10; const int MAX_RETRY = 10;
int retry_count = 0; int retry_count = 0;
int retry_delay = 10; // Initial retry delay in seconds int retry_delay = 10; // Initial retry delay in seconds
auto& board = Board::GetInstance(); auto& board = Board::GetInstance();
while (true) { while (true) {
@ -414,27 +430,30 @@ void Application::CheckNewVersion() {
} }
char error_message[128]; char error_message[128];
snprintf(error_message, sizeof(error_message), "code=%d, url=%s", err, ota_->GetCheckVersionUrl().c_str()); snprintf(error_message, sizeof(error_message), "code=%d, url=%s", err,
ota_->GetCheckVersionUrl().c_str());
char buffer[256]; char buffer[256];
snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay, error_message); snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay,
error_message);
Alert(Lang::Strings::ERROR, buffer, "cloud_slash", Lang::Sounds::OGG_EXCLAMATION); Alert(Lang::Strings::ERROR, buffer, "cloud_slash", Lang::Sounds::OGG_EXCLAMATION);
ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay, retry_count, MAX_RETRY); ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay,
retry_count, MAX_RETRY);
for (int i = 0; i < retry_delay; i++) { for (int i = 0; i < retry_delay; i++) {
vTaskDelay(pdMS_TO_TICKS(1000)); vTaskDelay(pdMS_TO_TICKS(1000));
if (GetDeviceState() == kDeviceStateIdle) { if (GetDeviceState() == kDeviceStateIdle) {
break; break;
} }
} }
retry_delay *= 2; // Double the retry delay retry_delay *= 2; // Double the retry delay
continue; continue;
} }
retry_count = 0; retry_count = 0;
retry_delay = 10; // Reset retry delay retry_delay = 10; // Reset retry delay
if (ota_->HasNewVersion()) { if (ota_->HasNewVersion()) {
if (UpgradeFirmware(ota_->GetFirmwareUrl(), ota_->GetFirmwareVersion())) { if (UpgradeFirmware(ota_->GetFirmwareUrl(), ota_->GetFirmwareVersion())) {
return; // This line will never be reached after reboot return; // This line will never be reached after reboot
} }
// If upgrade failed, continue to normal operation // If upgrade failed, continue to normal operation
} }
@ -477,6 +496,9 @@ void Application::InitializeProtocol() {
display->SetStatus(Lang::Strings::LOADING_PROTOCOL); display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
#if CONFIG_USE_DIRECT_WEBSOCKET
protocol_ = std::make_unique<WebsocketProtocol>();
#else
if (ota_->HasMqttConfig()) { if (ota_->HasMqttConfig()) {
protocol_ = std::make_unique<MqttProtocol>(); protocol_ = std::make_unique<MqttProtocol>();
} else if (ota_->HasWebsocketConfig()) { } else if (ota_->HasWebsocketConfig()) {
@ -485,52 +507,63 @@ void Application::InitializeProtocol() {
ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT"); ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
protocol_ = std::make_unique<MqttProtocol>(); protocol_ = std::make_unique<MqttProtocol>();
} }
#endif
protocol_->OnConnected([this]() { protocol_->OnConnected([this]() { DismissAlert(); });
DismissAlert();
});
protocol_->OnNetworkError([this](const std::string& message) { protocol_->OnNetworkError([this](const std::string& message) {
last_error_message_ = message; last_error_message_ = message;
xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR); xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR);
}); });
protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) { protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
if (GetDeviceState() == kDeviceStateSpeaking) { if (accepting_tts_audio_.load() || GetDeviceState() == kDeviceStateSpeaking) {
audio_service_.PushPacketToDecodeQueue(std::move(packet)); audio_service_.PushPacketToDecodeQueue(std::move(packet));
} }
}); });
protocol_->OnAudioChannelOpened([this, codec, &board]() { protocol_->OnAudioChannelOpened([this, codec, &board]() {
board.SetPowerSaveLevel(PowerSaveLevel::PERFORMANCE); board.SetPowerSaveLevel(PowerSaveLevel::PERFORMANCE);
if (protocol_->server_sample_rate() != codec->output_sample_rate()) { if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion", ESP_LOGW(TAG,
protocol_->server_sample_rate(), codec->output_sample_rate()); "Server sample rate %d does not match device output sample rate %d, "
"resampling may cause distortion",
protocol_->server_sample_rate(), codec->output_sample_rate());
} }
}); });
protocol_->OnAudioChannelClosed([this, &board]() { protocol_->OnAudioChannelClosed([this, &board]() {
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER); board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER);
accepting_tts_audio_.store(false);
Schedule([this]() { Schedule([this]() {
if (GetDeviceState() == kDeviceStateConnecting) {
return;
}
auto display = Board::GetInstance().GetDisplay(); auto display = Board::GetInstance().GetDisplay();
display->SetChatMessage("system", ""); display->SetChatMessage("system", "");
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
}); });
}); });
protocol_->OnIncomingJson([this, display](const cJSON* root) { protocol_->OnIncomingJson([this, display](const cJSON* root) {
// Parse JSON data // Parse JSON data
auto type = cJSON_GetObjectItem(root, "type"); auto type = cJSON_GetObjectItem(root, "type");
if (strcmp(type->valuestring, "tts") == 0) { if (strcmp(type->valuestring, "tts") == 0) {
auto state = cJSON_GetObjectItem(root, "state"); auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) { if (strcmp(state->valuestring, "thinking") == 0) {
Schedule([this]() { SetDeviceState(kDeviceStateThinking); });
} else if (strcmp(state->valuestring, "start") == 0) {
audio_service_.ResetDecoder();
accepting_tts_audio_.store(true);
Schedule([this]() { Schedule([this]() {
aborted_ = false; aborted_ = false;
SetDeviceState(kDeviceStateSpeaking); SetDeviceState(kDeviceStateSpeaking);
}); });
} else if (strcmp(state->valuestring, "stop") == 0) { } else if (strcmp(state->valuestring, "stop") == 0) {
accepting_tts_audio_.store(false);
Schedule([this]() { Schedule([this]() {
if (GetDeviceState() == kDeviceStateSpeaking) { auto state = GetDeviceState();
if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
if (listening_mode_ == kListeningModeManualStop) { if (listening_mode_ == kListeningModeManualStop) {
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
} else { } else {
@ -573,9 +606,7 @@ void Application::InitializeProtocol() {
ESP_LOGI(TAG, "System command: %s", command->valuestring); ESP_LOGI(TAG, "System command: %s", command->valuestring);
if (strcmp(command->valuestring, "reboot") == 0) { if (strcmp(command->valuestring, "reboot") == 0) {
// Do a reboot if user requests a OTA update // Do a reboot if user requests a OTA update
Schedule([this]() { Schedule([this]() { Reboot(); });
Reboot();
});
} else { } else {
ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring); ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring);
} }
@ -585,7 +616,8 @@ void Application::InitializeProtocol() {
auto message = cJSON_GetObjectItem(root, "message"); auto message = cJSON_GetObjectItem(root, "message");
auto emotion = cJSON_GetObjectItem(root, "emotion"); auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) { if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) {
Alert(status->valuestring, message->valuestring, emotion->valuestring, Lang::Sounds::OGG_VIBRATION); Alert(status->valuestring, message->valuestring, emotion->valuestring,
Lang::Sounds::OGG_VIBRATION);
} else { } else {
ESP_LOGW(TAG, "Alert command requires status, message and emotion"); ESP_LOGW(TAG, "Alert command requires status, message and emotion");
} }
@ -594,9 +626,10 @@ void Application::InitializeProtocol() {
auto payload = cJSON_GetObjectItem(root, "payload"); auto payload = cJSON_GetObjectItem(root, "payload");
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root)); ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
if (cJSON_IsObject(payload)) { if (cJSON_IsObject(payload)) {
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() { Schedule(
display->SetChatMessage("system", payload_str.c_str()); [this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
}); display->SetChatMessage("system", payload_str.c_str());
});
} else { } else {
ESP_LOGW(TAG, "Invalid custom message format: missing payload"); ESP_LOGW(TAG, "Invalid custom message format: missing payload");
} }
@ -605,7 +638,7 @@ void Application::InitializeProtocol() {
ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring); ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
} }
}); });
protocol_->Start(); protocol_->Start();
} }
@ -614,32 +647,27 @@ void Application::ShowActivationCode(const std::string& code, const std::string&
char digit; char digit;
const std::string_view& sound; const std::string_view& sound;
}; };
static const std::array<digit_sound, 10> digit_sounds{{ static const std::array<digit_sound, 10> digit_sounds{
digit_sound{'0', Lang::Sounds::OGG_0}, {digit_sound{'0', Lang::Sounds::OGG_0}, digit_sound{'1', Lang::Sounds::OGG_1},
digit_sound{'1', Lang::Sounds::OGG_1}, digit_sound{'2', Lang::Sounds::OGG_2}, digit_sound{'3', Lang::Sounds::OGG_3},
digit_sound{'2', Lang::Sounds::OGG_2}, digit_sound{'4', Lang::Sounds::OGG_4}, digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'3', Lang::Sounds::OGG_3}, digit_sound{'6', Lang::Sounds::OGG_6}, digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'4', Lang::Sounds::OGG_4}, digit_sound{'8', Lang::Sounds::OGG_8}, digit_sound{'9', Lang::Sounds::OGG_9}}};
digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'6', Lang::Sounds::OGG_6},
digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'8', Lang::Sounds::OGG_8},
digit_sound{'9', Lang::Sounds::OGG_9}
}};
// This sentence uses 9KB of SRAM, so we need to wait for it to finish // This sentence uses 9KB of SRAM, so we need to wait for it to finish
Alert(Lang::Strings::ACTIVATION, message.c_str(), "link", Lang::Sounds::OGG_ACTIVATION); Alert(Lang::Strings::ACTIVATION, message.c_str(), "link", Lang::Sounds::OGG_ACTIVATION);
for (const auto& digit : code) { for (const auto& digit : code) {
auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(), auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
[digit](const digit_sound& ds) { return ds.digit == digit; }); [digit](const digit_sound& ds) { return ds.digit == digit; });
if (it != digit_sounds.end()) { if (it != digit_sounds.end()) {
audio_service_.PlaySound(it->sound); audio_service_.PlaySound(it->sound);
} }
} }
} }
void Application::Alert(const char* status, const char* message, const char* emotion, const std::string_view& sound) { void Application::Alert(const char* status, const char* message, const char* emotion,
const std::string_view& sound) {
ESP_LOGW(TAG, "Alert [%s] %s: %s", emotion, status, message); ESP_LOGW(TAG, "Alert [%s] %s: %s", emotion, status, message);
auto display = Board::GetInstance().GetDisplay(); auto display = Board::GetInstance().GetDisplay();
display->SetStatus(status); display->SetStatus(status);
@ -659,21 +687,44 @@ void Application::DismissAlert() {
} }
} }
void Application::ToggleChatState() { void Application::ToggleChatState() { ToggleChatStateForMode(kChatAgentModeNormal, false); }
// Requests a chat toggle for the normal agent with the vision flag set.
// All the work is delegated to ToggleChatStateForMode().
void Application::ToggleChatStateWithVision() {
    constexpr bool kVisionEnabled = true;
    ToggleChatStateForMode(kChatAgentModeNormal, kVisionEnabled);
}
void Application::ToggleChatStateForMode(ChatAgentMode agent_mode, bool vision_enabled) {
chat_agent_mode_.store(agent_mode);
vision_text_mode_enabled_.store(vision_enabled);
vision_frame_sent_for_current_listen_.store(false);
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT); xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
} }
// Returns the current value of the vision-text-mode flag.
bool Application::IsVisionTextModeEnabled() const {
    const bool enabled = vision_text_mode_enabled_.load();
    return enabled;
}
// Returns the name of the requested chat agent mode:
// "beaver" for kChatAgentModeBeaver, "normal" for anything else.
const char* Application::GetChatAgentModeName() const {
    if (chat_agent_mode_.load() == kChatAgentModeBeaver) {
        return "beaver";
    }
    return "normal";
}
// Returns a name combining the requested agent mode and the vision flag:
// "beaver" / "normal", prefixed with "vision-" when vision text mode is on.
const char* Application::GetChatModeName() const {
    // Read the vision flag first, then the agent mode, as two separate loads.
    const bool with_vision = vision_text_mode_enabled_.load();
    const bool beaver_agent = chat_agent_mode_.load() == kChatAgentModeBeaver;

    if (with_vision) {
        return beaver_agent ? "vision-beaver" : "vision-normal";
    }
    return beaver_agent ? "beaver" : "normal";
}
void Application::StartListening() { void Application::StartListening() {
vision_text_mode_enabled_.store(false);
vision_frame_sent_for_current_listen_.store(false);
xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING); xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING);
} }
void Application::StopListening() { void Application::StopListening() { xEventGroupSetBits(event_group_, MAIN_EVENT_STOP_LISTENING); }
xEventGroupSetBits(event_group_, MAIN_EVENT_STOP_LISTENING);
}
void Application::HandleToggleChatEvent() { void Application::HandleToggleChatEvent() {
auto state = GetDeviceState(); auto state = GetDeviceState();
if (state == kDeviceStateActivating) { if (state == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
return; return;
@ -694,17 +745,22 @@ void Application::HandleToggleChatEvent() {
if (state == kDeviceStateIdle) { if (state == kDeviceStateIdle) {
ListeningMode mode = GetDefaultListeningMode(); ListeningMode mode = GetDefaultListeningMode();
if (!protocol_->IsAudioChannelOpened()) { bool agent_mode_changed = chat_agent_mode_.load() != active_chat_agent_mode_.load();
bool vision_mode_changed =
vision_text_mode_enabled_.load() != active_vision_text_mode_enabled_.load();
if (!protocol_->IsAudioChannelOpened() || agent_mode_changed || vision_mode_changed) {
if (protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
SetDeviceState(kDeviceStateConnecting); SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update) // Schedule to let the state change be processed first (UI update)
Schedule([this, mode]() { Schedule([this, mode]() { ContinueOpenAudioChannel(mode); });
ContinueOpenAudioChannel(mode);
});
return; return;
} }
SetListeningMode(mode); SetListeningMode(mode);
} else if (state == kDeviceStateSpeaking) { } else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
AbortSpeaking(kAbortReasonNone); AbortSpeaking(kAbortReasonNone);
SetListeningMode(GetDefaultListeningMode());
} else if (state == kDeviceStateListening) { } else if (state == kDeviceStateListening) {
protocol_->CloseAudioChannel(); protocol_->CloseAudioChannel();
} }
@ -726,12 +782,14 @@ void Application::ContinueOpenAudioChannel(ListeningMode mode) {
} }
} }
active_chat_agent_mode_.store(chat_agent_mode_.load());
active_vision_text_mode_enabled_.store(vision_text_mode_enabled_.load());
SetListeningMode(mode); SetListeningMode(mode);
} }
void Application::HandleStartListeningEvent() { void Application::HandleStartListeningEvent() {
auto state = GetDeviceState(); auto state = GetDeviceState();
if (state == kDeviceStateActivating) { if (state == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
return; return;
@ -745,18 +803,16 @@ void Application::HandleStartListeningEvent() {
ESP_LOGE(TAG, "Protocol not initialized"); ESP_LOGE(TAG, "Protocol not initialized");
return; return;
} }
if (state == kDeviceStateIdle) { if (state == kDeviceStateIdle) {
if (!protocol_->IsAudioChannelOpened()) { if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting); SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update) // Schedule to let the state change be processed first (UI update)
Schedule([this]() { Schedule([this]() { ContinueOpenAudioChannel(kListeningModeManualStop); });
ContinueOpenAudioChannel(kListeningModeManualStop);
});
return; return;
} }
SetListeningMode(kListeningModeManualStop); SetListeningMode(kListeningModeManualStop);
} else if (state == kDeviceStateSpeaking) { } else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
AbortSpeaking(kAbortReasonNone); AbortSpeaking(kAbortReasonNone);
SetListeningMode(kListeningModeManualStop); SetListeningMode(kListeningModeManualStop);
} }
@ -764,7 +820,7 @@ void Application::HandleStartListeningEvent() {
void Application::HandleStopListeningEvent() { void Application::HandleStopListeningEvent() {
auto state = GetDeviceState(); auto state = GetDeviceState();
if (state == kDeviceStateAudioTesting) { if (state == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false); audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring); SetDeviceState(kDeviceStateWifiConfiguring);
@ -794,17 +850,14 @@ void Application::HandleWakeWordDetectedEvent() {
SetDeviceState(kDeviceStateConnecting); SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update), // Schedule to let the state change be processed first (UI update),
// then continue with OpenAudioChannel which may block for ~1 second // then continue with OpenAudioChannel which may block for ~1 second
Schedule([this, wake_word]() { Schedule([this, wake_word]() { ContinueWakeWordInvoke(wake_word); });
ContinueWakeWordInvoke(wake_word);
});
return; return;
} }
// Channel already opened, continue directly // Channel already opened, continue directly
ContinueWakeWordInvoke(wake_word); ContinueWakeWordInvoke(wake_word);
} else if (state == kDeviceStateSpeaking || state == kDeviceStateListening) { } else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking ||
state == kDeviceStateListening) {
AbortSpeaking(kAbortReasonWakeWordDetected); AbortSpeaking(kAbortReasonWakeWordDetected);
// Clear send queue to avoid sending residues to server
while (audio_service_.PopPacketFromSendQueue());
if (state == kDeviceStateListening) { if (state == kDeviceStateListening) {
protocol_->SendStartListening(GetDefaultListeningMode()); protocol_->SendStartListening(GetDefaultListeningMode());
@ -865,13 +918,14 @@ void Application::HandleStateChangedEvent() {
auto display = board.GetDisplay(); auto display = board.GetDisplay();
auto led = board.GetLed(); auto led = board.GetLed();
led->OnStateChanged(); led->OnStateChanged();
switch (new_state) { switch (new_state) {
case kDeviceStateUnknown: case kDeviceStateUnknown:
case kDeviceStateIdle: case kDeviceStateIdle:
vision_frame_sent_for_current_listen_.store(false);
display->SetStatus(Lang::Strings::STANDBY); display->SetStatus(Lang::Strings::STANDBY);
display->ClearChatMessages(); // Clear messages first display->ClearChatMessages(); // Clear messages first
display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count) display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count)
audio_service_.EnableVoiceProcessing(false); audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(true); audio_service_.EnableWakeWordDetection(true);
break; break;
@ -881,21 +935,19 @@ void Application::HandleStateChangedEvent() {
display->SetChatMessage("system", ""); display->SetChatMessage("system", "");
break; break;
case kDeviceStateListening: case kDeviceStateListening:
vad_speaking_.store(false);
vision_frame_sent_for_current_listen_.store(false);
display->SetStatus(Lang::Strings::LISTENING); display->SetStatus(Lang::Strings::LISTENING);
display->SetEmotion("neutral"); display->SetEmotion("neutral");
// Make sure the audio processor is running // Re-entering listening after an interrupt must restart the capture path even if the
if (play_popup_on_listening_ || !audio_service_.IsAudioProcessorRunning()) { // processor task is still marked running, otherwise realtime mode can show Listening
// For auto mode, wait for playback queue to be empty before enabling voice processing // while no fresh mic frames are sent.
// This prevents audio truncation when STOP arrives late due to network jitter if (listening_mode_ == kListeningModeAutoStop) {
if (listening_mode_ == kListeningModeAutoStop) { audio_service_.WaitForPlaybackQueueEmpty();
audio_service_.WaitForPlaybackQueueEmpty();
}
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
} }
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
#ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING #ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING
// Enable wake word detection in listening mode (configured via Kconfig) // Enable wake word detection in listening mode (configured via Kconfig)
@ -904,13 +956,23 @@ void Application::HandleStateChangedEvent() {
// Disable wake word detection in listening mode // Disable wake word detection in listening mode
audio_service_.EnableWakeWordDetection(false); audio_service_.EnableWakeWordDetection(false);
#endif #endif
// Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called // Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called
if (play_popup_on_listening_) { if (play_popup_on_listening_) {
play_popup_on_listening_ = false; play_popup_on_listening_ = false;
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP); audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
} }
break; break;
case kDeviceStateThinking:
vad_speaking_.store(false);
display->SetStatus(Lang::Strings::THINKING);
display->SetEmotion("thinking");
if (listening_mode_ != kListeningModeRealtime) {
audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
}
break;
case kDeviceStateSpeaking: case kDeviceStateSpeaking:
display->SetStatus(Lang::Strings::SPEAKING); display->SetStatus(Lang::Strings::SPEAKING);
@ -919,7 +981,9 @@ void Application::HandleStateChangedEvent() {
// Only AFE wake word can be detected in speaking mode // Only AFE wake word can be detected in speaking mode
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord()); audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
} }
audio_service_.ResetDecoder(); if (!accepting_tts_audio_.load()) {
audio_service_.ResetDecoder();
}
break; break;
case kDeviceStateWifiConfiguring: case kDeviceStateWifiConfiguring:
audio_service_.EnableVoiceProcessing(false); audio_service_.EnableVoiceProcessing(false);
@ -931,6 +995,27 @@ void Application::HandleStateChangedEvent() {
} }
} }
/**
 * Capture one camera frame as JPEG and pass it to the protocol.
 *
 * The frame is captured with show_preview = true.
 *
 * @return true when a non-empty frame was captured and passed to
 *         protocol_->SendVisionFrame(); false when there is no protocol, the
 *         audio channel is not open, the board has no camera, the capture
 *         failed, or the capture produced no data.
 */
bool Application::SendCurrentVisionFrame() {
    if (!protocol_ || !protocol_->IsAudioChannelOpened()) {
        return false;
    }

    auto camera = Board::GetInstance().GetCamera();
    if (camera == nullptr) {
        return false;
    }

    std::string jpeg_data;
    if (!camera->CaptureToJpeg(jpeg_data, true)) {
        ESP_LOGW(TAG, "Failed to capture vision frame");
        return false;
    }
    // A capture that reports success but yields no bytes is not a usable frame;
    // do not send an empty payload.
    if (jpeg_data.empty()) {
        ESP_LOGW(TAG, "Captured empty vision frame");
        return false;
    }

    protocol_->SendVisionFrame(jpeg_data);
    ESP_LOGI(TAG, "Sent vision frame, size=%u bytes", static_cast<unsigned>(jpeg_data.size()));
    return true;
}
void Application::Schedule(std::function<void()>&& callback) { void Application::Schedule(std::function<void()>&& callback) {
{ {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
@ -942,6 +1027,8 @@ void Application::Schedule(std::function<void()>&& callback) {
void Application::AbortSpeaking(AbortReason reason) { void Application::AbortSpeaking(AbortReason reason) {
ESP_LOGI(TAG, "Abort speaking"); ESP_LOGI(TAG, "Abort speaking");
aborted_ = true; aborted_ = true;
accepting_tts_audio_.store(false);
audio_service_.ResetDecoder();
if (protocol_) { if (protocol_) {
protocol_->SendAbortSpeaking(reason); protocol_->SendAbortSpeaking(reason);
} }
@ -949,6 +1036,8 @@ void Application::AbortSpeaking(AbortReason reason) {
void Application::SetListeningMode(ListeningMode mode) { void Application::SetListeningMode(ListeningMode mode) {
listening_mode_ = mode; listening_mode_ = mode;
vad_speaking_.store(false);
vision_frame_sent_for_current_listen_.store(false);
SetDeviceState(kDeviceStateListening); SetDeviceState(kDeviceStateListening);
} }
@ -983,7 +1072,8 @@ bool Application::UpgradeFirmware(const std::string& url, const std::string& ver
} }
ESP_LOGI(TAG, "Starting firmware upgrade from URL: %s", upgrade_url.c_str()); ESP_LOGI(TAG, "Starting firmware upgrade from URL: %s", upgrade_url.c_str());
Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download", Lang::Sounds::OGG_UPGRADE); Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download",
Lang::Sounds::OGG_UPGRADE);
vTaskDelay(pdMS_TO_TICKS(3000)); vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading); SetDeviceState(kDeviceStateUpgrading);
@ -1005,17 +1095,19 @@ bool Application::UpgradeFirmware(const std::string& url, const std::string& ver
if (!upgrade_success) { if (!upgrade_success) {
// Upgrade failed, restart audio service and continue running // Upgrade failed, restart audio service and continue running
ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation..."); ESP_LOGE(TAG,
audio_service_.Start(); // Restart audio service "Firmware upgrade failed, restarting audio service and continuing operation...");
board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER); // Restore power save level audio_service_.Start(); // Restart audio service
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION); board.SetPowerSaveLevel(PowerSaveLevel::LOW_POWER); // Restore power save level
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark",
Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(3000)); vTaskDelay(pdMS_TO_TICKS(3000));
return false; return false;
} else { } else {
// Upgrade success, reboot immediately // Upgrade success, reboot immediately
ESP_LOGI(TAG, "Firmware upgrade successful, rebooting..."); ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
display->SetChatMessage("system", "Upgrade successful, rebooting..."); display->SetChatMessage("system", "Upgrade successful, rebooting...");
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
Reboot(); Reboot();
return true; return true;
} }
@ -1027,25 +1119,21 @@ void Application::WakeWordInvoke(const std::string& wake_word) {
} }
auto state = GetDeviceState(); auto state = GetDeviceState();
if (state == kDeviceStateIdle) { if (state == kDeviceStateIdle) {
audio_service_.EncodeWakeWord(); audio_service_.EncodeWakeWord();
if (!protocol_->IsAudioChannelOpened()) { if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting); SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update) // Schedule to let the state change be processed first (UI update)
Schedule([this, wake_word]() { Schedule([this, wake_word]() { ContinueWakeWordInvoke(wake_word); });
ContinueWakeWordInvoke(wake_word);
});
return; return;
} }
// Channel already opened, continue directly // Channel already opened, continue directly
ContinueWakeWordInvoke(wake_word); ContinueWakeWordInvoke(wake_word);
} else if (state == kDeviceStateSpeaking) { } else if (state == kDeviceStateSpeaking || state == kDeviceStateThinking) {
Schedule([this]() { Schedule([this]() { AbortSpeaking(kAbortReasonNone); });
AbortSpeaking(kAbortReasonNone); } else if (state == kDeviceStateListening) {
});
} else if (state == kDeviceStateListening) {
Schedule([this]() { Schedule([this]() {
if (protocol_) { if (protocol_) {
protocol_->CloseAudioChannel(); protocol_->CloseAudioChannel();
@ -1077,7 +1165,7 @@ void Application::RegisterMcpBroadcastCallback(std::function<void(const std::str
void Application::SendMcpMessage(const std::string& payload) { void Application::SendMcpMessage(const std::string& payload) {
// Always schedule to run in main task for thread safety // Always schedule to run in main task for thread safety
Schedule([this, payload](){ Schedule([this, payload]() {
if (protocol_) { if (protocol_) {
protocol_->SendMcpMessage(payload); protocol_->SendMcpMessage(payload);
} }
@ -1093,18 +1181,18 @@ void Application::SetAecMode(AecMode mode) {
auto& board = Board::GetInstance(); auto& board = Board::GetInstance();
auto display = board.GetDisplay(); auto display = board.GetDisplay();
switch (aec_mode_) { switch (aec_mode_) {
case kAecOff: case kAecOff:
audio_service_.EnableDeviceAec(false); audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_OFF); display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
break; break;
case kAecOnServerSide: case kAecOnServerSide:
audio_service_.EnableDeviceAec(false); audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_ON); display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break; break;
case kAecOnDeviceSide: case kAecOnDeviceSide:
audio_service_.EnableDeviceAec(true); audio_service_.EnableDeviceAec(true);
display->ShowNotification(Lang::Strings::RTC_MODE_ON); display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break; break;
} }
// If the AEC mode is changed, close the audio channel // If the AEC mode is changed, close the audio channel
@ -1114,9 +1202,7 @@ void Application::SetAecMode(AecMode mode) {
}); });
} }
void Application::PlaySound(const std::string_view& sound) { void Application::PlaySound(const std::string_view& sound) { audio_service_.PlaySound(sound); }
audio_service_.PlaySound(sound);
}
void Application::ResetProtocol() { void Application::ResetProtocol() {
Schedule([this]() { Schedule([this]() {
@ -1128,4 +1214,3 @@ void Application::ResetProtocol() {
protocol_.reset(); protocol_.reset();
}); });
} }

View File

@ -11,6 +11,7 @@
#include <deque> #include <deque>
#include <memory> #include <memory>
#include <functional> #include <functional>
#include <atomic>
#include "protocol.h" #include "protocol.h"
#include "ota.h" #include "ota.h"
@ -40,6 +41,11 @@ enum AecMode {
kAecOnServerSide, kAecOnServerSide,
}; };
// Chat agent selection stored by Application (see chat_agent_mode_).
// NOTE(review): what each agent does on the server side is not visible here.
enum ChatAgentMode {
    kChatAgentModeNormal = 0,  // Value used to initialise the mode members.
    kChatAgentModeBeaver = 1,  // Alternative "beaver" agent.
};
class Application { class Application {
public: public:
static Application& GetInstance() { static Application& GetInstance() {
@ -91,6 +97,12 @@ public:
* Sends MAIN_EVENT_TOGGLE_CHAT to be handled in Run() * Sends MAIN_EVENT_TOGGLE_CHAT to be handled in Run()
*/ */
void ToggleChatState(); void ToggleChatState();
void ToggleChatStateWithVision();
void ToggleChatStateForMode(ChatAgentMode agent_mode, bool vision_enabled);
bool IsVisionTextModeEnabled() const;
ChatAgentMode GetChatAgentMode() const { return chat_agent_mode_.load(); }
const char* GetChatAgentModeName() const;
const char* GetChatModeName() const;
/** /**
* Start listening (event-based, thread-safe) * Start listening (event-based, thread-safe)
@ -144,6 +156,13 @@ private:
bool aborted_ = false; bool aborted_ = false;
bool assets_version_checked_ = false; bool assets_version_checked_ = false;
bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening
std::atomic<ChatAgentMode> chat_agent_mode_ = kChatAgentModeNormal;
std::atomic<ChatAgentMode> active_chat_agent_mode_ = kChatAgentModeNormal;
std::atomic<bool> vision_text_mode_enabled_ = false;
std::atomic<bool> active_vision_text_mode_enabled_ = false;
std::atomic<bool> vad_speaking_ = false;
std::atomic<bool> vision_frame_sent_for_current_listen_ = false;
std::atomic<bool> accepting_tts_audio_ = false;
int clock_ticks_ = 0; int clock_ticks_ = 0;
TaskHandle_t activation_task_handle_ = nullptr; TaskHandle_t activation_task_handle_ = nullptr;
@ -159,6 +178,7 @@ private:
void HandleWakeWordDetectedEvent(); void HandleWakeWordDetectedEvent();
void ContinueOpenAudioChannel(ListeningMode mode); void ContinueOpenAudioChannel(ListeningMode mode);
void ContinueWakeWordInvoke(const std::string& wake_word); void ContinueWakeWordInvoke(const std::string& wake_word);
bool SendCurrentVisionFrame();
// Activation task (runs in background) // Activation task (runs in background)
void ActivationTask(); void ActivationTask();

View File

@ -26,6 +26,7 @@
"CONNECTION_SUCCESSFUL": "Connection Successful", "CONNECTION_SUCCESSFUL": "Connection Successful",
"CONNECTED_TO": "Connected to ", "CONNECTED_TO": "Connected to ",
"LISTENING": "Listening...", "LISTENING": "Listening...",
"THINKING": "Thinking...",
"SPEAKING": "Speaking...", "SPEAKING": "Speaking...",
"SERVER_NOT_FOUND": "Looking for available service", "SERVER_NOT_FOUND": "Looking for available service",
"SERVER_NOT_CONNECTED": "Unable to connect to service, please try again later", "SERVER_NOT_CONNECTED": "Unable to connect to service, please try again later",
@ -56,4 +57,4 @@
"LOADING_ASSETS": "Loading assets...", "LOADING_ASSETS": "Loading assets...",
"HELLO_MY_FRIEND": "Hello, my friend!" "HELLO_MY_FRIEND": "Hello, my friend!"
} }
} }

View File

@ -23,6 +23,7 @@
"CONNECTING": "连接中...", "CONNECTING": "连接中...",
"CONNECTED_TO": "已连接 ", "CONNECTED_TO": "已连接 ",
"LISTENING": "聆听中...", "LISTENING": "聆听中...",
"THINKING": "思考中...",
"SPEAKING": "说话中...", "SPEAKING": "说话中...",
"SERVER_NOT_FOUND": "正在寻找可用服务", "SERVER_NOT_FOUND": "正在寻找可用服务",
"SERVER_NOT_CONNECTED": "无法连接服务,请稍后再试", "SERVER_NOT_CONNECTED": "无法连接服务,请稍后再试",
@ -56,4 +57,4 @@
"FLIGHT_MODE_OFF": "飞行模式已关闭", "FLIGHT_MODE_OFF": "飞行模式已关闭",
"FLIGHT_MODE_ON": "飞行模式已开启" "FLIGHT_MODE_ON": "飞行模式已开启"
} }
} }

View File

@ -579,6 +579,7 @@ void AudioService::EnableWakeWordDetection(bool enable) {
void AudioService::EnableVoiceProcessing(bool enable) { void AudioService::EnableVoiceProcessing(bool enable) {
ESP_LOGD(TAG, "%s voice processing", enable ? "Enabling" : "Disabling"); ESP_LOGD(TAG, "%s voice processing", enable ? "Enabling" : "Disabling");
if (enable) { if (enable) {
bool was_running = IsAudioProcessorRunning();
if (!audio_processor_initialized_) { if (!audio_processor_initialized_) {
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_); audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
audio_processor_initialized_ = true; audio_processor_initialized_ = true;
@ -586,7 +587,7 @@ void AudioService::EnableVoiceProcessing(bool enable) {
/* We should make sure no audio is playing */ /* We should make sure no audio is playing */
ResetDecoder(); ResetDecoder();
audio_input_need_warmup_ = true; audio_input_need_warmup_ = !was_running;
// Reset input resampler to clear cached data from previous mode (e.g. WakeWord) // Reset input resampler to clear cached data from previous mode (e.g. WakeWord)
// This prevents buffer overflow when switching between different feed sizes // This prevents buffer overflow when switching between different feed sizes
{ {

View File

@ -0,0 +1,177 @@
#include "background_capture_service.h"
#include "board.h"
#include "camera.h"
#include <algorithm>
#include <esp_heap_caps.h>
#include <esp_log.h>
#define TAG "BgCapture"
// Defaulted constructor: members take their in-class initial values; no task is started here.
BackgroundCaptureService::BackgroundCaptureService() = default;
// Calls Stop() so the capture task has exited before the object is destroyed.
BackgroundCaptureService::~BackgroundCaptureService() {
    Stop();
}
/**
 * Launch the background capture task unless one is already running.
 *
 * Compiles to a no-op when CONFIG_BACKGROUND_CAPTURE_ENABLE is not set.
 * If the task cannot be created, the running flag and the task handle are
 * rolled back so a later Start() can try again.
 */
void BackgroundCaptureService::Start() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    // exchange() returns the previous value, so only the caller that flips
    // the flag from false to true goes on to create the task.
    const bool already_running = running_.exchange(true);
    if (already_running) {
        return;
    }

    const auto created = xTaskCreate(&BackgroundCaptureService::TaskEntry,
                                     "bg_capture",
                                     CONFIG_BACKGROUND_CAPTURE_TASK_STACK_SIZE,
                                     this,
                                     CONFIG_BACKGROUND_CAPTURE_TASK_PRIORITY,
                                     &task_handle_);
    if (created == pdPASS) {
        return;
    }

    running_.store(false);
    task_handle_ = nullptr;
    ESP_LOGE(TAG, "Failed to create background capture task");
#endif
}
/**
 * Ask the capture task to stop and wait until it has cleared its handle.
 *
 * Compiles to a no-op when CONFIG_BACKGROUND_CAPTURE_ENABLE is not set, and
 * returns immediately when the service was not running.
 */
void BackgroundCaptureService::Stop() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    const bool was_running = running_.exchange(false);
    if (was_running) {
        // TaskEntry() resets task_handle_ once Run() has returned; poll every
        // 20 ms until that happens.
        for (; task_handle_ != nullptr;) {
            vTaskDelay(pdMS_TO_TICKS(20));
        }
    }
#endif
}
/**
 * FreeRTOS task entry point for the capture task.
 *
 * @param arg The BackgroundCaptureService instance passed to xTaskCreate.
 *
 * Runs the capture loop, clears the stored task handle, then deletes the
 * calling task.
 */
void BackgroundCaptureService::TaskEntry(void* arg) {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    BackgroundCaptureService* const self = static_cast<BackgroundCaptureService*>(arg);
    self->Run();
    // Clearing the handle is what Stop() polls for; do it after Run() returns.
    self->task_handle_ = nullptr;
#else
    static_cast<void>(arg);
#endif
    vTaskDelete(nullptr);
}
/**
 * Capture loop executed by the background task.
 *
 * While running_ is set, captures and uploads one frame per iteration. After a
 * success the failure counter is reset and the loop sleeps for
 * CONFIG_BACKGROUND_CAPTURE_FRAME_INTERVAL_MS. After a failure the counter is
 * incremented and the loop sleeps for the delay from GetFailureDelayMs().
 *
 * Compiles to a no-op when CONFIG_BACKGROUND_CAPTURE_ENABLE is not set.
 */
void BackgroundCaptureService::Run() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    ESP_LOGI(TAG, "Background capture task started");

    while (running_.load()) {
        if (!CaptureAndSendFrame()) {
            consecutive_failures_++;
            const uint32_t delay_ms = GetFailureDelayMs();
            // uint32_t is not guaranteed to be `unsigned int` on every
            // toolchain, so cast explicitly to match the %u specifiers.
            ESP_LOGW(TAG, "Background capture retry in %u ms, failures=%u",
                     static_cast<unsigned>(delay_ms),
                     static_cast<unsigned>(consecutive_failures_));
            vTaskDelay(pdMS_TO_TICKS(delay_ms));
            continue;
        }

        consecutive_failures_ = 0;
        vTaskDelay(pdMS_TO_TICKS(CONFIG_BACKGROUND_CAPTURE_FRAME_INTERVAL_MS));
    }

    ESP_LOGI(TAG, "Background capture task stopped");
#endif
}
/**
 * Capture one frame without preview and pass it to UploadJpegFrame().
 *
 * @return The result of UploadJpegFrame() for a non-empty captured frame;
 *         false when free internal heap is below
 *         CONFIG_BACKGROUND_CAPTURE_MIN_FREE_INTERNAL_HEAP, when the board has
 *         no camera, when the capture fails or is empty, or when
 *         CONFIG_BACKGROUND_CAPTURE_ENABLE is not set.
 */
bool BackgroundCaptureService::CaptureAndSendFrame() {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    // Skip the capture entirely when internal RAM is below the threshold.
    const size_t internal_free = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
    if (internal_free < CONFIG_BACKGROUND_CAPTURE_MIN_FREE_INTERNAL_HEAP) {
        ESP_LOGW(TAG, "Skip background capture, low internal heap: free=%u threshold=%u",
                 static_cast<unsigned>(internal_free),
                 static_cast<unsigned>(CONFIG_BACKGROUND_CAPTURE_MIN_FREE_INTERNAL_HEAP));
        return false;
    }

    auto cam = Board::GetInstance().GetCamera();
    if (cam == nullptr) {
        ESP_LOGW(TAG, "No camera available for background capture");
        return false;
    }

    std::string frame_jpeg;
    const bool captured = cam->CaptureToJpeg(frame_jpeg, false);
    if (!captured) {
        ESP_LOGW(TAG, "Failed to capture background frame");
        return false;
    }
    if (frame_jpeg.empty()) {
        ESP_LOGW(TAG, "Captured empty background frame");
        return false;
    }

    return UploadJpegFrame(frame_jpeg);
#else
    return false;
#endif
}
/**
 * Compute the retry delay for the current number of consecutive failures.
 *
 * The delay starts at CONFIG_BACKGROUND_CAPTURE_RETRY_INTERVAL_MS and doubles
 * with each further failure, up to a factor of 16, capped at
 * CONFIG_BACKGROUND_CAPTURE_MAX_BACKOFF_MS.
 *
 * @return Delay in milliseconds; 0 when CONFIG_BACKGROUND_CAPTURE_ENABLE is
 *         not set.
 */
uint32_t BackgroundCaptureService::GetFailureDelayMs() const {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    constexpr uint32_t kMaxShift = 4;
    const uint32_t base_delay_ms = CONFIG_BACKGROUND_CAPTURE_RETRY_INTERVAL_MS;
    const uint32_t max_delay_ms = CONFIG_BACKGROUND_CAPTURE_MAX_BACKOFF_MS;

    // With zero recorded failures the unsigned subtraction would wrap and
    // select the largest shift; treat that case as the base delay instead.
    const uint32_t shift = consecutive_failures_ == 0
                               ? 0
                               : std::min<uint32_t>(consecutive_failures_ - 1, kMaxShift);

    // Shift in 64 bits so a large base interval cannot overflow before the
    // result is clamped to the configured maximum.
    const uint64_t scaled_delay_ms = static_cast<uint64_t>(base_delay_ms) << shift;
    return static_cast<uint32_t>(std::min<uint64_t>(scaled_delay_ms, max_delay_ms));
#else
    return 0;
#endif
}
/**
 * POST a JPEG frame to CONFIG_BACKGROUND_CAPTURE_UPLOAD_URL as a
 * multipart/form-data request with a single "file" part named "frame.jpg".
 *
 * @param jpeg_data Encoded JPEG bytes to upload.
 * @return true when the upload URL is empty (the frame is only logged) or the
 *         server answers with a 2xx status; false when no network is
 *         available, the connection cannot be opened, the status is outside
 *         2xx, or CONFIG_BACKGROUND_CAPTURE_ENABLE is not set.
 */
bool BackgroundCaptureService::UploadJpegFrame(const std::string& jpeg_data) {
#if CONFIG_BACKGROUND_CAPTURE_ENABLE
    const std::string upload_url = CONFIG_BACKGROUND_CAPTURE_UPLOAD_URL;
    const unsigned frame_size = static_cast<unsigned>(jpeg_data.size());

    // No upload target configured: report the capture and treat it as success.
    if (upload_url.empty()) {
        ESP_LOGI(TAG, "Captured background frame: %u bytes", frame_size);
        return true;
    }

    auto network = Board::GetInstance().GetNetwork();
    if (network == nullptr) {
        ESP_LOGW(TAG, "No network available for background upload");
        return false;
    }

    const std::string boundary = "----XIAOZHI_BACKGROUND_CAPTURE_BOUNDARY";

    auto http = network->CreateHttp(3);
    http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
    if (!http->Open("POST", upload_url)) {
        ESP_LOGW(TAG, "Failed to open background upload URL: %s", upload_url.c_str());
        return false;
    }

    // Multipart body: part header, raw JPEG bytes, closing boundary.
    const std::string part_header =
        "--" + boundary + "\r\n" +
        "Content-Disposition: form-data; name=\"file\"; filename=\"frame.jpg\"\r\n" +
        "Content-Type: image/jpeg\r\n\r\n";
    const std::string closing = "\r\n--" + boundary + "--\r\n";

    http->Write(part_header.c_str(), part_header.size());
    http->Write(jpeg_data.data(), jpeg_data.size());
    http->Write(closing.c_str(), closing.size());
    // NOTE(review): the zero-length write presumably marks the end of the
    // request body for this HTTP client; confirm against its documentation.
    http->Write("", 0);

    const int status_code = http->GetStatusCode();
    http->Close();

    const bool accepted = status_code >= 200 && status_code < 300;
    if (!accepted) {
        ESP_LOGW(TAG, "Background upload failed, status=%d", status_code);
        return false;
    }

    ESP_LOGI(TAG, "Uploaded background frame: %u bytes", frame_size);
    return true;
#else
    (void)jpeg_data;
    return false;
#endif
}

View File

@ -0,0 +1,32 @@
#ifndef BACKGROUND_CAPTURE_SERVICE_H
#define BACKGROUND_CAPTURE_SERVICE_H
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <atomic>
#include <cstdint>
#include <string>
/**
 * Runs a FreeRTOS task that repeatedly captures camera frames and uploads
 * them. Start() and Stop() control the task; the destructor stops it.
 */
class BackgroundCaptureService {
public:
    BackgroundCaptureService();
    ~BackgroundCaptureService();

    /// Create the capture task if it is not already running.
    void Start();
    /// Request the task to stop and wait until it has exited.
    void Stop();
    /// @return The current value of the running flag.
    bool IsRunning() const { return running_.load(); }

private:
    /// Handle filled in by xTaskCreate; reset to nullptr by the task on exit.
    TaskHandle_t task_handle_{nullptr};
    /// Set by Start(), cleared by Stop(); polled by the capture loop.
    std::atomic<bool> running_{false};
    /// Failed capture/upload attempts since the last success.
    uint32_t consecutive_failures_{0};

    /// Task entry point; `arg` is the owning service instance.
    static void TaskEntry(void* arg);
    /// Capture loop executed inside the task.
    void Run();
    /// Capture one frame and pass it to UploadJpegFrame().
    bool CaptureAndSendFrame();
    /// Upload one encoded JPEG frame.
    bool UploadJpegFrame(const std::string& jpeg_data);
    /// Retry delay derived from consecutive_failures_.
    uint32_t GetFailureDelayMs() const;
};
#endif // BACKGROUND_CAPTURE_SERVICE_H

View File

@ -214,6 +214,9 @@ public:
case kDeviceStateSpeaking: case kDeviceStateSpeaking:
ctrl_->SetStatusColor(64, 0, 0); // red ctrl_->SetStatusColor(64, 0, 0); // red
break; break;
case kDeviceStateThinking:
ctrl_->SetStatusColor(0, 0, 64); // blue
break;
default: default:
ctrl_->SetStatusColor(0, 0, 64); // blue ctrl_->SetStatusColor(0, 0, 64); // blue
break; break;

View File

@ -7,6 +7,8 @@ class Camera {
public: public:
virtual void SetExplainUrl(const std::string& url, const std::string& token) = 0; virtual void SetExplainUrl(const std::string& url, const std::string& token) = 0;
virtual bool Capture() = 0; virtual bool Capture() = 0;
virtual bool CaptureBackground() { return Capture(); }
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) { return false; }
virtual bool SetHMirror(bool enabled) = 0; virtual bool SetHMirror(bool enabled) = 0;
virtual bool SetVFlip(bool enabled) = 0; virtual bool SetVFlip(bool enabled) = 0;
virtual bool SetSwapBytes(bool enabled) { return false; } // Optional, default no-op virtual bool SetSwapBytes(bool enabled) { return false; } // Optional, default no-op

View File

@ -24,6 +24,7 @@
#include "lvgl_display.h" #include "lvgl_display.h"
#include "mcp_server.h" #include "mcp_server.h"
#include "system_info.h" #include "system_info.h"
#include "esp_timer.h"
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE #ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#undef LOG_LOCAL_LEVEL #undef LOG_LOCAL_LEVEL
@ -55,6 +56,7 @@
#define TAG "EspVideo" #define TAG "EspVideo"
#define FOREGROUND_CAPTURE_PROTECTION_US (10 * 1000 * 1000)
#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP) #if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
#warning \ #warning \
@ -381,11 +383,47 @@ EspVideo::~EspVideo() {
} }
void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) { void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) {
std::lock_guard<std::mutex> lock(frame_mutex_);
explain_url_ = url; explain_url_ = url;
explain_token_ = token; explain_token_ = token;
} }
bool EspVideo::Capture() { bool EspVideo::Capture() {
return CaptureFrame(true);
}
// Captures a frame through CaptureFrame() with show_preview = false.
// Returns whatever CaptureFrame() returns.
bool EspVideo::CaptureBackground() {
    return CaptureFrame(false);
}
/**
 * Capture a frame and encode it to JPEG (quality 60) into `jpeg_data`.
 *
 * @param jpeg_data    Output buffer; cleared first, then filled with the
 *                     encoded bytes as the encoder emits them.
 * @param show_preview Forwarded to CaptureFrame().
 * @return false when the capture fails or no frame data is stored; otherwise
 *         the result of image_to_jpeg_cb().
 */
bool EspVideo::CaptureToJpeg(std::string& jpeg_data, bool show_preview) {
    jpeg_data.clear();

    const bool captured = CaptureFrame(show_preview);
    if (!captured) {
        return false;
    }

    // CaptureFrame() has released the frame lock; take it again while the
    // stored frame is read and encoded.
    std::lock_guard<std::mutex> lock(frame_mutex_);
    const bool has_frame = frame_.data != nullptr && frame_.len != 0;
    if (!has_frame) {
        return false;
    }

    // Fall back to 320x240 when the stored frame carries no dimensions.
    const uint16_t width = frame_.width != 0 ? frame_.width : 320;
    const uint16_t height = frame_.height != 0 ? frame_.height : 240;

    // Encoder callback: append each emitted chunk to the output string and
    // report the full chunk length back to the encoder.
    auto append_chunk = [](void* arg, size_t /*index*/, const void* data, size_t len) -> size_t {
        auto* out = static_cast<std::string*>(arg);
        if (data != nullptr && len > 0) {
            out->append(static_cast<const char*>(data), len);
        }
        return len;
    };

    return image_to_jpeg_cb(frame_.data, frame_.len, width, height, frame_.format, 60,
                            append_chunk, &jpeg_data);
}
bool EspVideo::CaptureFrame(bool show_preview) {
std::lock_guard<std::mutex> lock(frame_mutex_);
if (encoder_thread_.joinable()) { if (encoder_thread_.joinable()) {
encoder_thread_.join(); encoder_thread_.join();
} }
@ -394,6 +432,10 @@ bool EspVideo::Capture() {
return false; return false;
} }
if (!show_preview && esp_timer_get_time() < foreground_capture_protected_until_us_) {
return true;
}
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
struct v4l2_buffer buf = {}; struct v4l2_buffer buf = {};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
@ -729,9 +771,14 @@ bool EspVideo::Capture() {
} }
} }
// 显示预览图片 if (show_preview) {
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay()); foreground_capture_protected_until_us_ = esp_timer_get_time() + FOREGROUND_CAPTURE_PROTECTION_US;
if (display != nullptr) { }
if (show_preview) {
// 显示预览图片
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
if (display != nullptr) {
if (!frame_.data) { if (!frame_.data) {
ESP_LOGE(TAG, "frame.data is null"); ESP_LOGE(TAG, "frame.data is null");
return false; return false;
@ -836,6 +883,7 @@ bool EspVideo::Capture() {
auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format); auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
display->SetPreviewImage(std::move(image)); display->SetPreviewImage(std::move(image));
}
} }
return true; return true;
} }
@ -898,10 +946,16 @@ bool EspVideo::SetVFlip(bool enabled) {
* @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息 * @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息
*/ */
std::string EspVideo::Explain(const std::string& question) { std::string EspVideo::Explain(const std::string& question) {
std::lock_guard<std::mutex> lock(frame_mutex_);
if (explain_url_.empty()) { if (explain_url_.empty()) {
throw std::runtime_error("Image explain URL or token is not set"); throw std::runtime_error("Image explain URL or token is not set");
} }
if (frame_.data == nullptr || frame_.len == 0) {
throw std::runtime_error("No camera frame captured");
}
// 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data // 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk)); QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
if (jpeg_queue == nullptr) { if (jpeg_queue == nullptr) {

View File

@ -5,6 +5,8 @@
#include <thread> #include <thread>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <mutex>
#include <cstdint>
#include <freertos/FreeRTOS.h> #include <freertos/FreeRTOS.h>
#include <freertos/queue.h> #include <freertos/queue.h>
@ -39,6 +41,10 @@ private:
std::string explain_url_; std::string explain_url_;
std::string explain_token_; std::string explain_token_;
std::thread encoder_thread_; std::thread encoder_thread_;
std::mutex frame_mutex_;
int64_t foreground_capture_protected_until_us_ = 0;
bool CaptureFrame(bool show_preview);
public: public:
EspVideo(const esp_video_init_config_t& config); EspVideo(const esp_video_init_config_t& config);
@ -46,6 +52,8 @@ public:
virtual void SetExplainUrl(const std::string& url, const std::string& token); virtual void SetExplainUrl(const std::string& url, const std::string& token);
virtual bool Capture(); virtual bool Capture();
virtual bool CaptureBackground() override;
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) override;
// 翻转控制函数 // 翻转控制函数
virtual bool SetHMirror(bool enabled) override; virtual bool SetHMirror(bool enabled) override;
virtual bool SetVFlip(bool enabled) override; virtual bool SetVFlip(bool enabled) override;

View File

@ -203,7 +203,10 @@ void WifiBoard::EnterWifiConfigMode() {
auto& app = Application::GetInstance(); auto& app = Application::GetInstance();
auto state = app.GetDeviceState(); auto state = app.GetDeviceState();
if (state == kDeviceStateSpeaking || state == kDeviceStateListening || state == kDeviceStateIdle) { if (state == kDeviceStateSpeaking ||
state == kDeviceStateThinking ||
state == kDeviceStateListening ||
state == kDeviceStateIdle) {
// Reset protocol (close audio channel, reset protocol) // Reset protocol (close audio channel, reset protocol)
Application::GetInstance().ResetProtocol(); Application::GetInstance().ResetProtocol();

View File

@ -85,6 +85,13 @@ void ElectronEmojiDisplay::SetStatus(const char* status) {
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return; return;
} else if (strcmp(status, Lang::Strings::THINKING) == 0) {
lv_obj_set_style_text_font(status_label_, text_font, 0);
lv_label_set_text(status_label_, status);
lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return;
} else if (strcmp(status, Lang::Strings::CONNECTING) == 0) { } else if (strcmp(status, Lang::Strings::CONNECTING) == 0) {
lv_obj_set_style_text_font(status_label_, &OTTO_ICON_FONT, 0); lv_obj_set_style_text_font(status_label_, &OTTO_ICON_FONT, 0);
lv_label_set_text(status_label_, "\xEF\x83\x81"); // U+F0c1 连接图标 lv_label_set_text(status_label_, "\xEF\x83\x81"); // U+F0c1 连接图标
@ -102,4 +109,4 @@ void ElectronEmojiDisplay::SetStatus(const char* status) {
lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_clear_flag(network_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_clear_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_clear_flag(battery_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_clear_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
} }

View File

@ -155,6 +155,8 @@ void EmojiWidget::SetStatus(const char* status)
if (player_) { if (player_) {
if (strcmp(status, Lang::Strings::LISTENING) == 0) { if (strcmp(status, Lang::Strings::LISTENING) == 0) {
player_->StartPlayer("asking", true, 15); player_->StartPlayer("asking", true, 15);
} else if (strcmp(status, Lang::Strings::THINKING) == 0) {
player_->StartPlayer("thinking", true, 15);
} else if (strcmp(status, Lang::Strings::STANDBY) == 0) { } else if (strcmp(status, Lang::Strings::STANDBY) == 0) {
player_->StartPlayer("wake", true, 15); player_->StartPlayer("wake", true, 15);
} }

View File

@ -231,9 +231,9 @@ private:
// 如果当前是聆听状态,切换到待命状态 // 如果当前是聆听状态,切换到待命状态
ESP_LOGI(TAG, "从聆听状态切换到待命状态"); ESP_LOGI(TAG, "从聆听状态切换到待命状态");
app.ToggleChatState(); // 切换到待命状态 app.ToggleChatState(); // 切换到待命状态
} else if (current_state == kDeviceStateSpeaking) { } else if (current_state == kDeviceStateSpeaking || current_state == kDeviceStateThinking) {
// 如果当前是说话状态,终止说话并切换到待命状态 // 如果当前是说话或思考状态,终止并切换到待命状态
ESP_LOGI(TAG, "从说话状态切换到待命状态"); ESP_LOGI(TAG, "从说话/思考状态切换到待命状态");
app.ToggleChatState(); // 终止说话 app.ToggleChatState(); // 终止说话
} else { } else {
// 其他状态下只唤醒设备 // 其他状态下只唤醒设备

View File

@ -1,21 +1,23 @@
#include "wifi_board.h" #include "application.h"
#include "axp2101.h"
#include "config.h"
#include "cores3_audio_codec.h" #include "cores3_audio_codec.h"
#include "display/lcd_display.h" #include "display/lcd_display.h"
#include "application.h"
#include "config.h"
#include "power_save_timer.h"
#include "i2c_device.h" #include "i2c_device.h"
#include "axp2101.h" #include "power_save_timer.h"
#include "wifi_board.h"
#include <esp_log.h>
#include <driver/i2c_master.h> #include <driver/i2c_master.h>
#include <esp_lcd_ili9341.h>
#include <esp_lcd_panel_io.h> #include <esp_lcd_panel_io.h>
#include <esp_lcd_panel_ops.h> #include <esp_lcd_panel_ops.h>
#include <esp_lcd_ili9341.h> #include <esp_log.h>
#include <esp_timer.h> #include <esp_timer.h>
#include "esp_video.h" #include "esp_video.h"
#define TAG "M5StackCoreS3Board" #define TAG "M5StackCoreS3Board"
#define BACKGROUND_VISION_INITIAL_DELAY_MS 8000
#define BACKGROUND_VISION_SAMPLE_INTERVAL_MS 100
class Pmic : public Axp2101 { class Pmic : public Axp2101 {
public: public:
@ -41,7 +43,7 @@ public:
class CustomBacklight : public Backlight { class CustomBacklight : public Backlight {
public: public:
CustomBacklight(Pmic *pmic) : pmic_(pmic) {} CustomBacklight(Pmic* pmic) : pmic_(pmic) {}
void SetBrightnessImpl(uint8_t brightness) override { void SetBrightnessImpl(uint8_t brightness) override {
pmic_->SetBrightness(target_brightness_); pmic_->SetBrightness(target_brightness_);
@ -49,7 +51,7 @@ public:
} }
private: private:
Pmic *pmic_; Pmic* pmic_;
}; };
class Aw9523 : public I2cDevice { class Aw9523 : public I2cDevice {
@ -89,16 +91,14 @@ public:
int x = -1; int x = -1;
int y = -1; int y = -1;
}; };
Ft6336(i2c_master_bus_handle_t i2c_bus, uint8_t addr) : I2cDevice(i2c_bus, addr) { Ft6336(i2c_master_bus_handle_t i2c_bus, uint8_t addr) : I2cDevice(i2c_bus, addr) {
uint8_t chip_id = ReadReg(0xA3); uint8_t chip_id = ReadReg(0xA3);
ESP_LOGI(TAG, "Get chip ID: 0x%02X", chip_id); ESP_LOGI(TAG, "Get chip ID: 0x%02X", chip_id);
read_buffer_ = new uint8_t[6]; read_buffer_ = new uint8_t[6];
} }
~Ft6336() { ~Ft6336() { delete[] read_buffer_; }
delete[] read_buffer_;
}
void UpdateTouchPoint() { void UpdateTouchPoint() {
ReadRegs(0x02, read_buffer_, 6); ReadRegs(0x02, read_buffer_, 6);
@ -107,9 +107,7 @@ public:
tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4]; tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4];
} }
inline const TouchPoint_t& GetTouchPoint() { inline const TouchPoint_t& GetTouchPoint() { return tp_; }
return tp_;
}
private: private:
uint8_t* read_buffer_ = nullptr; uint8_t* read_buffer_ = nullptr;
@ -137,9 +135,7 @@ private:
GetDisplay()->SetPowerSaveMode(false); GetDisplay()->SetPowerSaveMode(false);
GetBacklight()->RestoreBrightness(); GetBacklight()->RestoreBrightness();
}); });
power_save_timer_->OnShutdownRequest([this]() { power_save_timer_->OnShutdownRequest([this]() { pmic_->PowerOff(); });
pmic_->PowerOff();
});
power_save_timer_->SetEnabled(true); power_save_timer_->SetEnabled(true);
} }
@ -153,9 +149,10 @@ private:
.glitch_ignore_cnt = 7, .glitch_ignore_cnt = 7,
.intr_priority = 0, .intr_priority = 0,
.trans_queue_depth = 0, .trans_queue_depth = 0,
.flags = { .flags =
.enable_internal_pullup = 1, {
}, .enable_internal_pullup = 1,
},
}; };
ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_)); ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_));
} }
@ -195,29 +192,37 @@ private:
void PollTouchpad() { void PollTouchpad() {
static bool was_touched = false; static bool was_touched = false;
static int64_t touch_start_time = 0; static int64_t touch_start_time = 0;
static int touch_start_x = -1;
const int64_t TOUCH_THRESHOLD_MS = 500; // 触摸时长阈值超过500ms视为长按 const int64_t TOUCH_THRESHOLD_MS = 500; // 触摸时长阈值超过500ms视为长按
ft6336_->UpdateTouchPoint(); ft6336_->UpdateTouchPoint();
auto& touch_point = ft6336_->GetTouchPoint(); auto& touch_point = ft6336_->GetTouchPoint();
// 检测触摸开始 // 检测触摸开始
if (touch_point.num > 0 && !was_touched) { if (touch_point.num > 0 && !was_touched) {
was_touched = true; was_touched = true;
touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒 touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒
} touch_start_x = touch_point.x;
}
// 检测触摸释放 // 检测触摸释放
else if (touch_point.num == 0 && was_touched) { else if (touch_point.num == 0 && was_touched) {
was_touched = false; was_touched = false;
int64_t touch_duration = (esp_timer_get_time() / 1000) - touch_start_time; int64_t touch_duration = (esp_timer_get_time() / 1000) - touch_start_time;
bool beaver_mode = touch_start_x >= DISPLAY_WIDTH / 2;
// 只有短触才触发 auto agent_mode = beaver_mode ? kChatAgentModeBeaver : kChatAgentModeNormal;
if (touch_duration < TOUCH_THRESHOLD_MS) { if (touch_duration < TOUCH_THRESHOLD_MS) {
auto& app = Application::GetInstance(); auto& app = Application::GetInstance();
if (app.GetDeviceState() == kDeviceStateStarting) { if (app.GetDeviceState() == kDeviceStateStarting) {
EnterWifiConfigMode(); EnterWifiConfigMode();
return; return;
} }
app.ToggleChatState(); ESP_LOGI(TAG, "Touch short: %s text-only mode", beaver_mode ? "beaver" : "normal");
app.ToggleChatStateForMode(agent_mode, false);
} else {
auto& app = Application::GetInstance();
ESP_LOGI(TAG, "Touch long: %s vision+text mode", beaver_mode ? "beaver" : "normal");
app.ToggleChatStateForMode(agent_mode, true);
} }
} }
} }
@ -225,19 +230,20 @@ private:
void InitializeFt6336TouchPad() { void InitializeFt6336TouchPad() {
ESP_LOGI(TAG, "Init FT6336"); ESP_LOGI(TAG, "Init FT6336");
ft6336_ = new Ft6336(i2c_bus_, 0x38); ft6336_ = new Ft6336(i2c_bus_, 0x38);
// 创建定时器20ms 间隔 // 创建定时器20ms 间隔
esp_timer_create_args_t timer_args = { esp_timer_create_args_t timer_args = {
.callback = [](void* arg) { .callback =
M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg; [](void* arg) {
board->PollTouchpad(); M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg;
}, board->PollTouchpad();
},
.arg = this, .arg = this,
.dispatch_method = ESP_TIMER_TASK, .dispatch_method = ESP_TIMER_TASK,
.name = "touchpad_timer", .name = "touchpad_timer",
.skip_unhandled_events = true, .skip_unhandled_events = true,
}; };
ESP_ERROR_CHECK(esp_timer_create(&timer_args, &touchpad_timer_)); ESP_ERROR_CHECK(esp_timer_create(&timer_args, &touchpad_timer_));
ESP_ERROR_CHECK(esp_timer_start_periodic(touchpad_timer_, 20 * 1000)); ESP_ERROR_CHECK(esp_timer_start_periodic(touchpad_timer_, 20 * 1000));
} }
@ -276,7 +282,7 @@ private:
panel_config.rgb_ele_order = LCD_RGB_ELEMENT_ORDER_BGR; panel_config.rgb_ele_order = LCD_RGB_ELEMENT_ORDER_BGR;
panel_config.bits_per_pixel = 16; panel_config.bits_per_pixel = 16;
ESP_ERROR_CHECK(esp_lcd_new_panel_ili9341(panel_io, &panel_config, &panel)); ESP_ERROR_CHECK(esp_lcd_new_panel_ili9341(panel_io, &panel_config, &panel));
esp_lcd_panel_reset(panel); esp_lcd_panel_reset(panel);
aw9523_->ResetIli9342(); aw9523_->ResetIli9342();
@ -285,23 +291,25 @@ private:
esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY); esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY);
esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y); esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
display_ = new SpiLcdDisplay(panel_io, panel, display_ = new SpiLcdDisplay(panel_io, panel, DISPLAY_WIDTH, DISPLAY_HEIGHT,
DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY); DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X,
DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
} }
void InitializeCamera() { void InitializeCamera() {
static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = { static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = {
.data_width = CAM_CTLR_DATA_WIDTH_8, .data_width = CAM_CTLR_DATA_WIDTH_8,
.data_io = { .data_io =
[0] = CAMERA_PIN_D0, {
[1] = CAMERA_PIN_D1, [0] = CAMERA_PIN_D0,
[2] = CAMERA_PIN_D2, [1] = CAMERA_PIN_D1,
[3] = CAMERA_PIN_D3, [2] = CAMERA_PIN_D2,
[4] = CAMERA_PIN_D4, [3] = CAMERA_PIN_D3,
[5] = CAMERA_PIN_D5, [4] = CAMERA_PIN_D4,
[6] = CAMERA_PIN_D6, [5] = CAMERA_PIN_D5,
[7] = CAMERA_PIN_D7, [6] = CAMERA_PIN_D6,
}, [7] = CAMERA_PIN_D7,
},
.vsync_io = CAMERA_PIN_VSYNC, .vsync_io = CAMERA_PIN_VSYNC,
.de_io = CAMERA_PIN_HREF, .de_io = CAMERA_PIN_HREF,
.pclk_io = CAMERA_PIN_PCLK, .pclk_io = CAMERA_PIN_PCLK,
@ -330,6 +338,42 @@ private:
camera_->SetHMirror(false); camera_->SetHMirror(false);
} }
void InitializeBackgroundVisionSampler() {
xTaskCreate(
[](void* arg) {
auto board = static_cast<M5StackCoreS3Board*>(arg);
bool has_logged_success = false;
bool has_logged_failure = false;
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS));
while (true) {
if (!Application::GetInstance().IsVisionTextModeEnabled()) {
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
continue;
}
if (board->camera_ == nullptr) {
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
continue;
}
if (board->camera_->Capture()) {
if (!has_logged_success) {
ESP_LOGI(TAG, "Vision preview sampler started");
has_logged_success = true;
}
} else if (!has_logged_failure) {
ESP_LOGW(TAG, "Vision preview sampler is waiting for camera");
has_logged_failure = true;
}
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
}
},
"BgVisionSampler", 4096, this, 1, nullptr);
}
public: public:
M5StackCoreS3Board() { M5StackCoreS3Board() {
InitializePowerSaveTimer(); InitializePowerSaveTimer();
@ -340,34 +384,24 @@ public:
InitializeSpi(); InitializeSpi();
InitializeIli9342Display(); InitializeIli9342Display();
InitializeCamera(); InitializeCamera();
InitializeBackgroundVisionSampler();
InitializeFt6336TouchPad(); InitializeFt6336TouchPad();
GetBacklight()->RestoreBrightness(); GetBacklight()->RestoreBrightness();
} }
virtual AudioCodec* GetAudioCodec() override { virtual AudioCodec* GetAudioCodec() override {
static CoreS3AudioCodec audio_codec(i2c_bus_, static CoreS3AudioCodec audio_codec(
AUDIO_INPUT_SAMPLE_RATE, i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE, AUDIO_I2S_GPIO_MCLK,
AUDIO_OUTPUT_SAMPLE_RATE, AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN,
AUDIO_I2S_GPIO_MCLK, AUDIO_CODEC_AW88298_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE);
AUDIO_I2S_GPIO_BCLK,
AUDIO_I2S_GPIO_WS,
AUDIO_I2S_GPIO_DOUT,
AUDIO_I2S_GPIO_DIN,
AUDIO_CODEC_AW88298_ADDR,
AUDIO_CODEC_ES7210_ADDR,
AUDIO_INPUT_REFERENCE);
return &audio_codec; return &audio_codec;
} }
virtual Display* GetDisplay() override { virtual Display* GetDisplay() override { return display_; }
return display_;
}
virtual Camera* GetCamera() override { virtual Camera* GetCamera() override { return camera_; }
return camera_;
}
virtual bool GetBatteryLevel(int &level, bool& charging, bool& discharging) override { virtual bool GetBatteryLevel(int& level, bool& charging, bool& discharging) override {
static bool last_discharging = false; static bool last_discharging = false;
charging = pmic_->IsCharging(); charging = pmic_->IsCharging();
discharging = pmic_->IsDischarging(); discharging = pmic_->IsDischarging();
@ -387,7 +421,7 @@ public:
WifiBoard::SetPowerSaveLevel(level); WifiBoard::SetPowerSaveLevel(level);
} }
virtual Backlight *GetBacklight() override { virtual Backlight* GetBacklight() override {
static CustomBacklight backlight(pmic_); static CustomBacklight backlight(pmic_);
return &backlight; return &backlight;
} }

View File

@ -77,6 +77,13 @@ void OttoEmojiDisplay::SetStatus(const char* status) {
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN); lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return; return;
} else if (strcmp(status, Lang::Strings::THINKING) == 0) {
lv_obj_set_style_text_font(status_label_, text_font, 0);
lv_label_set_text(status_label_, status);
lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(network_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(battery_label_, LV_OBJ_FLAG_HIDDEN);
return;
} else if (strcmp(status, Lang::Strings::CONNECTING) == 0) { } else if (strcmp(status, Lang::Strings::CONNECTING) == 0) {
lv_obj_set_style_text_font(status_label_, &OTTO_ICON_FONT, 0); lv_obj_set_style_text_font(status_label_, &OTTO_ICON_FONT, 0);
lv_label_set_text(status_label_, "\xEF\x83\x81"); // U+F0c1 连接图标 lv_label_set_text(status_label_, "\xEF\x83\x81"); // U+F0c1 连接图标
@ -131,4 +138,4 @@ void OttoEmojiDisplay::SetPreviewImage(std::unique_ptr<LvglImage> image) {
lv_obj_remove_flag(preview_image_, LV_OBJ_FLAG_HIDDEN); lv_obj_remove_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
esp_timer_stop(preview_timer_); esp_timer_stop(preview_timer_);
ESP_ERROR_CHECK(esp_timer_start_once(preview_timer_, PREVIEW_IMAGE_DURATION_MS * 1000)); ESP_ERROR_CHECK(esp_timer_start_once(preview_timer_, PREVIEW_IMAGE_DURATION_MS * 1000));
} }

View File

@ -598,6 +598,10 @@ CONFIG_PARTITION_TABLE_MD5=y
# Xiaozhi Assistant # Xiaozhi Assistant
# #
CONFIG_OTA_URL="https://api.tenclass.net/xiaozhi/ota/" CONFIG_OTA_URL="https://api.tenclass.net/xiaozhi/ota/"
CONFIG_USE_DIRECT_WEBSOCKET=y
CONFIG_WEBSOCKET_URL="ws://172.19.0.240:8080"
CONFIG_WEBSOCKET_TOKEN=""
CONFIG_WEBSOCKET_PROTOCOL_VERSION=1
# CONFIG_FLASH_NONE_ASSETS is not set # CONFIG_FLASH_NONE_ASSETS is not set
CONFIG_FLASH_DEFAULT_ASSETS=y CONFIG_FLASH_DEFAULT_ASSETS=y
# CONFIG_FLASH_CUSTOM_ASSETS is not set # CONFIG_FLASH_CUSTOM_ASSETS is not set

1929
main/bridge_server.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@ enum DeviceState {
kDeviceStateIdle, kDeviceStateIdle,
kDeviceStateConnecting, kDeviceStateConnecting,
kDeviceStateListening, kDeviceStateListening,
kDeviceStateThinking,
kDeviceStateSpeaking, kDeviceStateSpeaking,
kDeviceStateUpgrading, kDeviceStateUpgrading,
kDeviceStateActivating, kDeviceStateActivating,
@ -15,4 +16,4 @@ enum DeviceState {
kDeviceStateFatalError kDeviceStateFatalError
}; };
#endif // _DEVICE_STATE_H_ #endif // _DEVICE_STATE_H_

View File

@ -13,6 +13,7 @@ static const char* const STATE_STRINGS[] = {
"idle", "idle",
"connecting", "connecting",
"listening", "listening",
"thinking",
"speaking", "speaking",
"upgrading", "upgrading",
"activating", "activating",
@ -69,9 +70,10 @@ bool DeviceStateMachine::IsValidTransition(DeviceState from, DeviceState to) con
to == kDeviceStateActivating; to == kDeviceStateActivating;
case kDeviceStateIdle: case kDeviceStateIdle:
// Can go to connecting, listening (manual mode), speaking, activating, upgrading, or wifi configuring // Can go to connecting, listening (manual mode), thinking, speaking, activating, upgrading, or wifi configuring
return to == kDeviceStateConnecting || return to == kDeviceStateConnecting ||
to == kDeviceStateListening || to == kDeviceStateListening ||
to == kDeviceStateThinking ||
to == kDeviceStateSpeaking || to == kDeviceStateSpeaking ||
to == kDeviceStateActivating || to == kDeviceStateActivating ||
to == kDeviceStateUpgrading || to == kDeviceStateUpgrading ||
@ -83,8 +85,15 @@ bool DeviceStateMachine::IsValidTransition(DeviceState from, DeviceState to) con
to == kDeviceStateListening; to == kDeviceStateListening;
case kDeviceStateListening: case kDeviceStateListening:
// Can go to speaking or idle // Can go to thinking, speaking, or idle
return to == kDeviceStateThinking ||
to == kDeviceStateSpeaking ||
to == kDeviceStateIdle;
case kDeviceStateThinking:
// Can go to speaking, listening, or idle
return to == kDeviceStateSpeaking || return to == kDeviceStateSpeaking ||
to == kDeviceStateListening ||
to == kDeviceStateIdle; to == kDeviceStateIdle;
case kDeviceStateSpeaking: case kDeviceStateSpeaking:

View File

@ -167,6 +167,8 @@ void EmoteDisplay::SetStatus(const char* const status)
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_LISTEN, NULL); emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_LISTEN, NULL);
} else if (std::strcmp(status, Lang::Strings::STANDBY) == 0) { } else if (std::strcmp(status, Lang::Strings::STANDBY) == 0) {
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_IDLE, NULL); emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_IDLE, NULL);
} else if (std::strcmp(status, Lang::Strings::THINKING) == 0) {
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_LISTEN, NULL);
} else if (std::strcmp(status, Lang::Strings::SPEAKING) == 0) { } else if (std::strcmp(status, Lang::Strings::SPEAKING) == 0) {
emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_SPEAK, NULL); emote_set_event_msg(emote_handle_, EMOTE_MGR_EVT_SPEAK, NULL);
} else if (std::strcmp(status, Lang::Strings::ERROR) == 0) { } else if (std::strcmp(status, Lang::Strings::ERROR) == 0) {
@ -247,4 +249,4 @@ void EmoteDisplay::RefreshAll()
} }
} }
} // namespace emote } // namespace emote

View File

@ -203,6 +203,7 @@ void LvglDisplay::UpdateStatusBar(bool update_all) {
kDeviceStateStarting, kDeviceStateStarting,
kDeviceStateWifiConfiguring, kDeviceStateWifiConfiguring,
kDeviceStateListening, kDeviceStateListening,
kDeviceStateThinking,
kDeviceStateActivating, kDeviceStateActivating,
}; };
if (std::find(allowed_states.begin(), allowed_states.end(), device_state) != allowed_states.end()) { if (std::find(allowed_states.begin(), allowed_states.end(), device_state) != allowed_states.end()) {

View File

@ -228,6 +228,11 @@ void CircularStrip::OnStateChanged() {
SetAllColor(color); SetAllColor(color);
break; break;
} }
case kDeviceStateThinking: {
StripColor color = { low_brightness_, low_brightness_, default_brightness_ };
Blink(color, 300);
break;
}
case kDeviceStateUpgrading: { case kDeviceStateUpgrading: {
StripColor color = { low_brightness_, default_brightness_, low_brightness_ }; StripColor color = { low_brightness_, default_brightness_, low_brightness_ };
Blink(color, 100); Blink(color, 100);

View File

@ -235,6 +235,10 @@ void GpioLed::OnStateChanged() {
// TurnOn(); // TurnOn();
StartFadeTask(); StartFadeTask();
break; break;
case kDeviceStateThinking:
SetBrightness(DEFAULT_BRIGHTNESS);
StartContinuousBlink(300);
break;
case kDeviceStateSpeaking: case kDeviceStateSpeaking:
SetBrightness(SPEAKING_BRIGHTNESS); SetBrightness(SPEAKING_BRIGHTNESS);
TurnOn(); TurnOn();
@ -260,4 +264,4 @@ void GpioLed::EventTask(void* arg) {
ulTaskNotifyTake(pdTRUE, portMAX_DELAY); ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
led->OnFadeEnd(); led->OnFadeEnd();
} }
} }

View File

@ -152,6 +152,10 @@ void SingleLed::OnStateChanged() {
SetColor(0, DEFAULT_BRIGHTNESS, 0); SetColor(0, DEFAULT_BRIGHTNESS, 0);
TurnOn(); TurnOn();
break; break;
case kDeviceStateThinking:
SetColor(0, 0, DEFAULT_BRIGHTNESS);
StartContinuousBlink(300);
break;
case kDeviceStateUpgrading: case kDeviceStateUpgrading:
SetColor(0, DEFAULT_BRIGHTNESS, 0); SetColor(0, DEFAULT_BRIGHTNESS, 0);
StartContinuousBlink(100); StartContinuousBlink(100);

View File

@ -1,9 +1,22 @@
#include "protocol.h" #include "protocol.h"
#include <esp_log.h> #include <esp_log.h>
#include <mbedtls/base64.h>
#define TAG "Protocol" #define TAG "Protocol"
static std::string Base64Encode(const std::string& data) {
size_t encoded_length = 0;
size_t output_length = 0;
mbedtls_base64_encode(nullptr, 0, &encoded_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
std::string result(encoded_length, 0);
mbedtls_base64_encode(reinterpret_cast<unsigned char*>(result.data()), result.size(), &output_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
result.resize(output_length);
return result;
}
void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) { void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) {
on_incoming_json_ = callback; on_incoming_json_ = callback;
} }
@ -78,6 +91,27 @@ void Protocol::SendMcpMessage(const std::string& payload) {
SendText(message); SendText(message);
} }
void Protocol::SendVisionFrame(const std::string& jpeg_data) {
if (jpeg_data.empty()) {
return;
}
cJSON* root = cJSON_CreateObject();
cJSON_AddStringToObject(root, "session_id", session_id_.c_str());
cJSON_AddStringToObject(root, "type", "vision");
cJSON_AddStringToObject(root, "state", "frame");
cJSON_AddStringToObject(root, "mime_type", "image/jpeg");
auto encoded = Base64Encode(jpeg_data);
cJSON_AddStringToObject(root, "image", encoded.c_str());
char* json_str = cJSON_PrintUnformatted(root);
if (json_str != nullptr) {
SendText(json_str);
cJSON_free(json_str);
}
cJSON_Delete(root);
}
bool Protocol::IsTimeout() const { bool Protocol::IsTimeout() const {
const int kTimeoutSeconds = 120; const int kTimeoutSeconds = 120;
auto now = std::chrono::steady_clock::now(); auto now = std::chrono::steady_clock::now();

View File

@ -73,6 +73,7 @@ public:
virtual void SendStopListening(); virtual void SendStopListening();
virtual void SendAbortSpeaking(AbortReason reason); virtual void SendAbortSpeaking(AbortReason reason);
virtual void SendMcpMessage(const std::string& message); virtual void SendMcpMessage(const std::string& message);
virtual void SendVisionFrame(const std::string& jpeg_data);
protected: protected:
std::function<void(const cJSON* root)> on_incoming_json_; std::function<void(const cJSON* root)> on_incoming_json_;
@ -95,4 +96,3 @@ protected:
}; };
#endif // PROTOCOL_H #endif // PROTOCOL_H

View File

@ -85,10 +85,21 @@ bool WebsocketProtocol::OpenAudioChannel() {
std::string url = settings.GetString("url"); std::string url = settings.GetString("url");
std::string token = settings.GetString("token"); std::string token = settings.GetString("token");
int version = settings.GetInt("version"); int version = settings.GetInt("version");
#if CONFIG_USE_DIRECT_WEBSOCKET
url = CONFIG_WEBSOCKET_URL;
token = CONFIG_WEBSOCKET_TOKEN;
version = CONFIG_WEBSOCKET_PROTOCOL_VERSION;
#endif
if (version != 0) { if (version != 0) {
version_ = version; version_ = version;
} }
if (url.empty()) {
ESP_LOGE(TAG, "Websocket URL is not set");
SetError(Lang::Strings::SERVER_NOT_CONNECTED);
return false;
}
error_occurred_ = false; error_occurred_ = false;
auto network = Board::GetInstance().GetNetwork(); auto network = Board::GetInstance().GetNetwork();
@ -108,6 +119,8 @@ bool WebsocketProtocol::OpenAudioChannel() {
websocket_->SetHeader("Protocol-Version", std::to_string(version_).c_str()); websocket_->SetHeader("Protocol-Version", std::to_string(version_).c_str());
websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str()); websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str()); websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
websocket_->SetHeader("Agent-Mode", Application::GetInstance().GetChatAgentModeName());
websocket_->SetHeader("Chat-Mode", Application::GetInstance().GetChatModeName());
websocket_->OnData([this](const char* data, size_t len, bool binary) { websocket_->OnData([this](const char* data, size_t len, bool binary) {
if (binary) { if (binary) {