From b92e6e1b07495f7258fa04b9906847496c4b6296 Mon Sep 17 00:00:00 2001 From: 0Xiao0 <511201264@qq.com> Date: Fri, 29 May 2026 14:53:58 +0800 Subject: [PATCH] feat: remove background cam every time --- main/application.cc | 31 +++++++++++++------ main/application.h | 4 ++- .../boards/m5stack-core-s3/m5stack_core_s3.cc | 14 +++++---- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/main/application.cc b/main/application.cc index 1095710..448a794 100644 --- a/main/application.cc +++ b/main/application.cc @@ -81,6 +81,7 @@ void Application::Initialize() { xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED); }; callbacks.on_vad_change = [this](bool speaking) { + vad_speaking_.store(speaking); xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE); }; audio_service_.SetCallbacks(callbacks); @@ -233,6 +234,13 @@ void Application::Run() { if (GetDeviceState() == kDeviceStateListening) { auto led = Board::GetInstance().GetLed(); led->OnStateChanged(); + + if (vad_speaking_.load() && vision_text_mode_enabled_.load() && + !vision_frame_sent_for_current_listen_.exchange(true)) { + if (!SendCurrentVisionFrame()) { + vision_frame_sent_for_current_listen_.store(false); + } + } } } @@ -674,11 +682,13 @@ void Application::DismissAlert() { void Application::ToggleChatState() { vision_text_mode_enabled_.store(false); + vision_frame_sent_for_current_listen_.store(false); xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT); } void Application::ToggleChatStateWithVision() { vision_text_mode_enabled_.store(true); + vision_frame_sent_for_current_listen_.store(false); xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT); } @@ -688,6 +698,7 @@ bool Application::IsVisionTextModeEnabled() const { void Application::StartListening() { vision_text_mode_enabled_.store(false); + vision_frame_sent_for_current_listen_.store(false); xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING); } @@ -697,9 +708,6 @@ void Application::StopListening() { void Application::HandleToggleChatEvent() { auto state = GetDeviceState(); - if (state != kDeviceStateIdle) { - vision_text_mode_enabled_.store(false); - } if (state == kDeviceStateActivating) { SetDeviceState(kDeviceStateIdle); @@ -896,6 +904,7 @@ void Application::HandleStateChangedEvent() { switch (new_state) { case kDeviceStateUnknown: case kDeviceStateIdle: + vision_frame_sent_for_current_listen_.store(false); display->SetStatus(Lang::Strings::STANDBY); display->ClearChatMessages(); // Clear messages first display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count) @@ -908,6 +917,8 @@ void Application::HandleStateChangedEvent() { display->SetChatMessage("system", ""); break; case kDeviceStateListening: + vad_speaking_.store(false); + vision_frame_sent_for_current_listen_.store(false); display->SetStatus(Lang::Strings::LISTENING); display->SetEmotion("neutral"); @@ -919,9 +930,6 @@ void Application::HandleStateChangedEvent() { audio_service_.WaitForPlaybackQueueEmpty(); } - if (vision_text_mode_enabled_.load()) { - SendCurrentVisionFrame(); - } // Send the start listening command protocol_->SendStartListening(listening_mode_); audio_service_.EnableVoiceProcessing(true); @@ -961,24 +969,25 @@ void Application::HandleStateChangedEvent() { } } -void Application::SendCurrentVisionFrame() { +bool Application::SendCurrentVisionFrame() { if (!protocol_ || !protocol_->IsAudioChannelOpened()) { - return; + return false; } auto camera = Board::GetInstance().GetCamera(); if (camera == nullptr) { - return; + return false; } std::string jpeg_data; if (!camera->CaptureToJpeg(jpeg_data, true)) { ESP_LOGW(TAG, "Failed to capture vision frame"); - return; + return false; } protocol_->SendVisionFrame(jpeg_data); ESP_LOGI(TAG, "Sent vision frame, size=%u bytes", static_cast(jpeg_data.size())); + return true; } void Application::Schedule(std::function&& callback) { @@ -999,6 +1008,8 @@ void Application::AbortSpeaking(AbortReason reason) { void Application::SetListeningMode(ListeningMode mode) { listening_mode_ = mode; + vad_speaking_.store(false); + vision_frame_sent_for_current_listen_.store(false); SetDeviceState(kDeviceStateListening); } diff --git a/main/application.h b/main/application.h index a4fb7a5..8eab256 100644 --- a/main/application.h +++ b/main/application.h @@ -148,6 +148,8 @@ private: bool assets_version_checked_ = false; bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening std::atomic vision_text_mode_enabled_ = false; + std::atomic vad_speaking_ = false; + std::atomic vision_frame_sent_for_current_listen_ = false; int clock_ticks_ = 0; TaskHandle_t activation_task_handle_ = nullptr; @@ -163,7 +165,7 @@ private: void HandleWakeWordDetectedEvent(); void ContinueOpenAudioChannel(ListeningMode mode); void ContinueWakeWordInvoke(const std::string& wake_word); - void SendCurrentVisionFrame(); + bool SendCurrentVisionFrame(); // Activation task (runs in background) void ActivationTask(); diff --git a/main/boards/m5stack-core-s3/m5stack_core_s3.cc b/main/boards/m5stack-core-s3/m5stack_core_s3.cc index 33c8cd4..6da3198 100644 --- a/main/boards/m5stack-core-s3/m5stack_core_s3.cc +++ b/main/boards/m5stack-core-s3/m5stack_core_s3.cc @@ -344,21 +344,23 @@ private: vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS)); while (true) { + if (!Application::GetInstance().IsVisionTextModeEnabled()) { + vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS)); + continue; + } + if (board->camera_ == nullptr) { vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS)); continue; } - bool capture_ok = Application::GetInstance().IsVisionTextModeEnabled() - ? board->camera_->Capture() - : board->camera_->CaptureBackground(); - if (capture_ok) { + if (board->camera_->Capture()) { if (!has_logged_success) { - ESP_LOGI(TAG, "Background vision sampler started"); + ESP_LOGI(TAG, "Vision preview sampler started"); has_logged_success = true; } } else if (!has_logged_failure) { - ESP_LOGW(TAG, "Background vision sampler is waiting for camera"); + ESP_LOGW(TAG, "Vision preview sampler is waiting for camera"); has_logged_failure = true; }