feat: stop background camera capture; send the vision frame once per listening session when speech is detected
This commit is contained in:
@ -81,6 +81,7 @@ void Application::Initialize() {
|
|||||||
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
|
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
|
||||||
};
|
};
|
||||||
callbacks.on_vad_change = [this](bool speaking) {
|
callbacks.on_vad_change = [this](bool speaking) {
|
||||||
|
vad_speaking_.store(speaking);
|
||||||
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
|
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
|
||||||
};
|
};
|
||||||
audio_service_.SetCallbacks(callbacks);
|
audio_service_.SetCallbacks(callbacks);
|
||||||
@ -233,6 +234,13 @@ void Application::Run() {
|
|||||||
if (GetDeviceState() == kDeviceStateListening) {
|
if (GetDeviceState() == kDeviceStateListening) {
|
||||||
auto led = Board::GetInstance().GetLed();
|
auto led = Board::GetInstance().GetLed();
|
||||||
led->OnStateChanged();
|
led->OnStateChanged();
|
||||||
|
|
||||||
|
if (vad_speaking_.load() && vision_text_mode_enabled_.load() &&
|
||||||
|
!vision_frame_sent_for_current_listen_.exchange(true)) {
|
||||||
|
if (!SendCurrentVisionFrame()) {
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -674,11 +682,13 @@ void Application::DismissAlert() {
|
|||||||
|
|
||||||
void Application::ToggleChatState() {
|
void Application::ToggleChatState() {
|
||||||
vision_text_mode_enabled_.store(false);
|
vision_text_mode_enabled_.store(false);
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
|
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Application::ToggleChatStateWithVision() {
|
void Application::ToggleChatStateWithVision() {
|
||||||
vision_text_mode_enabled_.store(true);
|
vision_text_mode_enabled_.store(true);
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
|
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -688,6 +698,7 @@ bool Application::IsVisionTextModeEnabled() const {
|
|||||||
|
|
||||||
void Application::StartListening() {
|
void Application::StartListening() {
|
||||||
vision_text_mode_enabled_.store(false);
|
vision_text_mode_enabled_.store(false);
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING);
|
xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -697,9 +708,6 @@ void Application::StopListening() {
|
|||||||
|
|
||||||
void Application::HandleToggleChatEvent() {
|
void Application::HandleToggleChatEvent() {
|
||||||
auto state = GetDeviceState();
|
auto state = GetDeviceState();
|
||||||
if (state != kDeviceStateIdle) {
|
|
||||||
vision_text_mode_enabled_.store(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state == kDeviceStateActivating) {
|
if (state == kDeviceStateActivating) {
|
||||||
SetDeviceState(kDeviceStateIdle);
|
SetDeviceState(kDeviceStateIdle);
|
||||||
@ -896,6 +904,7 @@ void Application::HandleStateChangedEvent() {
|
|||||||
switch (new_state) {
|
switch (new_state) {
|
||||||
case kDeviceStateUnknown:
|
case kDeviceStateUnknown:
|
||||||
case kDeviceStateIdle:
|
case kDeviceStateIdle:
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
display->SetStatus(Lang::Strings::STANDBY);
|
display->SetStatus(Lang::Strings::STANDBY);
|
||||||
display->ClearChatMessages(); // Clear messages first
|
display->ClearChatMessages(); // Clear messages first
|
||||||
display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count)
|
display->SetEmotion("neutral"); // Then set emotion (wechat mode checks child count)
|
||||||
@ -908,6 +917,8 @@ void Application::HandleStateChangedEvent() {
|
|||||||
display->SetChatMessage("system", "");
|
display->SetChatMessage("system", "");
|
||||||
break;
|
break;
|
||||||
case kDeviceStateListening:
|
case kDeviceStateListening:
|
||||||
|
vad_speaking_.store(false);
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
display->SetStatus(Lang::Strings::LISTENING);
|
display->SetStatus(Lang::Strings::LISTENING);
|
||||||
display->SetEmotion("neutral");
|
display->SetEmotion("neutral");
|
||||||
|
|
||||||
@ -919,9 +930,6 @@ void Application::HandleStateChangedEvent() {
|
|||||||
audio_service_.WaitForPlaybackQueueEmpty();
|
audio_service_.WaitForPlaybackQueueEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vision_text_mode_enabled_.load()) {
|
|
||||||
SendCurrentVisionFrame();
|
|
||||||
}
|
|
||||||
// Send the start listening command
|
// Send the start listening command
|
||||||
protocol_->SendStartListening(listening_mode_);
|
protocol_->SendStartListening(listening_mode_);
|
||||||
audio_service_.EnableVoiceProcessing(true);
|
audio_service_.EnableVoiceProcessing(true);
|
||||||
@ -961,24 +969,25 @@ void Application::HandleStateChangedEvent() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Application::SendCurrentVisionFrame() {
|
bool Application::SendCurrentVisionFrame() {
|
||||||
if (!protocol_ || !protocol_->IsAudioChannelOpened()) {
|
if (!protocol_ || !protocol_->IsAudioChannelOpened()) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto camera = Board::GetInstance().GetCamera();
|
auto camera = Board::GetInstance().GetCamera();
|
||||||
if (camera == nullptr) {
|
if (camera == nullptr) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string jpeg_data;
|
std::string jpeg_data;
|
||||||
if (!camera->CaptureToJpeg(jpeg_data, true)) {
|
if (!camera->CaptureToJpeg(jpeg_data, true)) {
|
||||||
ESP_LOGW(TAG, "Failed to capture vision frame");
|
ESP_LOGW(TAG, "Failed to capture vision frame");
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
protocol_->SendVisionFrame(jpeg_data);
|
protocol_->SendVisionFrame(jpeg_data);
|
||||||
ESP_LOGI(TAG, "Sent vision frame, size=%u bytes", static_cast<unsigned>(jpeg_data.size()));
|
ESP_LOGI(TAG, "Sent vision frame, size=%u bytes", static_cast<unsigned>(jpeg_data.size()));
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Application::Schedule(std::function<void()>&& callback) {
|
void Application::Schedule(std::function<void()>&& callback) {
|
||||||
@ -999,6 +1008,8 @@ void Application::AbortSpeaking(AbortReason reason) {
|
|||||||
|
|
||||||
void Application::SetListeningMode(ListeningMode mode) {
|
void Application::SetListeningMode(ListeningMode mode) {
|
||||||
listening_mode_ = mode;
|
listening_mode_ = mode;
|
||||||
|
vad_speaking_.store(false);
|
||||||
|
vision_frame_sent_for_current_listen_.store(false);
|
||||||
SetDeviceState(kDeviceStateListening);
|
SetDeviceState(kDeviceStateListening);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -148,6 +148,8 @@ private:
|
|||||||
bool assets_version_checked_ = false;
|
bool assets_version_checked_ = false;
|
||||||
bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening
|
bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening
|
||||||
std::atomic<bool> vision_text_mode_enabled_ = false;
|
std::atomic<bool> vision_text_mode_enabled_ = false;
|
||||||
|
std::atomic<bool> vad_speaking_ = false;
|
||||||
|
std::atomic<bool> vision_frame_sent_for_current_listen_ = false;
|
||||||
int clock_ticks_ = 0;
|
int clock_ticks_ = 0;
|
||||||
TaskHandle_t activation_task_handle_ = nullptr;
|
TaskHandle_t activation_task_handle_ = nullptr;
|
||||||
|
|
||||||
@ -163,7 +165,7 @@ private:
|
|||||||
void HandleWakeWordDetectedEvent();
|
void HandleWakeWordDetectedEvent();
|
||||||
void ContinueOpenAudioChannel(ListeningMode mode);
|
void ContinueOpenAudioChannel(ListeningMode mode);
|
||||||
void ContinueWakeWordInvoke(const std::string& wake_word);
|
void ContinueWakeWordInvoke(const std::string& wake_word);
|
||||||
void SendCurrentVisionFrame();
|
bool SendCurrentVisionFrame();
|
||||||
|
|
||||||
// Activation task (runs in background)
|
// Activation task (runs in background)
|
||||||
void ActivationTask();
|
void ActivationTask();
|
||||||
|
|||||||
@ -344,21 +344,23 @@ private:
|
|||||||
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS));
|
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS));
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
if (!Application::GetInstance().IsVisionTextModeEnabled()) {
|
||||||
|
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (board->camera_ == nullptr) {
|
if (board->camera_ == nullptr) {
|
||||||
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
|
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool capture_ok = Application::GetInstance().IsVisionTextModeEnabled()
|
if (board->camera_->Capture()) {
|
||||||
? board->camera_->Capture()
|
|
||||||
: board->camera_->CaptureBackground();
|
|
||||||
if (capture_ok) {
|
|
||||||
if (!has_logged_success) {
|
if (!has_logged_success) {
|
||||||
ESP_LOGI(TAG, "Background vision sampler started");
|
ESP_LOGI(TAG, "Vision preview sampler started");
|
||||||
has_logged_success = true;
|
has_logged_success = true;
|
||||||
}
|
}
|
||||||
} else if (!has_logged_failure) {
|
} else if (!has_logged_failure) {
|
||||||
ESP_LOGW(TAG, "Background vision sampler is waiting for camera");
|
ESP_LOGW(TAG, "Vision preview sampler is waiting for camera");
|
||||||
has_logged_failure = true;
|
has_logged_failure = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user