Files
xiaozhi-esp32/main/audio/wake_words/afe_wake_word.h
Xiaoxia 2b025c4ea6 Enhance audio processing and wake word detection (#1739)
* Enhance audio processing and wake word detection

- Set task priority in Application::Run to improve responsiveness.
- Log detected wake words with their state in HandleWakeWordDetectedEvent.
- Streamline audio feeding in AudioService to handle both wake word and audio processor events.
- Implement input buffering in AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to manage audio data more efficiently.
- Clear input buffers on stop to prevent residual data issues.

* Refactor audio processing to enhance thread safety and state management

- Implement early return checks in Feed methods of AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to prevent processing when not running.
- Introduce std::atomic for running state in CustomWakeWord and EspWakeWord to ensure thread-safe access.
- Consolidate input buffer management with mutex locks to avoid race conditions during Stop and Feed operations.

* Refactor listening mode handling and wake word detection configuration

- Replace direct mode setting logic with a new GetDefaultListeningMode method for improved clarity and maintainability.
- Update HandleToggleChatEvent, HandleWakeWordDetectedEvent, and ContinueWakeWordInvoke to utilize the new method for determining listening mode.
- Introduce Kconfig option WAKE_WORD_DETECTION_IN_LISTENING to enable or disable wake word detection during listening mode, enhancing configurability.
2026-02-04 14:28:21 +08:00

63 lines
1.8 KiB
C++

#ifndef AFE_WAKE_WORD_H
#define AFE_WAKE_WORD_H
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <esp_afe_sr_models.h>
#include <esp_nsn_models.h>
#include <model_path.h>
#include <deque>
#include <string>
#include <vector>
#include <functional>
#include <mutex>
#include <condition_variable>
#include "audio_codec.h"
#include "wake_word.h"
class AfeWakeWord : public WakeWord {
public:
AfeWakeWord();
~AfeWakeWord();
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();
void Stop();
size_t GetFeedSize();
void EncodeWakeWordData();
bool GetWakeWordOpus(std::vector<uint8_t>& opus);
const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
private:
srmodel_list_t *models_ = nullptr;
const esp_afe_sr_iface_t* afe_iface_ = nullptr;
esp_afe_sr_data_t* afe_data_ = nullptr;
char* wakenet_model_ = NULL;
std::vector<std::string> wake_words_;
EventGroupHandle_t event_group_;
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
AudioCodec* codec_ = nullptr;
std::string last_detected_wake_word_;
std::vector<int16_t> input_buffer_;
std::mutex input_buffer_mutex_;
TaskHandle_t wake_word_encode_task_ = nullptr;
StaticTask_t* wake_word_encode_task_buffer_ = nullptr;
StackType_t* wake_word_encode_task_stack_ = nullptr;
std::deque<std::vector<int16_t>> wake_word_pcm_;
std::deque<std::vector<uint8_t>> wake_word_opus_;
std::mutex wake_word_mutex_;
std::condition_variable wake_word_cv_;
void StoreWakeWordData(const int16_t* data, size_t size);
void AudioDetectionTask();
};
#endif