Enhance audio processing and wake word detection (#1739)

* Enhance audio processing and wake word detection

- Set task priority in Application::Run to improve responsiveness.
- Log detected wake words with their state in HandleWakeWordDetectedEvent.
- Streamline audio feeding in AudioService to handle both wake word and audio processor events.
- Implement input buffering in AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to manage audio data more efficiently.
- Clear input buffers on stop to prevent residual data issues.

* Refactor audio processing to enhance thread safety and state management

- Implement early return checks in Feed methods of AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to prevent processing when not running.
- Introduce std::atomic for running state in CustomWakeWord and EspWakeWord to ensure thread-safe access.
- Consolidate input buffer management with mutex locks to avoid race conditions during Stop and Feed operations.

* Refactor listening mode handling and wake word detection configuration

- Replace direct mode setting logic with a new GetDefaultListeningMode method for improved clarity and maintainability.
- Update HandleToggleChatEvent, HandleWakeWordDetectedEvent, and ContinueWakeWordInvoke to utilize the new method for determining listening mode.
- Introduce Kconfig option WAKE_WORD_DETECTION_IN_LISTENING to enable or disable wake word detection during listening mode, enhancing configurability.
This commit is contained in:
Xiaoxia
2026-02-04 14:28:21 +08:00
committed by GitHub
parent 37110a9d05
commit 2b025c4ea6
13 changed files with 167 additions and 61 deletions

View File

@ -54,21 +54,44 @@ void EspWakeWord::Start() {
// Stops wake word detection and discards any audio samples still held in input_buffer_.
void EspWakeWord::Stop() {
// Clear the running flag first; Feed() re-checks running_ after it has taken input_buffer_mutex_.
running_ = false;
// Take the same mutex Feed() holds while it touches input_buffer_, so the clear below cannot interleave with it.
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
// Drop buffered samples so no residual data is left behind once stopped.
input_buffer_.clear();
}
void EspWakeWord::Feed(const std::vector<int16_t>& data) {
if (wakenet_data_ == nullptr || !running_) {
if (wakenet_data_ == nullptr) {
return;
}
int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data());
if (res > 0) {
last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
running_ = false;
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
// Check running state inside lock to avoid TOCTOU race with Stop()
if (!running_) {
return;
}
if (wake_word_detected_callback_) {
wake_word_detected_callback_(last_detected_wake_word_);
if (codec_->input_channels() == 2) {
for (size_t i = 0; i < data.size(); i += 2) {
input_buffer_.push_back(data[i]);
}
} else {
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
}
int chunksize = wakenet_iface_->get_samp_chunksize(wakenet_data_);
while (input_buffer_.size() >= chunksize) {
int res = wakenet_iface_->detect(wakenet_data_, input_buffer_.data());
if (res > 0) {
last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
running_ = false;
input_buffer_.clear();
if (wake_word_detected_callback_) {
wake_word_detected_callback_(last_detected_wake_word_);
}
break;
}
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunksize);
}
}