v1.8.0: Audio 代码重构与低功耗优化 (#943)
* Reconstruct Audio Code
* Remove old IoT implementation
* Add MQTT-UDP documentation
* OTA升级失败时,可以继续使用
This commit is contained in:
@ -6,26 +6,8 @@
|
||||
#include "mqtt_protocol.h"
|
||||
#include "websocket_protocol.h"
|
||||
#include "font_awesome_symbols.h"
|
||||
#include "iot/thing_manager.h"
|
||||
#include "assets/lang_config.h"
|
||||
#include "mcp_server.h"
|
||||
#include "audio_debugger.h"
|
||||
|
||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||
#include "afe_audio_processor.h"
|
||||
#else
|
||||
#include "no_audio_processor.h"
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
#include "afe_wake_word.h"
|
||||
#elif CONFIG_USE_ESP_WAKE_WORD
|
||||
#include "esp_wake_word.h"
|
||||
#elif CONFIG_USE_CUSTOM_WAKE_WORD
|
||||
#include "custom_wake_word.h"
|
||||
#else
|
||||
#include "no_wake_word.h"
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
#include <esp_log.h>
|
||||
@ -53,7 +35,6 @@ static const char* const STATE_STRINGS[] = {
|
||||
|
||||
Application::Application() {
|
||||
event_group_ = xEventGroupCreate();
|
||||
background_task_ = new BackgroundTask(4096 * 7);
|
||||
|
||||
#if CONFIG_USE_DEVICE_AEC
|
||||
aec_mode_ = kAecOnDeviceSide;
|
||||
@ -63,22 +44,6 @@ Application::Application() {
|
||||
aec_mode_ = kAecOff;
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||
audio_processor_ = std::make_unique<AfeAudioProcessor>();
|
||||
#else
|
||||
audio_processor_ = std::make_unique<NoAudioProcessor>();
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
wake_word_ = std::make_unique<AfeWakeWord>();
|
||||
#elif CONFIG_USE_ESP_WAKE_WORD
|
||||
wake_word_ = std::make_unique<EspWakeWord>();
|
||||
#elif CONFIG_USE_CUSTOM_WAKE_WORD
|
||||
wake_word_ = std::make_unique<CustomWakeWord>();
|
||||
#else
|
||||
wake_word_ = std::make_unique<NoWakeWord>();
|
||||
#endif
|
||||
|
||||
esp_timer_create_args_t clock_timer_args = {
|
||||
.callback = [](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
@ -97,9 +62,6 @@ Application::~Application() {
|
||||
esp_timer_stop(clock_timer_handle_);
|
||||
esp_timer_delete(clock_timer_handle_);
|
||||
}
|
||||
if (background_task_ != nullptr) {
|
||||
delete background_task_;
|
||||
}
|
||||
vEventGroupDelete(event_group_);
|
||||
}
|
||||
|
||||
@ -108,9 +70,10 @@ void Application::CheckNewVersion(Ota& ota) {
|
||||
int retry_count = 0;
|
||||
int retry_delay = 10; // 初始重试延迟为10秒
|
||||
|
||||
auto& board = Board::GetInstance();
|
||||
while (true) {
|
||||
SetDeviceState(kDeviceStateActivating);
|
||||
auto display = Board::GetInstance().GetDisplay();
|
||||
auto display = board.GetDisplay();
|
||||
display->SetStatus(Lang::Strings::CHECKING_NEW_VERSION);
|
||||
|
||||
if (!ota.CheckVersion()) {
|
||||
@ -148,40 +111,38 @@ void Application::CheckNewVersion(Ota& ota) {
|
||||
std::string message = std::string(Lang::Strings::NEW_VERSION) + ota.GetFirmwareVersion();
|
||||
display->SetChatMessage("system", message.c_str());
|
||||
|
||||
auto& board = Board::GetInstance();
|
||||
board.SetPowerSaveMode(false);
|
||||
wake_word_->StopDetection();
|
||||
// 预先关闭音频输出,避免升级过程有音频操作
|
||||
auto codec = board.GetAudioCodec();
|
||||
codec->EnableInput(false);
|
||||
codec->EnableOutput(false);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_decode_queue_.clear();
|
||||
}
|
||||
background_task_->WaitForCompletion();
|
||||
delete background_task_;
|
||||
background_task_ = nullptr;
|
||||
audio_service_.Stop();
|
||||
vTaskDelay(pdMS_TO_TICKS(1000));
|
||||
|
||||
ota.StartUpgrade([display](int progress, size_t speed) {
|
||||
bool upgrade_success = ota.StartUpgrade([display](int progress, size_t speed) {
|
||||
char buffer[64];
|
||||
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
|
||||
display->SetChatMessage("system", buffer);
|
||||
});
|
||||
|
||||
// If upgrade success, the device will reboot and never reach here
|
||||
display->SetStatus(Lang::Strings::UPGRADE_FAILED);
|
||||
ESP_LOGI(TAG, "Firmware upgrade failed...");
|
||||
vTaskDelay(pdMS_TO_TICKS(3000));
|
||||
Reboot();
|
||||
return;
|
||||
if (!upgrade_success) {
|
||||
// Upgrade failed, restart audio service and continue running
|
||||
ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation...");
|
||||
audio_service_.Start(); // Restart audio service
|
||||
board.SetPowerSaveMode(true); // Restore power save mode
|
||||
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "sad", Lang::Sounds::P3_EXCLAMATION);
|
||||
vTaskDelay(pdMS_TO_TICKS(3000));
|
||||
// Continue to normal operation (don't break, just fall through)
|
||||
} else {
|
||||
// Upgrade success, reboot immediately
|
||||
ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
|
||||
display->SetChatMessage("system", "Upgrade successful, rebooting...");
|
||||
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
|
||||
Reboot();
|
||||
return; // This line will never be reached after reboot
|
||||
}
|
||||
}
|
||||
|
||||
// No new version, mark the current version as valid
|
||||
ota.MarkCurrentVersionValid();
|
||||
if (!ota.HasActivationCode() && !ota.HasActivationChallenge()) {
|
||||
xEventGroupSetBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT);
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
|
||||
// Exit the loop if done checking new version
|
||||
break;
|
||||
}
|
||||
@ -197,7 +158,7 @@ void Application::CheckNewVersion(Ota& ota) {
|
||||
ESP_LOGI(TAG, "Activating... %d/%d", i + 1, 10);
|
||||
esp_err_t err = ota.Activate();
|
||||
if (err == ESP_OK) {
|
||||
xEventGroupSetBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT);
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
|
||||
break;
|
||||
} else if (err == ESP_ERR_TIMEOUT) {
|
||||
vTaskDelay(pdMS_TO_TICKS(3000));
|
||||
@ -236,7 +197,7 @@ void Application::ShowActivationCode(const std::string& code, const std::string&
|
||||
auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
|
||||
[digit](const digit_sound& ds) { return ds.digit == digit; });
|
||||
if (it != digit_sounds.end()) {
|
||||
PlaySound(it->sound);
|
||||
audio_service_.PlaySound(it->sound);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -248,8 +209,7 @@ void Application::Alert(const char* status, const char* message, const char* emo
|
||||
display->SetEmotion(emotion);
|
||||
display->SetChatMessage("system", message);
|
||||
if (!sound.empty()) {
|
||||
ResetDecoder();
|
||||
PlaySound(sound);
|
||||
audio_service_.PlaySound(sound);
|
||||
}
|
||||
}
|
||||
|
||||
@ -262,59 +222,17 @@ void Application::DismissAlert() {
|
||||
}
|
||||
}
|
||||
|
||||
// Queues a pre-encoded sound for playback.
// `sound` is walked as a back-to-back sequence of BinaryProtocol3 records;
// each record's payload is copied into an AudioStreamPacket (tagged
// 16000 Hz / 60 ms) and appended to audio_decode_queue_.
// NOTE(review): nothing checks that a header or its payload_size stays inside
// `sound` — the buffer is trusted as well-formed; confirm all callers pass
// built-in assets only.
void Application::PlaySound(const std::string_view& sound) {
|
||||
// Wait for the previous sound to finish: block until the decode queue is
// empty, then wait on the background task as well.
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
audio_decode_cv_.wait(lock, [this]() {
|
||||
return audio_decode_queue_.empty();
|
||||
});
|
||||
}
|
||||
background_task_->WaitForCompletion();
|
||||
|
||||
const char* data = sound.data();
|
||||
size_t size = sound.size();
|
||||
// No increment clause: `p` is advanced inside the body by header size plus
// payload size of the record just consumed.
for (const char* p = data; p < data + size; ) {
|
||||
auto p3 = (BinaryProtocol3*)p;
|
||||
p += sizeof(BinaryProtocol3);
|
||||
|
||||
// payload_size is converted from network byte order before use.
auto payload_size = ntohs(p3->payload_size);
|
||||
AudioStreamPacket packet;
|
||||
packet.sample_rate = 16000;
|
||||
packet.frame_duration = 60;
|
||||
packet.payload.resize(payload_size);
|
||||
memcpy(packet.payload.data(), p3->payload, payload_size);
|
||||
p += payload_size;
|
||||
|
||||
// The lock is taken per packet, so the queue is only held while appending.
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_decode_queue_.emplace_back(std::move(packet));
|
||||
}
|
||||
}
|
||||
|
||||
// Switches the device into the audio testing state: logs the transition,
// calls ResetDecoder(), then sets the device state to kDeviceStateAudioTesting.
void Application::EnterAudioTestingMode() {
|
||||
ESP_LOGI(TAG, "Entering audio testing mode");
|
||||
ResetDecoder();
|
||||
SetDeviceState(kDeviceStateAudioTesting);
|
||||
}
|
||||
|
||||
// Leaves the audio testing state: logs the transition, sets the device state
// back to kDeviceStateWifiConfiguring, then hands the packets collected in
// audio_testing_queue_ over to audio_decode_queue_.
// NOTE(review): presumably this makes the recorded audio play back — confirm
// against the consumer of audio_decode_queue_.
void Application::ExitAudioTestingMode() {
|
||||
ESP_LOGI(TAG, "Exiting audio testing mode");
|
||||
SetDeviceState(kDeviceStateWifiConfiguring);
|
||||
// Move (not copy) audio_testing_queue_ into audio_decode_queue_ under mutex_;
// any packets previously in audio_decode_queue_ are replaced.
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_decode_queue_ = std::move(audio_testing_queue_);
|
||||
// Wake every thread waiting on the decode-queue condition variable.
audio_decode_cv_.notify_all();
|
||||
}
|
||||
|
||||
void Application::ToggleChatState() {
|
||||
if (device_state_ == kDeviceStateActivating) {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
return;
|
||||
} else if (device_state_ == kDeviceStateWifiConfiguring) {
|
||||
EnterAudioTestingMode();
|
||||
audio_service_.EnableAudioTesting(true);
|
||||
SetDeviceState(kDeviceStateAudioTesting);
|
||||
return;
|
||||
} else if (device_state_ == kDeviceStateAudioTesting) {
|
||||
ExitAudioTestingMode();
|
||||
audio_service_.EnableAudioTesting(false);
|
||||
SetDeviceState(kDeviceStateWifiConfiguring);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -350,7 +268,8 @@ void Application::StartListening() {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
return;
|
||||
} else if (device_state_ == kDeviceStateWifiConfiguring) {
|
||||
EnterAudioTestingMode();
|
||||
audio_service_.EnableAudioTesting(true);
|
||||
SetDeviceState(kDeviceStateAudioTesting);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -380,7 +299,8 @@ void Application::StartListening() {
|
||||
|
||||
void Application::StopListening() {
|
||||
if (device_state_ == kDeviceStateAudioTesting) {
|
||||
ExitAudioTestingMode();
|
||||
audio_service_.EnableAudioTesting(false);
|
||||
SetDeviceState(kDeviceStateWifiConfiguring);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -409,43 +329,22 @@ void Application::Start() {
|
||||
/* Setup the display */
|
||||
auto display = board.GetDisplay();
|
||||
|
||||
/* Setup the audio codec */
|
||||
/* Setup the audio service */
|
||||
auto codec = board.GetAudioCodec();
|
||||
opus_decoder_ = std::make_unique<OpusDecoderWrapper>(codec->output_sample_rate(), 1, OPUS_FRAME_DURATION_MS);
|
||||
opus_encoder_ = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
|
||||
opus_encoder_->SetComplexity(0);
|
||||
if (aec_mode_ != kAecOff) {
|
||||
ESP_LOGI(TAG, "AEC mode: %d, setting opus encoder complexity to 0", aec_mode_);
|
||||
opus_encoder_->SetComplexity(0);
|
||||
} else {
|
||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||
ESP_LOGI(TAG, "Audio processor detected, setting opus encoder complexity to 5");
|
||||
opus_encoder_->SetComplexity(5);
|
||||
#else
|
||||
ESP_LOGI(TAG, "Audio processor not detected, setting opus encoder complexity to 0");
|
||||
opus_encoder_->SetComplexity(0);
|
||||
#endif
|
||||
}
|
||||
audio_service_.Initialize(codec);
|
||||
audio_service_.Start();
|
||||
|
||||
if (codec->input_sample_rate() != 16000) {
|
||||
input_resampler_.Configure(codec->input_sample_rate(), 16000);
|
||||
reference_resampler_.Configure(codec->input_sample_rate(), 16000);
|
||||
}
|
||||
codec->Start();
|
||||
|
||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||
xTaskCreatePinnedToCore([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioLoop();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_, 1);
|
||||
#else
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioLoop();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_);
|
||||
#endif
|
||||
AudioServiceCallbacks callbacks;
|
||||
callbacks.on_send_queue_available = [this]() {
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_SEND_AUDIO);
|
||||
};
|
||||
callbacks.on_wake_word_detected = [this](const std::string& wake_word) {
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
|
||||
};
|
||||
callbacks.on_vad_change = [this](bool speaking) {
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
|
||||
};
|
||||
audio_service_.SetCallbacks(callbacks);
|
||||
|
||||
/* Start the clock timer to update the status bar */
|
||||
esp_timer_start_periodic(clock_timer_handle_, 1000000);
|
||||
@ -464,9 +363,7 @@ void Application::Start() {
|
||||
display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
|
||||
|
||||
// Add MCP common tools before initializing the protocol
|
||||
#if CONFIG_IOT_PROTOCOL_MCP
|
||||
McpServer::GetInstance().AddCommonTools();
|
||||
#endif
|
||||
|
||||
if (ota.HasMqttConfig()) {
|
||||
protocol_ = std::make_unique<MqttProtocol>();
|
||||
@ -478,13 +375,12 @@ void Application::Start() {
|
||||
}
|
||||
|
||||
protocol_->OnNetworkError([this](const std::string& message) {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
Alert(Lang::Strings::ERROR, message.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION);
|
||||
last_error_message_ = message;
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR);
|
||||
});
|
||||
protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (device_state_ == kDeviceStateSpeaking && audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||
audio_decode_queue_.emplace_back(std::move(packet));
|
||||
protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
|
||||
if (device_state_ == kDeviceStateSpeaking) {
|
||||
audio_service_.PushPacketToDecodeQueue(std::move(packet));
|
||||
}
|
||||
});
|
||||
protocol_->OnAudioChannelOpened([this, codec, &board]() {
|
||||
@ -493,15 +389,6 @@ void Application::Start() {
|
||||
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
|
||||
protocol_->server_sample_rate(), codec->output_sample_rate());
|
||||
}
|
||||
|
||||
#if CONFIG_IOT_PROTOCOL_XIAOZHI
|
||||
auto& thing_manager = iot::ThingManager::GetInstance();
|
||||
protocol_->SendIotDescriptors(thing_manager.GetDescriptorsJson());
|
||||
std::string states;
|
||||
if (thing_manager.GetStatesJson(states, false)) {
|
||||
protocol_->SendIotStates(states);
|
||||
}
|
||||
#endif
|
||||
});
|
||||
protocol_->OnAudioChannelClosed([this, &board]() {
|
||||
board.SetPowerSaveMode(true);
|
||||
@ -525,7 +412,6 @@ void Application::Start() {
|
||||
});
|
||||
} else if (strcmp(state->valuestring, "stop") == 0) {
|
||||
Schedule([this]() {
|
||||
background_task_->WaitForCompletion();
|
||||
if (device_state_ == kDeviceStateSpeaking) {
|
||||
if (listening_mode_ == kListeningModeManualStop) {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
@ -558,36 +444,11 @@ void Application::Start() {
|
||||
display->SetEmotion(emotion_str.c_str());
|
||||
});
|
||||
}
|
||||
#if CONFIG_RECEIVE_CUSTOM_MESSAGE
|
||||
} else if (strcmp(type->valuestring, "custom") == 0) {
|
||||
auto payload = cJSON_GetObjectItem(root, "payload");
|
||||
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
|
||||
if (cJSON_IsObject(payload)) {
|
||||
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
|
||||
display->SetChatMessage("system", payload_str.c_str());
|
||||
});
|
||||
} else {
|
||||
ESP_LOGW(TAG, "Invalid custom message format: missing payload");
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_IOT_PROTOCOL_MCP
|
||||
} else if (strcmp(type->valuestring, "mcp") == 0) {
|
||||
auto payload = cJSON_GetObjectItem(root, "payload");
|
||||
if (cJSON_IsObject(payload)) {
|
||||
McpServer::GetInstance().ParseMessage(payload);
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_IOT_PROTOCOL_XIAOZHI
|
||||
} else if (strcmp(type->valuestring, "iot") == 0) {
|
||||
auto commands = cJSON_GetObjectItem(root, "commands");
|
||||
if (cJSON_IsArray(commands)) {
|
||||
auto& thing_manager = iot::ThingManager::GetInstance();
|
||||
for (int i = 0; i < cJSON_GetArraySize(commands); ++i) {
|
||||
auto command = cJSON_GetArrayItem(commands, i);
|
||||
thing_manager.Invoke(command);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else if (strcmp(type->valuestring, "system") == 0) {
|
||||
auto command = cJSON_GetObjectItem(root, "command");
|
||||
if (cJSON_IsString(command)) {
|
||||
@ -610,112 +471,24 @@ void Application::Start() {
|
||||
} else {
|
||||
ESP_LOGW(TAG, "Alert command requires status, message and emotion");
|
||||
}
|
||||
#if CONFIG_RECEIVE_CUSTOM_MESSAGE
|
||||
} else if (strcmp(type->valuestring, "custom") == 0) {
|
||||
auto payload = cJSON_GetObjectItem(root, "payload");
|
||||
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
|
||||
if (cJSON_IsObject(payload)) {
|
||||
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
|
||||
display->SetChatMessage("system", payload_str.c_str());
|
||||
});
|
||||
} else {
|
||||
ESP_LOGW(TAG, "Invalid custom message format: missing payload");
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
|
||||
}
|
||||
});
|
||||
bool protocol_started = protocol_->Start();
|
||||
|
||||
audio_debugger_ = std::make_unique<AudioDebugger>();
|
||||
audio_processor_->Initialize(codec);
|
||||
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (audio_send_queue_.size() >= MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||
ESP_LOGW(TAG, "Too many audio packets in queue, drop the newest packet");
|
||||
return;
|
||||
}
|
||||
}
|
||||
background_task_->Schedule([this, data = std::move(data)]() mutable {
|
||||
opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
|
||||
AudioStreamPacket packet;
|
||||
packet.payload = std::move(opus);
|
||||
#ifdef CONFIG_USE_SERVER_AEC
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(timestamp_mutex_);
|
||||
if (!timestamp_queue_.empty()) {
|
||||
packet.timestamp = timestamp_queue_.front();
|
||||
timestamp_queue_.pop_front();
|
||||
} else {
|
||||
packet.timestamp = 0;
|
||||
}
|
||||
|
||||
if (timestamp_queue_.size() > 3) { // 限制队列长度3
|
||||
timestamp_queue_.pop_front(); // 该包发送前先出队保持队列长度
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (audio_send_queue_.size() >= MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||
ESP_LOGW(TAG, "Too many audio packets in queue, drop the oldest packet");
|
||||
audio_send_queue_.pop_front();
|
||||
}
|
||||
audio_send_queue_.emplace_back(std::move(packet));
|
||||
xEventGroupSetBits(event_group_, SEND_AUDIO_EVENT);
|
||||
});
|
||||
});
|
||||
});
|
||||
audio_processor_->OnVadStateChange([this](bool speaking) {
|
||||
if (device_state_ == kDeviceStateListening) {
|
||||
Schedule([this, speaking]() {
|
||||
if (speaking) {
|
||||
voice_detected_ = true;
|
||||
} else {
|
||||
voice_detected_ = false;
|
||||
}
|
||||
auto led = Board::GetInstance().GetLed();
|
||||
led->OnStateChanged();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
wake_word_->Initialize(codec);
|
||||
wake_word_->OnWakeWordDetected([this](const std::string& wake_word) {
|
||||
Schedule([this, &wake_word]() {
|
||||
if (!protocol_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (device_state_ == kDeviceStateIdle) {
|
||||
wake_word_->EncodeWakeWordData();
|
||||
|
||||
if (!protocol_->IsAudioChannelOpened()) {
|
||||
SetDeviceState(kDeviceStateConnecting);
|
||||
if (!protocol_->OpenAudioChannel()) {
|
||||
wake_word_->StartDetection();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
|
||||
#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
|
||||
AudioStreamPacket packet;
|
||||
// Encode and send the wake word data to the server
|
||||
while (wake_word_->GetWakeWordOpus(packet.payload)) {
|
||||
protocol_->SendAudio(packet);
|
||||
}
|
||||
// Set the chat state to wake word detected
|
||||
protocol_->SendWakeWordDetected(wake_word);
|
||||
#else
|
||||
// Play the pop up sound to indicate the wake word is detected
|
||||
// And wait 60ms to make sure the queue has been processed by audio task
|
||||
ResetDecoder();
|
||||
PlaySound(Lang::Sounds::P3_POPUP);
|
||||
vTaskDelay(pdMS_TO_TICKS(60));
|
||||
#endif
|
||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
||||
} else if (device_state_ == kDeviceStateSpeaking) {
|
||||
AbortSpeaking(kAbortReasonWakeWordDetected);
|
||||
} else if (device_state_ == kDeviceStateActivating) {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
}
|
||||
});
|
||||
});
|
||||
wake_word_->StartDetection();
|
||||
|
||||
// Wait for the new version check to finish
|
||||
xEventGroupWaitBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT, pdTRUE, pdFALSE, portMAX_DELAY);
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
|
||||
has_server_time_ = ota.HasServerTime();
|
||||
@ -724,8 +497,7 @@ void Application::Start() {
|
||||
display->ShowNotification(message.c_str());
|
||||
display->SetChatMessage("system", "");
|
||||
// Play the success sound to indicate the device is ready
|
||||
ResetDecoder();
|
||||
PlaySound(Lang::Sounds::P3_SUCCESS);
|
||||
audio_service_.PlaySound(Lang::Sounds::P3_SUCCESS);
|
||||
}
|
||||
|
||||
// Print heap stats
|
||||
@ -746,19 +518,6 @@ void Application::OnClockTimer() {
|
||||
// SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
|
||||
// SystemInfo::PrintTaskList();
|
||||
SystemInfo::PrintHeapStats();
|
||||
|
||||
// If we have synchronized server time, set the status to clock "HH:MM" if the device is idle
|
||||
if (has_server_time_) {
|
||||
if (device_state_ == kDeviceStateIdle) {
|
||||
Schedule([this]() {
|
||||
// Set status to clock "HH:MM"
|
||||
time_t now = time(NULL);
|
||||
char time_str[64];
|
||||
strftime(time_str, sizeof(time_str), "%H:%M ", localtime(&now));
|
||||
Board::GetInstance().GetDisplay()->SetStatus(time_str);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -768,7 +527,7 @@ void Application::Schedule(std::function<void()> callback) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
main_tasks_.push_back(std::move(callback));
|
||||
}
|
||||
xEventGroupSetBits(event_group_, SCHEDULE_EVENT);
|
||||
xEventGroupSetBits(event_group_, MAIN_EVENT_SCHEDULE);
|
||||
}
|
||||
|
||||
// The Main Event Loop controls the chat state and websocket connection
|
||||
@ -779,20 +538,36 @@ void Application::MainEventLoop() {
|
||||
vTaskPrioritySet(NULL, 3);
|
||||
|
||||
while (true) {
|
||||
auto bits = xEventGroupWaitBits(event_group_, SCHEDULE_EVENT | SEND_AUDIO_EVENT, pdTRUE, pdFALSE, portMAX_DELAY);
|
||||
auto bits = xEventGroupWaitBits(event_group_, MAIN_EVENT_SCHEDULE |
|
||||
MAIN_EVENT_SEND_AUDIO |
|
||||
MAIN_EVENT_WAKE_WORD_DETECTED |
|
||||
MAIN_EVENT_VAD_CHANGE |
|
||||
MAIN_EVENT_ERROR, pdTRUE, pdFALSE, portMAX_DELAY);
|
||||
if (bits & MAIN_EVENT_ERROR) {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION);
|
||||
}
|
||||
|
||||
if (bits & SEND_AUDIO_EVENT) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
auto packets = std::move(audio_send_queue_);
|
||||
lock.unlock();
|
||||
for (auto& packet : packets) {
|
||||
if (!protocol_->SendAudio(packet)) {
|
||||
if (bits & MAIN_EVENT_SEND_AUDIO) {
|
||||
while (auto packet = audio_service_.PopPacketFromSendQueue()) {
|
||||
if (!protocol_->SendAudio(std::move(packet))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bits & SCHEDULE_EVENT) {
|
||||
if (bits & MAIN_EVENT_WAKE_WORD_DETECTED) {
|
||||
OnWakeWordDetected();
|
||||
}
|
||||
|
||||
if (bits & MAIN_EVENT_VAD_CHANGE) {
|
||||
if (device_state_ == kDeviceStateListening) {
|
||||
auto led = Board::GetInstance().GetLed();
|
||||
led->OnStateChanged();
|
||||
}
|
||||
}
|
||||
|
||||
if (bits & MAIN_EVENT_SCHEDULE) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
auto tasks = std::move(main_tasks_);
|
||||
lock.unlock();
|
||||
@ -803,170 +578,43 @@ void Application::MainEventLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
// The Audio Loop drives audio input and output.
// It never returns: every iteration calls OnAudioInput(), and calls
// OnAudioOutput() only while the codec reports its output as enabled.
|
||||
void Application::AudioLoop() {
|
||||
// The codec pointer is fetched once, before entering the loop.
auto codec = Board::GetInstance().GetAudioCodec();
|
||||
while (true) {
|
||||
OnAudioInput();
|
||||
if (codec->output_enabled()) {
|
||||
OnAudioOutput();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Application::OnAudioOutput() {
|
||||
if (busy_decoding_audio_) {
|
||||
void Application::OnWakeWordDetected() {
|
||||
if (!protocol_) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
auto codec = Board::GetInstance().GetAudioCodec();
|
||||
const int max_silence_seconds = 10;
|
||||
if (device_state_ == kDeviceStateIdle) {
|
||||
audio_service_.EncodeWakeWord();
|
||||
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
if (audio_decode_queue_.empty()) {
|
||||
// Disable the output if there is no audio data for a long time
|
||||
if (device_state_ == kDeviceStateIdle) {
|
||||
auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - last_output_time_).count();
|
||||
if (duration > max_silence_seconds) {
|
||||
codec->EnableOutput(false);
|
||||
if (!protocol_->IsAudioChannelOpened()) {
|
||||
SetDeviceState(kDeviceStateConnecting);
|
||||
if (!protocol_->OpenAudioChannel()) {
|
||||
audio_service_.EnableWakeWordDetection(true);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto packet = std::move(audio_decode_queue_.front());
|
||||
audio_decode_queue_.pop_front();
|
||||
lock.unlock();
|
||||
audio_decode_cv_.notify_all();
|
||||
|
||||
// Synchronize the sample rate and frame duration
|
||||
SetDecodeSampleRate(packet.sample_rate, packet.frame_duration);
|
||||
|
||||
busy_decoding_audio_ = true;
|
||||
if (!background_task_->Schedule([this, codec, packet = std::move(packet)]() mutable {
|
||||
busy_decoding_audio_ = false;
|
||||
if (aborted_) {
|
||||
return;
|
||||
auto wake_word = audio_service_.GetLastWakeWord();
|
||||
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
|
||||
#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
|
||||
// Encode and send the wake word data to the server
|
||||
while (auto packet = audio_service_.PopWakeWordPacket()) {
|
||||
protocol_->SendAudio(std::move(packet));
|
||||
}
|
||||
|
||||
std::vector<int16_t> pcm;
|
||||
if (!opus_decoder_->Decode(std::move(packet.payload), pcm)) {
|
||||
return;
|
||||
}
|
||||
// Resample if the sample rate is different
|
||||
if (opus_decoder_->sample_rate() != codec->output_sample_rate()) {
|
||||
int target_size = output_resampler_.GetOutputSamples(pcm.size());
|
||||
std::vector<int16_t> resampled(target_size);
|
||||
output_resampler_.Process(pcm.data(), pcm.size(), resampled.data());
|
||||
pcm = std::move(resampled);
|
||||
}
|
||||
codec->OutputData(pcm);
|
||||
#ifdef CONFIG_USE_SERVER_AEC
|
||||
std::lock_guard<std::mutex> lock(timestamp_mutex_);
|
||||
timestamp_queue_.push_back(packet.timestamp);
|
||||
// Set the chat state to wake word detected
|
||||
protocol_->SendWakeWordDetected(wake_word);
|
||||
#else
|
||||
// Play the pop up sound to indicate the wake word is detected
|
||||
audio_service_.PlaySound(Lang::Sounds::P3_POPUP);
|
||||
#endif
|
||||
last_output_time_ = std::chrono::steady_clock::now();
|
||||
})) {
|
||||
busy_decoding_audio_ = false;
|
||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
||||
} else if (device_state_ == kDeviceStateSpeaking) {
|
||||
AbortSpeaking(kAbortReasonWakeWordDetected);
|
||||
} else if (device_state_ == kDeviceStateActivating) {
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
}
|
||||
}
|
||||
|
||||
// Performs one audio-input step. Consumers are tried in a fixed order —
// audio testing capture, wake word detector, audio processor — and the
// function returns right after the first ReadAudio() call that succeeds.
// When no consumer took audio, it sleeps for OPUS_FRAME_DURATION_MS / 2 ms.
void Application::OnAudioInput() {
|
||||
if (device_state_ == kDeviceStateAudioTesting) {
|
||||
// Stop recording once the queue holds AUDIO_TESTING_MAX_DURATION_MS worth of
// frames.
// NOTE(review): size() is read here without holding mutex_, while the
// push_back below runs under mutex_ — confirm this is safe.
if (audio_testing_queue_.size() >= AUDIO_TESTING_MAX_DURATION_MS / OPUS_FRAME_DURATION_MS) {
|
||||
ExitAudioTestingMode();
|
||||
return;
|
||||
}
|
||||
std::vector<int16_t> data;
|
||||
// Number of samples in one Opus frame at 16000 Hz.
int samples = OPUS_FRAME_DURATION_MS * 16000 / 1000;
|
||||
if (ReadAudio(data, 16000, samples)) {
|
||||
// Encoding is scheduled on the background task; each encoded frame is
// appended to audio_testing_queue_ as a 16000 Hz packet.
background_task_->Schedule([this, data = std::move(data)]() mutable {
|
||||
opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
|
||||
AudioStreamPacket packet;
|
||||
packet.payload = std::move(opus);
|
||||
packet.frame_duration = OPUS_FRAME_DURATION_MS;
|
||||
packet.sample_rate = 16000;
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_testing_queue_.push_back(std::move(packet));
|
||||
});
|
||||
});
|
||||
return;
|
||||
}
|
||||
// If ReadAudio() failed, execution falls through to the branches below.
}
|
||||
|
||||
// Feed the wake word detector with exactly the number of samples it asks for.
if (wake_word_->IsDetectionRunning()) {
|
||||
std::vector<int16_t> data;
|
||||
int samples = wake_word_->GetFeedSize();
|
||||
if (samples > 0) {
|
||||
if (ReadAudio(data, 16000, samples)) {
|
||||
wake_word_->Feed(data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Feed the audio processor the same way when it is running.
if (audio_processor_->IsRunning()) {
|
||||
std::vector<int16_t> data;
|
||||
int samples = audio_processor_->GetFeedSize();
|
||||
if (samples > 0) {
|
||||
if (ReadAudio(data, 16000, samples)) {
|
||||
audio_processor_->Feed(data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing consumed audio this round: delay for half a frame before returning.
vTaskDelay(pdMS_TO_TICKS(OPUS_FRAME_DURATION_MS / 2));
|
||||
}
|
||||
|
||||
// Reads audio from the board's codec into `data`, resampling to `sample_rate`
// when the codec's input rate differs.
//   data        - output buffer; resized and overwritten by this call.
//   sample_rate - requested sample rate of the returned data.
//   samples     - requested number of samples at `sample_rate`.
// Returns false when codec input is disabled or InputData() fails, true
// otherwise.
// NOTE(review): the buffer sizes below do not multiply by the channel count —
// presumably `samples` already covers all channels; confirm with callers.
bool Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
|
||||
auto codec = Board::GetInstance().GetAudioCodec();
|
||||
if (!codec->input_enabled()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (codec->input_sample_rate() != sample_rate) {
|
||||
// Read proportionally more (or fewer) raw samples so that the resampled
// result corresponds to `samples` at the requested rate.
data.resize(samples * codec->input_sample_rate() / sample_rate);
|
||||
if (!codec->InputData(data)) {
|
||||
return false;
|
||||
}
|
||||
if (codec->input_channels() == 2) {
|
||||
// Two-channel input is treated as interleaved pairs: even indices go to
// mic_channel, odd indices to reference_channel.
auto mic_channel = std::vector<int16_t>(data.size() / 2);
|
||||
auto reference_channel = std::vector<int16_t>(data.size() / 2);
|
||||
for (size_t i = 0, j = 0; i < mic_channel.size(); ++i, j += 2) {
|
||||
mic_channel[i] = data[j];
|
||||
reference_channel[i] = data[j + 1];
|
||||
}
|
||||
// Each channel is resampled with its own resampler instance.
auto resampled_mic = std::vector<int16_t>(input_resampler_.GetOutputSamples(mic_channel.size()));
|
||||
auto resampled_reference = std::vector<int16_t>(reference_resampler_.GetOutputSamples(reference_channel.size()));
|
||||
input_resampler_.Process(mic_channel.data(), mic_channel.size(), resampled_mic.data());
|
||||
reference_resampler_.Process(reference_channel.data(), reference_channel.size(), resampled_reference.data());
|
||||
// Re-interleave the two resampled channels back into `data`.
// NOTE(review): the loop indexes resampled_reference by resampled_mic's
// size — assumes both resamplers report the same output length.
data.resize(resampled_mic.size() + resampled_reference.size());
|
||||
for (size_t i = 0, j = 0; i < resampled_mic.size(); ++i, j += 2) {
|
||||
data[j] = resampled_mic[i];
|
||||
data[j + 1] = resampled_reference[i];
|
||||
}
|
||||
} else {
|
||||
// Any other channel count: resample the whole buffer in one pass.
auto resampled = std::vector<int16_t>(input_resampler_.GetOutputSamples(data.size()));
|
||||
input_resampler_.Process(data.data(), data.size(), resampled.data());
|
||||
data = std::move(resampled);
|
||||
}
|
||||
} else {
|
||||
// Rates already match: read directly into `data`.
data.resize(samples);
|
||||
if (!codec->InputData(data)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Audio debugging: pass the captured audio data to the debugger, if one exists
|
||||
if (audio_debugger_) {
|
||||
audio_debugger_->Feed(data);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Application::AbortSpeaking(AbortReason reason) {
|
||||
ESP_LOGI(TAG, "Abort speaking");
|
||||
aborted_ = true;
|
||||
@ -987,8 +635,6 @@ void Application::SetDeviceState(DeviceState state) {
|
||||
auto previous_state = device_state_;
|
||||
device_state_ = state;
|
||||
ESP_LOGI(TAG, "STATE: %s", STATE_STRINGS[device_state_]);
|
||||
// The state is changed, wait for all background tasks to finish
|
||||
background_task_->WaitForCompletion();
|
||||
|
||||
// Send the state change event
|
||||
DeviceStateEventManager::GetInstance().PostStateChangeEvent(previous_state, state);
|
||||
@ -1002,51 +648,39 @@ void Application::SetDeviceState(DeviceState state) {
|
||||
case kDeviceStateIdle:
|
||||
display->SetStatus(Lang::Strings::STANDBY);
|
||||
display->SetEmotion("neutral");
|
||||
audio_processor_->Stop();
|
||||
wake_word_->StartDetection();
|
||||
audio_service_.EnableVoiceProcessing(false);
|
||||
audio_service_.EnableWakeWordDetection(true);
|
||||
break;
|
||||
case kDeviceStateConnecting:
|
||||
display->SetStatus(Lang::Strings::CONNECTING);
|
||||
display->SetEmotion("neutral");
|
||||
display->SetChatMessage("system", "");
|
||||
timestamp_queue_.clear();
|
||||
break;
|
||||
case kDeviceStateListening:
|
||||
display->SetStatus(Lang::Strings::LISTENING);
|
||||
display->SetEmotion("neutral");
|
||||
// Update the IoT states before sending the start listening command
|
||||
#if CONFIG_IOT_PROTOCOL_XIAOZHI
|
||||
UpdateIotStates();
|
||||
#endif
|
||||
|
||||
// Make sure the audio processor is running
|
||||
if (!audio_processor_->IsRunning()) {
|
||||
if (!audio_service_.IsAudioProcessorRunning()) {
|
||||
// Send the start listening command
|
||||
protocol_->SendStartListening(listening_mode_);
|
||||
if (previous_state == kDeviceStateSpeaking) {
|
||||
audio_decode_queue_.clear();
|
||||
audio_decode_cv_.notify_all();
|
||||
// FIXME: Wait for the speaker to empty the buffer
|
||||
vTaskDelay(pdMS_TO_TICKS(120));
|
||||
}
|
||||
opus_encoder_->ResetState();
|
||||
audio_processor_->Start();
|
||||
wake_word_->StopDetection();
|
||||
audio_service_.EnableVoiceProcessing(true);
|
||||
audio_service_.EnableWakeWordDetection(false);
|
||||
}
|
||||
break;
|
||||
case kDeviceStateSpeaking:
|
||||
display->SetStatus(Lang::Strings::SPEAKING);
|
||||
|
||||
if (listening_mode_ != kListeningModeRealtime) {
|
||||
audio_processor_->Stop();
|
||||
audio_service_.EnableVoiceProcessing(false);
|
||||
// Only AFE wake word can be detected in speaking mode
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
wake_word_->StartDetection();
|
||||
audio_service_.EnableWakeWordDetection(true);
|
||||
#else
|
||||
wake_word_->StopDetection();
|
||||
audio_service_.EnableWakeWordDetection(false);
|
||||
#endif
|
||||
}
|
||||
ResetDecoder();
|
||||
audio_service_.ResetDecoder();
|
||||
break;
|
||||
default:
|
||||
// Do nothing
|
||||
@ -1054,41 +688,6 @@ void Application::SetDeviceState(DeviceState state) {
|
||||
}
|
||||
}
|
||||
|
||||
void Application::ResetDecoder() {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
opus_decoder_->ResetState();
|
||||
audio_decode_queue_.clear();
|
||||
audio_decode_cv_.notify_all();
|
||||
last_output_time_ = std::chrono::steady_clock::now();
|
||||
auto codec = Board::GetInstance().GetAudioCodec();
|
||||
codec->EnableOutput(true);
|
||||
}
|
||||
|
||||
void Application::SetDecodeSampleRate(int sample_rate, int frame_duration) {
|
||||
if (opus_decoder_->sample_rate() == sample_rate && opus_decoder_->duration_ms() == frame_duration) {
|
||||
return;
|
||||
}
|
||||
|
||||
opus_decoder_.reset();
|
||||
opus_decoder_ = std::make_unique<OpusDecoderWrapper>(sample_rate, 1, frame_duration);
|
||||
|
||||
auto codec = Board::GetInstance().GetAudioCodec();
|
||||
if (opus_decoder_->sample_rate() != codec->output_sample_rate()) {
|
||||
ESP_LOGI(TAG, "Resampling audio from %d to %d", opus_decoder_->sample_rate(), codec->output_sample_rate());
|
||||
output_resampler_.Configure(opus_decoder_->sample_rate(), codec->output_sample_rate());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Send the current IoT states to the server.
 *
 * Only compiled in when CONFIG_IOT_PROTOCOL_XIAOZHI is set; otherwise this is
 * an empty function. The states JSON is requested from the ThingManager and
 * forwarded through the protocol only when GetStatesJson() reports success.
 */
void Application::UpdateIotStates() {
#if CONFIG_IOT_PROTOCOL_XIAOZHI
    std::string states_json;
    // NOTE(review): the meaning of the second argument (true) is defined by
    // ThingManager::GetStatesJson -- confirm there.
    if (!iot::ThingManager::GetInstance().GetStatesJson(states_json, true)) {
        return;
    }
    protocol_->SendIotStates(states_json);
#endif
}
|
||||
|
||||
void Application::Reboot() {
|
||||
ESP_LOGI(TAG, "Rebooting...");
|
||||
esp_restart();
|
||||
@ -1124,6 +723,10 @@ bool Application::CanEnterSleepMode() {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!audio_service_.IsIdle()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Now it is safe to enter sleep mode
|
||||
return true;
|
||||
}
|
||||
@ -1143,15 +746,15 @@ void Application::SetAecMode(AecMode mode) {
|
||||
auto display = board.GetDisplay();
|
||||
switch (aec_mode_) {
|
||||
case kAecOff:
|
||||
audio_processor_->EnableDeviceAec(false);
|
||||
audio_service_.EnableDeviceAec(false);
|
||||
display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
|
||||
break;
|
||||
case kAecOnServerSide:
|
||||
audio_processor_->EnableDeviceAec(false);
|
||||
audio_service_.EnableDeviceAec(false);
|
||||
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
|
||||
break;
|
||||
case kAecOnDeviceSide:
|
||||
audio_processor_->EnableDeviceAec(true);
|
||||
audio_service_.EnableDeviceAec(true);
|
||||
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
|
||||
break;
|
||||
}
|
||||
@ -1162,3 +765,7 @@ void Application::SetAecMode(AecMode mode) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Play a sound by forwarding it to the audio service.
 *
 * @param sound  Sound data passed unchanged to audio_service_.PlaySound().
 */
void Application::PlaySound(const std::string_view& sound) {
    audio_service_.PlaySound(sound);
}
|
||||
Reference in New Issue
Block a user