Files
xiaozhi-esp32/main/protocols/protocol.cc
2026-05-25 17:21:11 +08:00

125 lines
4.0 KiB
C++

#include "protocol.h"
#include <utility>
#include <esp_log.h>
#include <mbedtls/base64.h>
#define TAG "Protocol"
static std::string Base64Encode(const std::string& data) {
size_t encoded_length = 0;
size_t output_length = 0;
mbedtls_base64_encode(nullptr, 0, &encoded_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
std::string result(encoded_length, 0);
mbedtls_base64_encode(reinterpret_cast<unsigned char*>(result.data()), result.size(), &output_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
result.resize(output_length);
return result;
}
/// Stores the handler for incoming JSON in `on_incoming_json_`, replacing any
/// handler stored earlier.
///
/// @param callback Callable taking the `const cJSON*` root of a message.
void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_incoming_json_ = std::move(callback);
}
/// Stores the handler for incoming audio in `on_incoming_audio_`, replacing any
/// handler stored earlier.
///
/// @param callback Callable that receives ownership of an `AudioStreamPacket`
///                 through a `std::unique_ptr`.
void Protocol::OnIncomingAudio(std::function<void(std::unique_ptr<AudioStreamPacket> packet)> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_incoming_audio_ = std::move(callback);
}
/// Stores the audio-channel-opened handler in `on_audio_channel_opened_`,
/// replacing any handler stored earlier.
///
/// @param callback Callable taking no arguments.
void Protocol::OnAudioChannelOpened(std::function<void()> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_audio_channel_opened_ = std::move(callback);
}
/// Stores the audio-channel-closed handler in `on_audio_channel_closed_`,
/// replacing any handler stored earlier.
///
/// @param callback Callable taking no arguments.
void Protocol::OnAudioChannelClosed(std::function<void()> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_audio_channel_closed_ = std::move(callback);
}
/// Stores the network-error handler in `on_network_error_`, replacing any
/// handler stored earlier. `SetError()` invokes this handler with its message.
///
/// @param callback Callable taking the error message as `const std::string&`.
void Protocol::OnNetworkError(std::function<void(const std::string& message)> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_network_error_ = std::move(callback);
}
/// Stores the connected handler in `on_connected_`, replacing any handler
/// stored earlier.
///
/// @param callback Callable taking no arguments.
void Protocol::OnConnected(std::function<void()> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_connected_ = std::move(callback);
}
/// Stores the disconnected handler in `on_disconnected_`, replacing any handler
/// stored earlier.
///
/// @param callback Callable taking no arguments.
void Protocol::OnDisconnected(std::function<void()> callback) {
    // The parameter is already a private copy; move it rather than copying again.
    on_disconnected_ = std::move(callback);
}
/// Records that an error occurred and reports it to the network-error handler.
///
/// Sets `error_occurred_` unconditionally; the handler is only invoked when one
/// has been stored in `on_network_error_`.
///
/// @param message Error description passed through to the handler.
void Protocol::SetError(const std::string& message) {
    error_occurred_ = true;

    // An empty std::function must not be called; nothing more to do without a handler.
    if (!on_network_error_) {
        return;
    }
    on_network_error_(message);
}
/// Sends an "abort" message for the current session via `SendText()`.
///
/// The message always carries `session_id` and `type`; a `reason` field is
/// added only when the abort was triggered by wake word detection.
///
/// @param reason Why speaking is being aborted.
void Protocol::SendAbortSpeaking(AbortReason reason) {
    std::string payload;
    payload.append("{\"session_id\":\"").append(session_id_).append("\",\"type\":\"abort\"");

    const bool by_wake_word = (reason == kAbortReasonWakeWordDetected);
    if (by_wake_word) {
        payload.append(",\"reason\":\"wake_word_detected\"");
    }

    payload.append("}");
    SendText(payload);
}
void Protocol::SendWakeWordDetected(const std::string& wake_word) {
std::string json = "{\"session_id\":\"" + session_id_ +
"\",\"type\":\"listen\",\"state\":\"detect\",\"text\":\"" + wake_word + "\"}";
SendText(json);
}
/// Sends a "listen"/"start" message for the current session via `SendText()`.
///
/// The `mode` field is "realtime" or "auto" for the corresponding enum values
/// and "manual" for every other value.
///
/// @param mode Listening mode to announce.
void Protocol::SendStartListening(ListeningMode mode) {
    // Pick the mode fragment first so the message can be assembled in one pass.
    const char* mode_field = nullptr;
    switch (mode) {
        case kListeningModeRealtime:
            mode_field = ",\"mode\":\"realtime\"";
            break;
        case kListeningModeAutoStop:
            mode_field = ",\"mode\":\"auto\"";
            break;
        default:
            mode_field = ",\"mode\":\"manual\"";
            break;
    }

    std::string payload;
    payload.append("{\"session_id\":\"").append(session_id_).append("\"");
    payload.append(",\"type\":\"listen\",\"state\":\"start\"");
    payload.append(mode_field);
    payload.append("}");
    SendText(payload);
}
/// Sends a "listen"/"stop" message for the current session via `SendText()`.
void Protocol::SendStopListening() {
    std::string payload("{\"session_id\":\"");
    payload += session_id_;
    payload += "\",\"type\":\"listen\",\"state\":\"stop\"}";
    SendText(payload);
}
/// Wraps an MCP payload in a session envelope and sends it via `SendText()`.
///
/// @param payload Inserted verbatim as the value of the "payload" field (it is
///                neither quoted nor escaped here), so it must already be valid
///                JSON for the resulting message to be valid JSON.
void Protocol::SendMcpMessage(const std::string& payload) {
    std::string envelope("{\"session_id\":\"");
    envelope += session_id_;
    envelope += "\",\"type\":\"mcp\",\"payload\":";
    envelope += payload;
    envelope += "}";
    SendText(envelope);
}
void Protocol::SendVisionFrame(const std::string& jpeg_data) {
if (jpeg_data.empty()) {
return;
}
cJSON* root = cJSON_CreateObject();
cJSON_AddStringToObject(root, "session_id", session_id_.c_str());
cJSON_AddStringToObject(root, "type", "vision");
cJSON_AddStringToObject(root, "state", "frame");
cJSON_AddStringToObject(root, "mime_type", "image/jpeg");
auto encoded = Base64Encode(jpeg_data);
cJSON_AddStringToObject(root, "image", encoded.c_str());
char* json_str = cJSON_PrintUnformatted(root);
if (json_str != nullptr) {
SendText(json_str);
cJSON_free(json_str);
}
cJSON_Delete(root);
}
/// Reports whether more than 120 whole seconds have elapsed since
/// `last_incoming_time_`, measured on the steady clock.
///
/// Logs an error with the elapsed time whenever the limit is exceeded.
///
/// @return true if the elapsed time exceeds the limit, false otherwise.
bool Protocol::IsTimeout() const {
    constexpr int kTimeoutSeconds = 120;

    // Truncated to whole seconds before comparing, so the limit is strictly "more than 120 s".
    const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
        std::chrono::steady_clock::now() - last_incoming_time_);

    if (elapsed.count() <= kTimeoutSeconds) {
        return false;
    }

    ESP_LOGE(TAG, "Channel timeout %ld seconds", static_cast<long>(elapsed.count()));
    return true;
}