5 Commits
main ... cam

Author SHA1 Message Date
fc6302661d feat: support camera capture to livekit 2026-05-25 17:21:11 +08:00
4953244c7c fix: voice interrupt 2026-05-22 10:20:00 +08:00
5223333418 fix: voice interrupt 2026-05-22 10:10:16 +08:00
61ad9dafd9 fix: text display 2026-05-21 17:05:09 +08:00
928d40826f feat: ws connect 2026-05-18 15:56:50 +08:00
13 changed files with 1415 additions and 76 deletions

1
.gitignore vendored
View File

@ -10,6 +10,7 @@ sdkconfig
dependencies.lock
.env
releases/
vision_frames/
main/assets/lang_config.h
main/mmap_generate_emoji.h
.DS_Store

View File

@ -6,6 +6,34 @@ config OTA_URL
help
The application will access this URL to check for new firmware and to obtain the server address.
config USE_DIRECT_WEBSOCKET
bool "Use direct WebSocket without OTA"
default n
help
Skip the OTA server check and use the WebSocket settings below directly.
config WEBSOCKET_URL
string "Default WebSocket URL"
depends on USE_DIRECT_WEBSOCKET
default "ws://172.19.0.240:8080"
help
The WebSocket server URL used when direct WebSocket mode is enabled.
config WEBSOCKET_TOKEN
string "Default WebSocket token"
depends on USE_DIRECT_WEBSOCKET
default ""
help
Optional Authorization token for the direct WebSocket server.
config WEBSOCKET_PROTOCOL_VERSION
int "Default WebSocket protocol version"
depends on USE_DIRECT_WEBSOCKET
range 1 3
default 1
help
Protocol-Version header and hello version used by the WebSocket protocol.
choice
prompt "Flash Assets"
default FLASH_DEFAULT_ASSETS if !USE_EMOTE_MESSAGE_STYLE

View File

@ -302,11 +302,15 @@ void Application::HandleActivationDoneEvent() {
SystemInfo::PrintHeapStats();
SetDeviceState(kDeviceStateIdle);
has_server_time_ = ota_->HasServerTime();
if (ota_ != nullptr) {
has_server_time_ = ota_->HasServerTime();
}
auto display = Board::GetInstance().GetDisplay();
std::string message = std::string(Lang::Strings::VERSION) + ota_->GetCurrentVersion();
display->ShowNotification(message.c_str());
if (ota_ != nullptr) {
std::string message = std::string(Lang::Strings::VERSION) + ota_->GetCurrentVersion();
display->ShowNotification(message.c_str());
}
display->SetChatMessage("system", "");
// Release OTA object after activation is complete
@ -321,6 +325,10 @@ void Application::HandleActivationDoneEvent() {
}
void Application::ActivationTask() {
#if CONFIG_USE_DIRECT_WEBSOCKET
CheckAssetsVersion();
InitializeProtocol();
#else
// Create OTA object for activation process
ota_ = std::make_unique<Ota>();
@ -332,6 +340,7 @@ void Application::ActivationTask() {
// Initialize the protocol
InitializeProtocol();
#endif
// Signal completion to main loop
xEventGroupSetBits(event_group_, MAIN_EVENT_ACTIVATION_DONE);
@ -477,6 +486,9 @@ void Application::InitializeProtocol() {
display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
#if CONFIG_USE_DIRECT_WEBSOCKET
protocol_ = std::make_unique<WebsocketProtocol>();
#else
if (ota_->HasMqttConfig()) {
protocol_ = std::make_unique<MqttProtocol>();
} else if (ota_->HasWebsocketConfig()) {
@ -485,6 +497,7 @@ void Application::InitializeProtocol() {
ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
protocol_ = std::make_unique<MqttProtocol>();
}
#endif
protocol_->OnConnected([this]() {
DismissAlert();
@ -660,10 +673,17 @@ void Application::DismissAlert() {
}
// Requests a chat toggle in text-only mode: the vision flag is cleared first,
// then MAIN_EVENT_TOGGLE_CHAT is raised on the application's event group.
void Application::ToggleChatState() {
    vision_text_mode_enabled_ = false;  // atomic assignment, same as store(false)
    xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
}
// Requests a chat toggle in vision+text mode: the vision flag is set first,
// then MAIN_EVENT_TOGGLE_CHAT is raised on the application's event group.
void Application::ToggleChatStateWithVision() {
    vision_text_mode_enabled_ = true;  // atomic assignment, same as store(true)
    xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
}
// Requests the start of listening: the vision flag is cleared, then
// MAIN_EVENT_START_LISTENING is raised on the application's event group.
void Application::StartListening() {
    vision_text_mode_enabled_ = false;  // atomic assignment, same as store(false)
    xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING);
}
@ -673,6 +693,9 @@ void Application::StopListening() {
void Application::HandleToggleChatEvent() {
auto state = GetDeviceState();
if (state != kDeviceStateIdle) {
vision_text_mode_enabled_.store(false);
}
if (state == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
@ -892,6 +915,9 @@ void Application::HandleStateChangedEvent() {
audio_service_.WaitForPlaybackQueueEmpty();
}
if (vision_text_mode_enabled_.load()) {
SendCurrentVisionFrame();
}
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
@ -931,6 +957,26 @@ void Application::HandleStateChangedEvent() {
}
}
/**
 * Captures one camera frame as JPEG and forwards it through the active protocol.
 *
 * Returns without doing anything when there is no protocol, the audio channel
 * is not opened, or the board reports no camera. The capture is requested
 * without the on-screen preview.
 */
void Application::SendCurrentVisionFrame() {
    if (!protocol_ || !protocol_->IsAudioChannelOpened()) {
        return;
    }

    auto* camera = Board::GetInstance().GetCamera();
    if (camera == nullptr) {
        return;
    }

    std::string jpeg_data;
    // An empty buffer is ignored by the base Protocol::SendVisionFrame, so treat
    // it as a capture failure instead of logging a frame that was never sent.
    if (!camera->CaptureToJpeg(jpeg_data, false) || jpeg_data.empty()) {
        ESP_LOGW(TAG, "Failed to capture vision frame");
        return;
    }

    protocol_->SendVisionFrame(jpeg_data);
    ESP_LOGI(TAG, "Sent vision frame, size=%u bytes", static_cast<unsigned>(jpeg_data.size()));
}
void Application::Schedule(std::function<void()>&& callback) {
{
std::lock_guard<std::mutex> lock(mutex_);
@ -1128,4 +1174,3 @@ void Application::ResetProtocol() {
protocol_.reset();
});
}

View File

@ -11,6 +11,7 @@
#include <deque>
#include <memory>
#include <functional>
#include <atomic>
#include "protocol.h"
#include "ota.h"
@ -91,6 +92,7 @@ public:
* Sends MAIN_EVENT_TOGGLE_CHAT to be handled in Run()
*/
void ToggleChatState();
void ToggleChatStateWithVision();
/**
* Start listening (event-based, thread-safe)
@ -144,6 +146,7 @@ private:
bool aborted_ = false;
bool assets_version_checked_ = false;
bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening
std::atomic<bool> vision_text_mode_enabled_ = false;
int clock_ticks_ = 0;
TaskHandle_t activation_task_handle_ = nullptr;
@ -159,6 +162,7 @@ private:
void HandleWakeWordDetectedEvent();
void ContinueOpenAudioChannel(ListeningMode mode);
void ContinueWakeWordInvoke(const std::string& wake_word);
void SendCurrentVisionFrame();
// Activation task (runs in background)
void ActivationTask();

View File

@ -7,6 +7,8 @@ class Camera {
public:
virtual void SetExplainUrl(const std::string& url, const std::string& token) = 0;
virtual bool Capture() = 0;
virtual bool CaptureBackground() { return Capture(); }
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) { return false; }
virtual bool SetHMirror(bool enabled) = 0;
virtual bool SetVFlip(bool enabled) = 0;
virtual bool SetSwapBytes(bool enabled) { return false; } // Optional, default no-op

View File

@ -24,6 +24,7 @@
#include "lvgl_display.h"
#include "mcp_server.h"
#include "system_info.h"
#include "esp_timer.h"
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#undef LOG_LOCAL_LEVEL
@ -55,6 +56,7 @@
#define TAG "EspVideo"
#define FOREGROUND_CAPTURE_PROTECTION_US (10 * 1000 * 1000)
#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
#warning \
@ -381,11 +383,47 @@ EspVideo::~EspVideo() {
}
// Stores the image-explain endpoint and its token. Both members are written
// while frame_mutex_ is held.
void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) {
    const std::lock_guard<std::mutex> guard(frame_mutex_);
    explain_url_ = url;
    explain_token_ = token;
}
// Foreground capture: delegates to CaptureFrame() with the preview enabled.
bool EspVideo::Capture() {
    constexpr bool kShowPreview = true;
    return CaptureFrame(kShowPreview);
}
// Background capture: delegates to CaptureFrame() with the preview disabled.
bool EspVideo::CaptureBackground() {
    constexpr bool kShowPreview = false;
    return CaptureFrame(kShowPreview);
}
/**
 * Captures a frame and encodes the currently held frame as JPEG.
 *
 * @param jpeg_data    Output buffer; cleared on entry and left empty on failure.
 * @param show_preview Forwarded to CaptureFrame().
 * @return true when CaptureFrame() succeeded, a frame buffer was available and
 *         image_to_jpeg_cb reported success; false otherwise.
 */
bool EspVideo::CaptureToJpeg(std::string& jpeg_data, bool show_preview) {
    jpeg_data.clear();

    // CaptureFrame() locks frame_mutex_ itself, so it has to run before the
    // lock below is taken.
    if (!CaptureFrame(show_preview)) {
        return false;
    }

    std::lock_guard<std::mutex> lock(frame_mutex_);
    if (frame_.data == nullptr || frame_.len == 0) {
        return false;
    }

    // Fall back to 320x240 when the stored frame carries no dimensions.
    const uint16_t width = frame_.width ? frame_.width : 320;
    const uint16_t height = frame_.height ? frame_.height : 240;

    const bool encoded = image_to_jpeg_cb(
        frame_.data, frame_.len, width, height, frame_.format, 60,
        [](void* arg, size_t /*index*/, const void* data, size_t len) -> size_t {
            // Append each encoder chunk to the caller's string.
            auto* out = static_cast<std::string*>(arg);
            if (data != nullptr && len > 0) {
                out->append(static_cast<const char*>(data), len);
            }
            return len;
        },
        &jpeg_data);

    if (!encoded) {
        // Do not hand a partially written JPEG back to the caller.
        jpeg_data.clear();
    }
    return encoded;
}
bool EspVideo::CaptureFrame(bool show_preview) {
std::lock_guard<std::mutex> lock(frame_mutex_);
if (encoder_thread_.joinable()) {
encoder_thread_.join();
}
@ -394,6 +432,10 @@ bool EspVideo::Capture() {
return false;
}
if (!show_preview && esp_timer_get_time() < foreground_capture_protected_until_us_) {
return true;
}
for (int i = 0; i < 3; i++) {
struct v4l2_buffer buf = {};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
@ -729,9 +771,14 @@ bool EspVideo::Capture() {
}
}
// 显示预览图片
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
if (display != nullptr) {
if (show_preview) {
foreground_capture_protected_until_us_ = esp_timer_get_time() + FOREGROUND_CAPTURE_PROTECTION_US;
}
if (show_preview) {
// 显示预览图片
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
if (display != nullptr) {
if (!frame_.data) {
ESP_LOGE(TAG, "frame.data is null");
return false;
@ -836,6 +883,7 @@ bool EspVideo::Capture() {
auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
display->SetPreviewImage(std::move(image));
}
}
return true;
}
@ -898,10 +946,16 @@ bool EspVideo::SetVFlip(bool enabled) {
* @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息
*/
std::string EspVideo::Explain(const std::string& question) {
std::lock_guard<std::mutex> lock(frame_mutex_);
if (explain_url_.empty()) {
throw std::runtime_error("Image explain URL or token is not set");
}
if (frame_.data == nullptr || frame_.len == 0) {
throw std::runtime_error("No camera frame captured");
}
// 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
if (jpeg_queue == nullptr) {

View File

@ -5,6 +5,8 @@
#include <thread>
#include <memory>
#include <vector>
#include <mutex>
#include <cstdint>
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>
@ -39,6 +41,10 @@ private:
std::string explain_url_;
std::string explain_token_;
std::thread encoder_thread_;
std::mutex frame_mutex_;
int64_t foreground_capture_protected_until_us_ = 0;
bool CaptureFrame(bool show_preview);
public:
EspVideo(const esp_video_init_config_t& config);
@ -46,6 +52,8 @@ public:
virtual void SetExplainUrl(const std::string& url, const std::string& token);
virtual bool Capture();
virtual bool CaptureBackground() override;
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) override;
// 翻转控制函数
virtual bool SetHMirror(bool enabled) override;
virtual bool SetVFlip(bool enabled) override;

View File

@ -1,21 +1,23 @@
#include "wifi_board.h"
#include "application.h"
#include "axp2101.h"
#include "config.h"
#include "cores3_audio_codec.h"
#include "display/lcd_display.h"
#include "application.h"
#include "config.h"
#include "power_save_timer.h"
#include "i2c_device.h"
#include "axp2101.h"
#include "power_save_timer.h"
#include "wifi_board.h"
#include <esp_log.h>
#include <driver/i2c_master.h>
#include <esp_lcd_ili9341.h>
#include <esp_lcd_panel_io.h>
#include <esp_lcd_panel_ops.h>
#include <esp_lcd_ili9341.h>
#include <esp_log.h>
#include <esp_timer.h>
#include "esp_video.h"
#define TAG "M5StackCoreS3Board"
#define BACKGROUND_VISION_INITIAL_DELAY_MS 8000
#define BACKGROUND_VISION_SAMPLE_INTERVAL_MS 100
class Pmic : public Axp2101 {
public:
@ -41,7 +43,7 @@ public:
class CustomBacklight : public Backlight {
public:
CustomBacklight(Pmic *pmic) : pmic_(pmic) {}
CustomBacklight(Pmic* pmic) : pmic_(pmic) {}
void SetBrightnessImpl(uint8_t brightness) override {
pmic_->SetBrightness(target_brightness_);
@ -49,7 +51,7 @@ public:
}
private:
Pmic *pmic_;
Pmic* pmic_;
};
class Aw9523 : public I2cDevice {
@ -96,9 +98,7 @@ public:
read_buffer_ = new uint8_t[6];
}
~Ft6336() {
delete[] read_buffer_;
}
~Ft6336() { delete[] read_buffer_; }
void UpdateTouchPoint() {
ReadRegs(0x02, read_buffer_, 6);
@ -107,9 +107,7 @@ public:
tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4];
}
inline const TouchPoint_t& GetTouchPoint() {
return tp_;
}
inline const TouchPoint_t& GetTouchPoint() { return tp_; }
private:
uint8_t* read_buffer_ = nullptr;
@ -137,9 +135,7 @@ private:
GetDisplay()->SetPowerSaveMode(false);
GetBacklight()->RestoreBrightness();
});
power_save_timer_->OnShutdownRequest([this]() {
pmic_->PowerOff();
});
power_save_timer_->OnShutdownRequest([this]() { pmic_->PowerOff(); });
power_save_timer_->SetEnabled(true);
}
@ -153,9 +149,10 @@ private:
.glitch_ignore_cnt = 7,
.intr_priority = 0,
.trans_queue_depth = 0,
.flags = {
.enable_internal_pullup = 1,
},
.flags =
{
.enable_internal_pullup = 1,
},
};
ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_));
}
@ -203,21 +200,25 @@ private:
// 检测触摸开始
if (touch_point.num > 0 && !was_touched) {
was_touched = true;
touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒
touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒
}
// 检测触摸释放
else if (touch_point.num == 0 && was_touched) {
was_touched = false;
int64_t touch_duration = (esp_timer_get_time() / 1000) - touch_start_time;
// 只有短触才触发
if (touch_duration < TOUCH_THRESHOLD_MS) {
auto& app = Application::GetInstance();
if (app.GetDeviceState() == kDeviceStateStarting) {
EnterWifiConfigMode();
return;
}
ESP_LOGI(TAG, "Touch short: text-only mode");
app.ToggleChatState();
} else {
auto& app = Application::GetInstance();
ESP_LOGI(TAG, "Touch long: vision+text mode");
app.ToggleChatStateWithVision();
}
}
}
@ -228,10 +229,11 @@ private:
// 创建定时器20ms 间隔
esp_timer_create_args_t timer_args = {
.callback = [](void* arg) {
M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg;
board->PollTouchpad();
},
.callback =
[](void* arg) {
M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg;
board->PollTouchpad();
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
.name = "touchpad_timer",
@ -285,23 +287,25 @@ private:
esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY);
esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
display_ = new SpiLcdDisplay(panel_io, panel,
DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
display_ = new SpiLcdDisplay(panel_io, panel, DISPLAY_WIDTH, DISPLAY_HEIGHT,
DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X,
DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
}
void InitializeCamera() {
void InitializeCamera() {
static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = {
.data_width = CAM_CTLR_DATA_WIDTH_8,
.data_io = {
[0] = CAMERA_PIN_D0,
[1] = CAMERA_PIN_D1,
[2] = CAMERA_PIN_D2,
[3] = CAMERA_PIN_D3,
[4] = CAMERA_PIN_D4,
[5] = CAMERA_PIN_D5,
[6] = CAMERA_PIN_D6,
[7] = CAMERA_PIN_D7,
},
.data_io =
{
[0] = CAMERA_PIN_D0,
[1] = CAMERA_PIN_D1,
[2] = CAMERA_PIN_D2,
[3] = CAMERA_PIN_D3,
[4] = CAMERA_PIN_D4,
[5] = CAMERA_PIN_D5,
[6] = CAMERA_PIN_D6,
[7] = CAMERA_PIN_D7,
},
.vsync_io = CAMERA_PIN_VSYNC,
.de_io = CAMERA_PIN_HREF,
.pclk_io = CAMERA_PIN_PCLK,
@ -330,6 +334,37 @@ private:
camera_->SetHMirror(false);
}
void InitializeBackgroundVisionSampler() {
xTaskCreate(
[](void* arg) {
auto board = static_cast<M5StackCoreS3Board*>(arg);
bool has_logged_success = false;
bool has_logged_failure = false;
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS));
while (true) {
if (board->camera_ == nullptr) {
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
continue;
}
if (board->camera_->CaptureBackground()) {
if (!has_logged_success) {
ESP_LOGI(TAG, "Background vision sampler started");
has_logged_success = true;
}
} else if (!has_logged_failure) {
ESP_LOGW(TAG, "Background vision sampler is waiting for camera");
has_logged_failure = true;
}
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
}
},
"BgVisionSampler", 4096, this, 1, nullptr);
}
public:
M5StackCoreS3Board() {
InitializePowerSaveTimer();
@ -340,34 +375,24 @@ public:
InitializeSpi();
InitializeIli9342Display();
InitializeCamera();
InitializeBackgroundVisionSampler();
InitializeFt6336TouchPad();
GetBacklight()->RestoreBrightness();
}
virtual AudioCodec* GetAudioCodec() override {
static CoreS3AudioCodec audio_codec(i2c_bus_,
AUDIO_INPUT_SAMPLE_RATE,
AUDIO_OUTPUT_SAMPLE_RATE,
AUDIO_I2S_GPIO_MCLK,
AUDIO_I2S_GPIO_BCLK,
AUDIO_I2S_GPIO_WS,
AUDIO_I2S_GPIO_DOUT,
AUDIO_I2S_GPIO_DIN,
AUDIO_CODEC_AW88298_ADDR,
AUDIO_CODEC_ES7210_ADDR,
AUDIO_INPUT_REFERENCE);
static CoreS3AudioCodec audio_codec(
i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE, AUDIO_I2S_GPIO_MCLK,
AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN,
AUDIO_CODEC_AW88298_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE);
return &audio_codec;
}
virtual Display* GetDisplay() override {
return display_;
}
virtual Display* GetDisplay() override { return display_; }
virtual Camera* GetCamera() override {
return camera_;
}
virtual Camera* GetCamera() override { return camera_; }
virtual bool GetBatteryLevel(int &level, bool& charging, bool& discharging) override {
virtual bool GetBatteryLevel(int& level, bool& charging, bool& discharging) override {
static bool last_discharging = false;
charging = pmic_->IsCharging();
discharging = pmic_->IsDischarging();
@ -387,7 +412,7 @@ public:
WifiBoard::SetPowerSaveLevel(level);
}
virtual Backlight *GetBacklight() override {
virtual Backlight* GetBacklight() override {
static CustomBacklight backlight(pmic_);
return &backlight;
}

View File

@ -598,6 +598,10 @@ CONFIG_PARTITION_TABLE_MD5=y
# Xiaozhi Assistant
#
CONFIG_OTA_URL="https://api.tenclass.net/xiaozhi/ota/"
CONFIG_USE_DIRECT_WEBSOCKET=y
CONFIG_WEBSOCKET_URL="ws://172.19.0.240:8080"
CONFIG_WEBSOCKET_TOKEN=""
CONFIG_WEBSOCKET_PROTOCOL_VERSION=1
# CONFIG_FLASH_NONE_ASSETS is not set
CONFIG_FLASH_DEFAULT_ASSETS=y
# CONFIG_FLASH_CUSTOM_ASSETS is not set

1123
main/bridge_server.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,22 @@
#include "protocol.h"
#include <esp_log.h>
#include <mbedtls/base64.h>
#define TAG "Protocol"
static std::string Base64Encode(const std::string& data) {
size_t encoded_length = 0;
size_t output_length = 0;
mbedtls_base64_encode(nullptr, 0, &encoded_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
std::string result(encoded_length, 0);
mbedtls_base64_encode(reinterpret_cast<unsigned char*>(result.data()), result.size(), &output_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
result.resize(output_length);
return result;
}
void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) {
on_incoming_json_ = callback;
}
@ -78,6 +91,27 @@ void Protocol::SendMcpMessage(const std::string& payload) {
SendText(message);
}
void Protocol::SendVisionFrame(const std::string& jpeg_data) {
if (jpeg_data.empty()) {
return;
}
cJSON* root = cJSON_CreateObject();
cJSON_AddStringToObject(root, "session_id", session_id_.c_str());
cJSON_AddStringToObject(root, "type", "vision");
cJSON_AddStringToObject(root, "state", "frame");
cJSON_AddStringToObject(root, "mime_type", "image/jpeg");
auto encoded = Base64Encode(jpeg_data);
cJSON_AddStringToObject(root, "image", encoded.c_str());
char* json_str = cJSON_PrintUnformatted(root);
if (json_str != nullptr) {
SendText(json_str);
cJSON_free(json_str);
}
cJSON_Delete(root);
}
bool Protocol::IsTimeout() const {
const int kTimeoutSeconds = 120;
auto now = std::chrono::steady_clock::now();

View File

@ -73,6 +73,7 @@ public:
virtual void SendStopListening();
virtual void SendAbortSpeaking(AbortReason reason);
virtual void SendMcpMessage(const std::string& message);
virtual void SendVisionFrame(const std::string& jpeg_data);
protected:
std::function<void(const cJSON* root)> on_incoming_json_;
@ -95,4 +96,3 @@ protected:
};
#endif // PROTOCOL_H

View File

@ -85,10 +85,21 @@ bool WebsocketProtocol::OpenAudioChannel() {
std::string url = settings.GetString("url");
std::string token = settings.GetString("token");
int version = settings.GetInt("version");
#if CONFIG_USE_DIRECT_WEBSOCKET
url = CONFIG_WEBSOCKET_URL;
token = CONFIG_WEBSOCKET_TOKEN;
version = CONFIG_WEBSOCKET_PROTOCOL_VERSION;
#endif
if (version != 0) {
version_ = version;
}
if (url.empty()) {
ESP_LOGE(TAG, "Websocket URL is not set");
SetError(Lang::Strings::SERVER_NOT_CONNECTED);
return false;
}
error_occurred_ = false;
auto network = Board::GetInstance().GetNetwork();