5 Commits
main ... cam

Author SHA1 Message Date
fc6302661d feat: support camera capture to livekit 2026-05-25 17:21:11 +08:00
4953244c7c fix: voice interrupt 2026-05-22 10:20:00 +08:00
5223333418 fix: voice interrupt 2026-05-22 10:10:16 +08:00
61ad9dafd9 fix: text display 2026-05-21 17:05:09 +08:00
928d40826f feat: ws connect 2026-05-18 15:56:50 +08:00
13 changed files with 1415 additions and 76 deletions

1
.gitignore vendored
View File

@ -10,6 +10,7 @@ sdkconfig
dependencies.lock dependencies.lock
.env .env
releases/ releases/
vision_frames/
main/assets/lang_config.h main/assets/lang_config.h
main/mmap_generate_emoji.h main/mmap_generate_emoji.h
.DS_Store .DS_Store

View File

@ -6,6 +6,34 @@ config OTA_URL
help help
The application will access this URL to check for new firmwares and server address. The application will access this URL to check for new firmwares and server address.
config USE_DIRECT_WEBSOCKET
bool "Use direct WebSocket without OTA"
default n
help
Skip the OTA server check and use the WebSocket settings below directly.
config WEBSOCKET_URL
string "Default WebSocket URL"
depends on USE_DIRECT_WEBSOCKET
default "ws://172.19.0.240:8080"
help
The WebSocket server URL used when direct WebSocket mode is enabled.
config WEBSOCKET_TOKEN
string "Default WebSocket token"
depends on USE_DIRECT_WEBSOCKET
default ""
help
Optional Authorization token for the direct WebSocket server.
config WEBSOCKET_PROTOCOL_VERSION
int "Default WebSocket protocol version"
depends on USE_DIRECT_WEBSOCKET
range 1 3
default 1
help
Protocol-Version header and hello version used by the WebSocket protocol.
choice choice
prompt "Flash Assets" prompt "Flash Assets"
default FLASH_DEFAULT_ASSETS if !USE_EMOTE_MESSAGE_STYLE default FLASH_DEFAULT_ASSETS if !USE_EMOTE_MESSAGE_STYLE

View File

@ -302,11 +302,15 @@ void Application::HandleActivationDoneEvent() {
SystemInfo::PrintHeapStats(); SystemInfo::PrintHeapStats();
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
has_server_time_ = ota_->HasServerTime(); if (ota_ != nullptr) {
has_server_time_ = ota_->HasServerTime();
}
auto display = Board::GetInstance().GetDisplay(); auto display = Board::GetInstance().GetDisplay();
std::string message = std::string(Lang::Strings::VERSION) + ota_->GetCurrentVersion(); if (ota_ != nullptr) {
display->ShowNotification(message.c_str()); std::string message = std::string(Lang::Strings::VERSION) + ota_->GetCurrentVersion();
display->ShowNotification(message.c_str());
}
display->SetChatMessage("system", ""); display->SetChatMessage("system", "");
// Release OTA object after activation is complete // Release OTA object after activation is complete
@ -321,6 +325,10 @@ void Application::HandleActivationDoneEvent() {
} }
void Application::ActivationTask() { void Application::ActivationTask() {
#if CONFIG_USE_DIRECT_WEBSOCKET
CheckAssetsVersion();
InitializeProtocol();
#else
// Create OTA object for activation process // Create OTA object for activation process
ota_ = std::make_unique<Ota>(); ota_ = std::make_unique<Ota>();
@ -332,6 +340,7 @@ void Application::ActivationTask() {
// Initialize the protocol // Initialize the protocol
InitializeProtocol(); InitializeProtocol();
#endif
// Signal completion to main loop // Signal completion to main loop
xEventGroupSetBits(event_group_, MAIN_EVENT_ACTIVATION_DONE); xEventGroupSetBits(event_group_, MAIN_EVENT_ACTIVATION_DONE);
@ -477,6 +486,9 @@ void Application::InitializeProtocol() {
display->SetStatus(Lang::Strings::LOADING_PROTOCOL); display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
#if CONFIG_USE_DIRECT_WEBSOCKET
protocol_ = std::make_unique<WebsocketProtocol>();
#else
if (ota_->HasMqttConfig()) { if (ota_->HasMqttConfig()) {
protocol_ = std::make_unique<MqttProtocol>(); protocol_ = std::make_unique<MqttProtocol>();
} else if (ota_->HasWebsocketConfig()) { } else if (ota_->HasWebsocketConfig()) {
@ -485,6 +497,7 @@ void Application::InitializeProtocol() {
ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT"); ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
protocol_ = std::make_unique<MqttProtocol>(); protocol_ = std::make_unique<MqttProtocol>();
} }
#endif
protocol_->OnConnected([this]() { protocol_->OnConnected([this]() {
DismissAlert(); DismissAlert();
@ -660,10 +673,17 @@ void Application::DismissAlert() {
} }
void Application::ToggleChatState() { void Application::ToggleChatState() {
vision_text_mode_enabled_.store(false);
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
}
void Application::ToggleChatStateWithVision() {
vision_text_mode_enabled_.store(true);
xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT); xEventGroupSetBits(event_group_, MAIN_EVENT_TOGGLE_CHAT);
} }
void Application::StartListening() { void Application::StartListening() {
vision_text_mode_enabled_.store(false);
xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING); xEventGroupSetBits(event_group_, MAIN_EVENT_START_LISTENING);
} }
@ -673,7 +693,10 @@ void Application::StopListening() {
void Application::HandleToggleChatEvent() { void Application::HandleToggleChatEvent() {
auto state = GetDeviceState(); auto state = GetDeviceState();
if (state != kDeviceStateIdle) {
vision_text_mode_enabled_.store(false);
}
if (state == kDeviceStateActivating) { if (state == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle); SetDeviceState(kDeviceStateIdle);
return; return;
@ -892,6 +915,9 @@ void Application::HandleStateChangedEvent() {
audio_service_.WaitForPlaybackQueueEmpty(); audio_service_.WaitForPlaybackQueueEmpty();
} }
if (vision_text_mode_enabled_.load()) {
SendCurrentVisionFrame();
}
// Send the start listening command // Send the start listening command
protocol_->SendStartListening(listening_mode_); protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true); audio_service_.EnableVoiceProcessing(true);
@ -931,6 +957,26 @@ void Application::HandleStateChangedEvent() {
} }
} }
// Grab one camera frame as JPEG (without showing a preview) and forward it
// to the server through the active protocol.
// Does nothing when there is no protocol, the audio channel is not open,
// or the board reports no camera.
void Application::SendCurrentVisionFrame() {
    const bool channel_ready = protocol_ && protocol_->IsAudioChannelOpened();
    if (!channel_ready) {
        return;
    }

    auto* cam = Board::GetInstance().GetCamera();
    if (!cam) {
        return;
    }

    std::string frame_jpeg;
    const bool captured = cam->CaptureToJpeg(frame_jpeg, false);
    if (!captured) {
        ESP_LOGW(TAG, "Failed to capture vision frame");
        return;
    }

    protocol_->SendVisionFrame(frame_jpeg);

    const auto sent_bytes = static_cast<unsigned>(frame_jpeg.size());
    ESP_LOGI(TAG, "Sent vision frame, size=%u bytes", sent_bytes);
}
void Application::Schedule(std::function<void()>&& callback) { void Application::Schedule(std::function<void()>&& callback) {
{ {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
@ -1128,4 +1174,3 @@ void Application::ResetProtocol() {
protocol_.reset(); protocol_.reset();
}); });
} }

View File

@ -11,6 +11,7 @@
#include <deque> #include <deque>
#include <memory> #include <memory>
#include <functional> #include <functional>
#include <atomic>
#include "protocol.h" #include "protocol.h"
#include "ota.h" #include "ota.h"
@ -91,6 +92,7 @@ public:
* Sends MAIN_EVENT_TOGGLE_CHAT to be handled in Run() * Sends MAIN_EVENT_TOGGLE_CHAT to be handled in Run()
*/ */
void ToggleChatState(); void ToggleChatState();
void ToggleChatStateWithVision();
/** /**
* Start listening (event-based, thread-safe) * Start listening (event-based, thread-safe)
@ -144,6 +146,7 @@ private:
bool aborted_ = false; bool aborted_ = false;
bool assets_version_checked_ = false; bool assets_version_checked_ = false;
bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening bool play_popup_on_listening_ = false; // Flag to play popup sound after state changes to listening
std::atomic<bool> vision_text_mode_enabled_ = false;
int clock_ticks_ = 0; int clock_ticks_ = 0;
TaskHandle_t activation_task_handle_ = nullptr; TaskHandle_t activation_task_handle_ = nullptr;
@ -159,6 +162,7 @@ private:
void HandleWakeWordDetectedEvent(); void HandleWakeWordDetectedEvent();
void ContinueOpenAudioChannel(ListeningMode mode); void ContinueOpenAudioChannel(ListeningMode mode);
void ContinueWakeWordInvoke(const std::string& wake_word); void ContinueWakeWordInvoke(const std::string& wake_word);
void SendCurrentVisionFrame();
// Activation task (runs in background) // Activation task (runs in background)
void ActivationTask(); void ActivationTask();

View File

@ -7,6 +7,8 @@ class Camera {
public: public:
virtual void SetExplainUrl(const std::string& url, const std::string& token) = 0; virtual void SetExplainUrl(const std::string& url, const std::string& token) = 0;
virtual bool Capture() = 0; virtual bool Capture() = 0;
virtual bool CaptureBackground() { return Capture(); }
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) { return false; }
virtual bool SetHMirror(bool enabled) = 0; virtual bool SetHMirror(bool enabled) = 0;
virtual bool SetVFlip(bool enabled) = 0; virtual bool SetVFlip(bool enabled) = 0;
virtual bool SetSwapBytes(bool enabled) { return false; } // Optional, default no-op virtual bool SetSwapBytes(bool enabled) { return false; } // Optional, default no-op

View File

@ -24,6 +24,7 @@
#include "lvgl_display.h" #include "lvgl_display.h"
#include "mcp_server.h" #include "mcp_server.h"
#include "system_info.h" #include "system_info.h"
#include "esp_timer.h"
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE #ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#undef LOG_LOCAL_LEVEL #undef LOG_LOCAL_LEVEL
@ -55,6 +56,7 @@
#define TAG "EspVideo" #define TAG "EspVideo"
#define FOREGROUND_CAPTURE_PROTECTION_US (10 * 1000 * 1000)
#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP) #if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
#warning \ #warning \
@ -381,11 +383,47 @@ EspVideo::~EspVideo() {
} }
void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) { void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) {
std::lock_guard<std::mutex> lock(frame_mutex_);
explain_url_ = url; explain_url_ = url;
explain_token_ = token; explain_token_ = token;
} }
bool EspVideo::Capture() { bool EspVideo::Capture() {
return CaptureFrame(true);
}
// Capture a frame without pushing a preview image to the display.
// Forwards to CaptureFrame() with the preview flag cleared.
bool EspVideo::CaptureBackground() {
    constexpr bool kShowPreview = false;
    return CaptureFrame(kShowPreview);
}
// Capture a frame and encode it as JPEG into `jpeg_data`.
//
// @param jpeg_data    Output buffer; always cleared first. Holds the complete
//                     JPEG on success and is left empty on any failure.
// @param show_preview Forwarded to CaptureFrame().
// @return true when a frame was captured and encoded, false otherwise.
bool EspVideo::CaptureToJpeg(std::string& jpeg_data, bool show_preview) {
    jpeg_data.clear();
    if (!CaptureFrame(show_preview)) {
        return false;
    }

    // CaptureFrame() takes and releases frame_mutex_ itself, so it must be
    // re-acquired here to keep frame_ stable while the encoder reads it.
    std::lock_guard<std::mutex> lock(frame_mutex_);
    if (frame_.data == nullptr || frame_.len == 0) {
        return false;
    }

    // Fall back to 320x240 when the frame carries no dimensions.
    uint16_t w = frame_.width ? frame_.width : 320;
    uint16_t h = frame_.height ? frame_.height : 240;

    const bool encoded = image_to_jpeg_cb(
        frame_.data, frame_.len, w, h, frame_.format, 60,
        [](void* arg, size_t /*index*/, const void* data, size_t len) -> size_t {
            // Append each encoder chunk to the caller's string.
            auto out = static_cast<std::string*>(arg);
            if (data != nullptr && len > 0) {
                out->append(static_cast<const char*>(data), len);
            }
            return len;
        },
        &jpeg_data);

    if (!encoded) {
        // Chunks may already have been appended before the encoder failed;
        // do not hand a truncated JPEG back to the caller.
        jpeg_data.clear();
    }
    return encoded;
}
bool EspVideo::CaptureFrame(bool show_preview) {
std::lock_guard<std::mutex> lock(frame_mutex_);
if (encoder_thread_.joinable()) { if (encoder_thread_.joinable()) {
encoder_thread_.join(); encoder_thread_.join();
} }
@ -394,6 +432,10 @@ bool EspVideo::Capture() {
return false; return false;
} }
if (!show_preview && esp_timer_get_time() < foreground_capture_protected_until_us_) {
return true;
}
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
struct v4l2_buffer buf = {}; struct v4l2_buffer buf = {};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
@ -729,9 +771,14 @@ bool EspVideo::Capture() {
} }
} }
// 显示预览图片 if (show_preview) {
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay()); foreground_capture_protected_until_us_ = esp_timer_get_time() + FOREGROUND_CAPTURE_PROTECTION_US;
if (display != nullptr) { }
if (show_preview) {
// 显示预览图片
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
if (display != nullptr) {
if (!frame_.data) { if (!frame_.data) {
ESP_LOGE(TAG, "frame.data is null"); ESP_LOGE(TAG, "frame.data is null");
return false; return false;
@ -836,6 +883,7 @@ bool EspVideo::Capture() {
auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format); auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
display->SetPreviewImage(std::move(image)); display->SetPreviewImage(std::move(image));
}
} }
return true; return true;
} }
@ -898,10 +946,16 @@ bool EspVideo::SetVFlip(bool enabled) {
* @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息 * @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息
*/ */
std::string EspVideo::Explain(const std::string& question) { std::string EspVideo::Explain(const std::string& question) {
std::lock_guard<std::mutex> lock(frame_mutex_);
if (explain_url_.empty()) { if (explain_url_.empty()) {
throw std::runtime_error("Image explain URL or token is not set"); throw std::runtime_error("Image explain URL or token is not set");
} }
if (frame_.data == nullptr || frame_.len == 0) {
throw std::runtime_error("No camera frame captured");
}
// 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data // 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk)); QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
if (jpeg_queue == nullptr) { if (jpeg_queue == nullptr) {

View File

@ -5,6 +5,8 @@
#include <thread> #include <thread>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <mutex>
#include <cstdint>
#include <freertos/FreeRTOS.h> #include <freertos/FreeRTOS.h>
#include <freertos/queue.h> #include <freertos/queue.h>
@ -39,6 +41,10 @@ private:
std::string explain_url_; std::string explain_url_;
std::string explain_token_; std::string explain_token_;
std::thread encoder_thread_; std::thread encoder_thread_;
std::mutex frame_mutex_;
int64_t foreground_capture_protected_until_us_ = 0;
bool CaptureFrame(bool show_preview);
public: public:
EspVideo(const esp_video_init_config_t& config); EspVideo(const esp_video_init_config_t& config);
@ -46,6 +52,8 @@ public:
virtual void SetExplainUrl(const std::string& url, const std::string& token); virtual void SetExplainUrl(const std::string& url, const std::string& token);
virtual bool Capture(); virtual bool Capture();
virtual bool CaptureBackground() override;
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) override;
// 翻转控制函数 // 翻转控制函数
virtual bool SetHMirror(bool enabled) override; virtual bool SetHMirror(bool enabled) override;
virtual bool SetVFlip(bool enabled) override; virtual bool SetVFlip(bool enabled) override;

View File

@ -1,21 +1,23 @@
#include "wifi_board.h" #include "application.h"
#include "axp2101.h"
#include "config.h"
#include "cores3_audio_codec.h" #include "cores3_audio_codec.h"
#include "display/lcd_display.h" #include "display/lcd_display.h"
#include "application.h"
#include "config.h"
#include "power_save_timer.h"
#include "i2c_device.h" #include "i2c_device.h"
#include "axp2101.h" #include "power_save_timer.h"
#include "wifi_board.h"
#include <esp_log.h>
#include <driver/i2c_master.h> #include <driver/i2c_master.h>
#include <esp_lcd_ili9341.h>
#include <esp_lcd_panel_io.h> #include <esp_lcd_panel_io.h>
#include <esp_lcd_panel_ops.h> #include <esp_lcd_panel_ops.h>
#include <esp_lcd_ili9341.h> #include <esp_log.h>
#include <esp_timer.h> #include <esp_timer.h>
#include "esp_video.h" #include "esp_video.h"
#define TAG "M5StackCoreS3Board" #define TAG "M5StackCoreS3Board"
#define BACKGROUND_VISION_INITIAL_DELAY_MS 8000
#define BACKGROUND_VISION_SAMPLE_INTERVAL_MS 100
class Pmic : public Axp2101 { class Pmic : public Axp2101 {
public: public:
@ -41,7 +43,7 @@ public:
class CustomBacklight : public Backlight { class CustomBacklight : public Backlight {
public: public:
CustomBacklight(Pmic *pmic) : pmic_(pmic) {} CustomBacklight(Pmic* pmic) : pmic_(pmic) {}
void SetBrightnessImpl(uint8_t brightness) override { void SetBrightnessImpl(uint8_t brightness) override {
pmic_->SetBrightness(target_brightness_); pmic_->SetBrightness(target_brightness_);
@ -49,7 +51,7 @@ public:
} }
private: private:
Pmic *pmic_; Pmic* pmic_;
}; };
class Aw9523 : public I2cDevice { class Aw9523 : public I2cDevice {
@ -89,16 +91,14 @@ public:
int x = -1; int x = -1;
int y = -1; int y = -1;
}; };
Ft6336(i2c_master_bus_handle_t i2c_bus, uint8_t addr) : I2cDevice(i2c_bus, addr) { Ft6336(i2c_master_bus_handle_t i2c_bus, uint8_t addr) : I2cDevice(i2c_bus, addr) {
uint8_t chip_id = ReadReg(0xA3); uint8_t chip_id = ReadReg(0xA3);
ESP_LOGI(TAG, "Get chip ID: 0x%02X", chip_id); ESP_LOGI(TAG, "Get chip ID: 0x%02X", chip_id);
read_buffer_ = new uint8_t[6]; read_buffer_ = new uint8_t[6];
} }
~Ft6336() { ~Ft6336() { delete[] read_buffer_; }
delete[] read_buffer_;
}
void UpdateTouchPoint() { void UpdateTouchPoint() {
ReadRegs(0x02, read_buffer_, 6); ReadRegs(0x02, read_buffer_, 6);
@ -107,9 +107,7 @@ public:
tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4]; tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4];
} }
inline const TouchPoint_t& GetTouchPoint() { inline const TouchPoint_t& GetTouchPoint() { return tp_; }
return tp_;
}
private: private:
uint8_t* read_buffer_ = nullptr; uint8_t* read_buffer_ = nullptr;
@ -137,9 +135,7 @@ private:
GetDisplay()->SetPowerSaveMode(false); GetDisplay()->SetPowerSaveMode(false);
GetBacklight()->RestoreBrightness(); GetBacklight()->RestoreBrightness();
}); });
power_save_timer_->OnShutdownRequest([this]() { power_save_timer_->OnShutdownRequest([this]() { pmic_->PowerOff(); });
pmic_->PowerOff();
});
power_save_timer_->SetEnabled(true); power_save_timer_->SetEnabled(true);
} }
@ -153,9 +149,10 @@ private:
.glitch_ignore_cnt = 7, .glitch_ignore_cnt = 7,
.intr_priority = 0, .intr_priority = 0,
.trans_queue_depth = 0, .trans_queue_depth = 0,
.flags = { .flags =
.enable_internal_pullup = 1, {
}, .enable_internal_pullup = 1,
},
}; };
ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_)); ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_));
} }
@ -196,28 +193,32 @@ private:
static bool was_touched = false; static bool was_touched = false;
static int64_t touch_start_time = 0; static int64_t touch_start_time = 0;
const int64_t TOUCH_THRESHOLD_MS = 500; // 触摸时长阈值超过500ms视为长按 const int64_t TOUCH_THRESHOLD_MS = 500; // 触摸时长阈值超过500ms视为长按
ft6336_->UpdateTouchPoint(); ft6336_->UpdateTouchPoint();
auto& touch_point = ft6336_->GetTouchPoint(); auto& touch_point = ft6336_->GetTouchPoint();
// 检测触摸开始 // 检测触摸开始
if (touch_point.num > 0 && !was_touched) { if (touch_point.num > 0 && !was_touched) {
was_touched = true; was_touched = true;
touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒 touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒
} }
// 检测触摸释放 // 检测触摸释放
else if (touch_point.num == 0 && was_touched) { else if (touch_point.num == 0 && was_touched) {
was_touched = false; was_touched = false;
int64_t touch_duration = (esp_timer_get_time() / 1000) - touch_start_time; int64_t touch_duration = (esp_timer_get_time() / 1000) - touch_start_time;
// 只有短触才触发
if (touch_duration < TOUCH_THRESHOLD_MS) { if (touch_duration < TOUCH_THRESHOLD_MS) {
auto& app = Application::GetInstance(); auto& app = Application::GetInstance();
if (app.GetDeviceState() == kDeviceStateStarting) { if (app.GetDeviceState() == kDeviceStateStarting) {
EnterWifiConfigMode(); EnterWifiConfigMode();
return; return;
} }
ESP_LOGI(TAG, "Touch short: text-only mode");
app.ToggleChatState(); app.ToggleChatState();
} else {
auto& app = Application::GetInstance();
ESP_LOGI(TAG, "Touch long: vision+text mode");
app.ToggleChatStateWithVision();
} }
} }
} }
@ -225,19 +226,20 @@ private:
void InitializeFt6336TouchPad() { void InitializeFt6336TouchPad() {
ESP_LOGI(TAG, "Init FT6336"); ESP_LOGI(TAG, "Init FT6336");
ft6336_ = new Ft6336(i2c_bus_, 0x38); ft6336_ = new Ft6336(i2c_bus_, 0x38);
// 创建定时器20ms 间隔 // 创建定时器20ms 间隔
esp_timer_create_args_t timer_args = { esp_timer_create_args_t timer_args = {
.callback = [](void* arg) { .callback =
M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg; [](void* arg) {
board->PollTouchpad(); M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg;
}, board->PollTouchpad();
},
.arg = this, .arg = this,
.dispatch_method = ESP_TIMER_TASK, .dispatch_method = ESP_TIMER_TASK,
.name = "touchpad_timer", .name = "touchpad_timer",
.skip_unhandled_events = true, .skip_unhandled_events = true,
}; };
ESP_ERROR_CHECK(esp_timer_create(&timer_args, &touchpad_timer_)); ESP_ERROR_CHECK(esp_timer_create(&timer_args, &touchpad_timer_));
ESP_ERROR_CHECK(esp_timer_start_periodic(touchpad_timer_, 20 * 1000)); ESP_ERROR_CHECK(esp_timer_start_periodic(touchpad_timer_, 20 * 1000));
} }
@ -276,7 +278,7 @@ private:
panel_config.rgb_ele_order = LCD_RGB_ELEMENT_ORDER_BGR; panel_config.rgb_ele_order = LCD_RGB_ELEMENT_ORDER_BGR;
panel_config.bits_per_pixel = 16; panel_config.bits_per_pixel = 16;
ESP_ERROR_CHECK(esp_lcd_new_panel_ili9341(panel_io, &panel_config, &panel)); ESP_ERROR_CHECK(esp_lcd_new_panel_ili9341(panel_io, &panel_config, &panel));
esp_lcd_panel_reset(panel); esp_lcd_panel_reset(panel);
aw9523_->ResetIli9342(); aw9523_->ResetIli9342();
@ -285,23 +287,25 @@ private:
esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY); esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY);
esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y); esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
display_ = new SpiLcdDisplay(panel_io, panel, display_ = new SpiLcdDisplay(panel_io, panel, DISPLAY_WIDTH, DISPLAY_HEIGHT,
DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY); DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X,
DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
} }
void InitializeCamera() { void InitializeCamera() {
static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = { static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = {
.data_width = CAM_CTLR_DATA_WIDTH_8, .data_width = CAM_CTLR_DATA_WIDTH_8,
.data_io = { .data_io =
[0] = CAMERA_PIN_D0, {
[1] = CAMERA_PIN_D1, [0] = CAMERA_PIN_D0,
[2] = CAMERA_PIN_D2, [1] = CAMERA_PIN_D1,
[3] = CAMERA_PIN_D3, [2] = CAMERA_PIN_D2,
[4] = CAMERA_PIN_D4, [3] = CAMERA_PIN_D3,
[5] = CAMERA_PIN_D5, [4] = CAMERA_PIN_D4,
[6] = CAMERA_PIN_D6, [5] = CAMERA_PIN_D5,
[7] = CAMERA_PIN_D7, [6] = CAMERA_PIN_D6,
}, [7] = CAMERA_PIN_D7,
},
.vsync_io = CAMERA_PIN_VSYNC, .vsync_io = CAMERA_PIN_VSYNC,
.de_io = CAMERA_PIN_HREF, .de_io = CAMERA_PIN_HREF,
.pclk_io = CAMERA_PIN_PCLK, .pclk_io = CAMERA_PIN_PCLK,
@ -330,6 +334,37 @@ private:
camera_->SetHMirror(false); camera_->SetHMirror(false);
} }
void InitializeBackgroundVisionSampler() {
xTaskCreate(
[](void* arg) {
auto board = static_cast<M5StackCoreS3Board*>(arg);
bool has_logged_success = false;
bool has_logged_failure = false;
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS));
while (true) {
if (board->camera_ == nullptr) {
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
continue;
}
if (board->camera_->CaptureBackground()) {
if (!has_logged_success) {
ESP_LOGI(TAG, "Background vision sampler started");
has_logged_success = true;
}
} else if (!has_logged_failure) {
ESP_LOGW(TAG, "Background vision sampler is waiting for camera");
has_logged_failure = true;
}
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
}
},
"BgVisionSampler", 4096, this, 1, nullptr);
}
public: public:
M5StackCoreS3Board() { M5StackCoreS3Board() {
InitializePowerSaveTimer(); InitializePowerSaveTimer();
@ -340,34 +375,24 @@ public:
InitializeSpi(); InitializeSpi();
InitializeIli9342Display(); InitializeIli9342Display();
InitializeCamera(); InitializeCamera();
InitializeBackgroundVisionSampler();
InitializeFt6336TouchPad(); InitializeFt6336TouchPad();
GetBacklight()->RestoreBrightness(); GetBacklight()->RestoreBrightness();
} }
virtual AudioCodec* GetAudioCodec() override { virtual AudioCodec* GetAudioCodec() override {
static CoreS3AudioCodec audio_codec(i2c_bus_, static CoreS3AudioCodec audio_codec(
AUDIO_INPUT_SAMPLE_RATE, i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE, AUDIO_I2S_GPIO_MCLK,
AUDIO_OUTPUT_SAMPLE_RATE, AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN,
AUDIO_I2S_GPIO_MCLK, AUDIO_CODEC_AW88298_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE);
AUDIO_I2S_GPIO_BCLK,
AUDIO_I2S_GPIO_WS,
AUDIO_I2S_GPIO_DOUT,
AUDIO_I2S_GPIO_DIN,
AUDIO_CODEC_AW88298_ADDR,
AUDIO_CODEC_ES7210_ADDR,
AUDIO_INPUT_REFERENCE);
return &audio_codec; return &audio_codec;
} }
virtual Display* GetDisplay() override { virtual Display* GetDisplay() override { return display_; }
return display_;
}
virtual Camera* GetCamera() override { virtual Camera* GetCamera() override { return camera_; }
return camera_;
}
virtual bool GetBatteryLevel(int &level, bool& charging, bool& discharging) override { virtual bool GetBatteryLevel(int& level, bool& charging, bool& discharging) override {
static bool last_discharging = false; static bool last_discharging = false;
charging = pmic_->IsCharging(); charging = pmic_->IsCharging();
discharging = pmic_->IsDischarging(); discharging = pmic_->IsDischarging();
@ -387,7 +412,7 @@ public:
WifiBoard::SetPowerSaveLevel(level); WifiBoard::SetPowerSaveLevel(level);
} }
virtual Backlight *GetBacklight() override { virtual Backlight* GetBacklight() override {
static CustomBacklight backlight(pmic_); static CustomBacklight backlight(pmic_);
return &backlight; return &backlight;
} }

View File

@ -598,6 +598,10 @@ CONFIG_PARTITION_TABLE_MD5=y
# Xiaozhi Assistant # Xiaozhi Assistant
# #
CONFIG_OTA_URL="https://api.tenclass.net/xiaozhi/ota/" CONFIG_OTA_URL="https://api.tenclass.net/xiaozhi/ota/"
CONFIG_USE_DIRECT_WEBSOCKET=y
CONFIG_WEBSOCKET_URL="ws://172.19.0.240:8080"
CONFIG_WEBSOCKET_TOKEN=""
CONFIG_WEBSOCKET_PROTOCOL_VERSION=1
# CONFIG_FLASH_NONE_ASSETS is not set # CONFIG_FLASH_NONE_ASSETS is not set
CONFIG_FLASH_DEFAULT_ASSETS=y CONFIG_FLASH_DEFAULT_ASSETS=y
# CONFIG_FLASH_CUSTOM_ASSETS is not set # CONFIG_FLASH_CUSTOM_ASSETS is not set

1123
main/bridge_server.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,22 @@
#include "protocol.h" #include "protocol.h"
#include <esp_log.h> #include <esp_log.h>
#include <mbedtls/base64.h>
#define TAG "Protocol" #define TAG "Protocol"
static std::string Base64Encode(const std::string& data) {
size_t encoded_length = 0;
size_t output_length = 0;
mbedtls_base64_encode(nullptr, 0, &encoded_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
std::string result(encoded_length, 0);
mbedtls_base64_encode(reinterpret_cast<unsigned char*>(result.data()), result.size(), &output_length,
reinterpret_cast<const unsigned char*>(data.data()), data.size());
result.resize(output_length);
return result;
}
void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) { void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) {
on_incoming_json_ = callback; on_incoming_json_ = callback;
} }
@ -78,6 +91,27 @@ void Protocol::SendMcpMessage(const std::string& payload) {
SendText(message); SendText(message);
} }
void Protocol::SendVisionFrame(const std::string& jpeg_data) {
if (jpeg_data.empty()) {
return;
}
cJSON* root = cJSON_CreateObject();
cJSON_AddStringToObject(root, "session_id", session_id_.c_str());
cJSON_AddStringToObject(root, "type", "vision");
cJSON_AddStringToObject(root, "state", "frame");
cJSON_AddStringToObject(root, "mime_type", "image/jpeg");
auto encoded = Base64Encode(jpeg_data);
cJSON_AddStringToObject(root, "image", encoded.c_str());
char* json_str = cJSON_PrintUnformatted(root);
if (json_str != nullptr) {
SendText(json_str);
cJSON_free(json_str);
}
cJSON_Delete(root);
}
bool Protocol::IsTimeout() const { bool Protocol::IsTimeout() const {
const int kTimeoutSeconds = 120; const int kTimeoutSeconds = 120;
auto now = std::chrono::steady_clock::now(); auto now = std::chrono::steady_clock::now();

View File

@ -73,6 +73,7 @@ public:
virtual void SendStopListening(); virtual void SendStopListening();
virtual void SendAbortSpeaking(AbortReason reason); virtual void SendAbortSpeaking(AbortReason reason);
virtual void SendMcpMessage(const std::string& message); virtual void SendMcpMessage(const std::string& message);
virtual void SendVisionFrame(const std::string& jpeg_data);
protected: protected:
std::function<void(const cJSON* root)> on_incoming_json_; std::function<void(const cJSON* root)> on_incoming_json_;
@ -95,4 +96,3 @@ protected:
}; };
#endif // PROTOCOL_H #endif // PROTOCOL_H

View File

@ -85,10 +85,21 @@ bool WebsocketProtocol::OpenAudioChannel() {
std::string url = settings.GetString("url"); std::string url = settings.GetString("url");
std::string token = settings.GetString("token"); std::string token = settings.GetString("token");
int version = settings.GetInt("version"); int version = settings.GetInt("version");
#if CONFIG_USE_DIRECT_WEBSOCKET
url = CONFIG_WEBSOCKET_URL;
token = CONFIG_WEBSOCKET_TOKEN;
version = CONFIG_WEBSOCKET_PROTOCOL_VERSION;
#endif
if (version != 0) { if (version != 0) {
version_ = version; version_ = version;
} }
if (url.empty()) {
ESP_LOGE(TAG, "Websocket URL is not set");
SetError(Lang::Strings::SERVER_NOT_CONNECTED);
return false;
}
error_occurred_ = false; error_occurred_ = false;
auto network = Board::GetInstance().GetNetwork(); auto network = Board::GetInstance().GetNetwork();