feat: support camera capture to livekit
This commit is contained in:
@ -7,6 +7,8 @@ class Camera {
|
||||
public:
|
||||
virtual void SetExplainUrl(const std::string& url, const std::string& token) = 0;
|
||||
virtual bool Capture() = 0;
|
||||
virtual bool CaptureBackground() { return Capture(); }
|
||||
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) { return false; }
|
||||
virtual bool SetHMirror(bool enabled) = 0;
|
||||
virtual bool SetVFlip(bool enabled) = 0;
|
||||
virtual bool SetSwapBytes(bool enabled) { return false; } // Optional, default no-op
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include "lvgl_display.h"
|
||||
#include "mcp_server.h"
|
||||
#include "system_info.h"
|
||||
#include "esp_timer.h"
|
||||
|
||||
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
#undef LOG_LOCAL_LEVEL
|
||||
@ -55,6 +56,7 @@
|
||||
|
||||
|
||||
#define TAG "EspVideo"
|
||||
#define FOREGROUND_CAPTURE_PROTECTION_US (10 * 1000 * 1000)
|
||||
|
||||
#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
|
||||
#warning \
|
||||
@ -381,11 +383,47 @@ EspVideo::~EspVideo() {
|
||||
}
|
||||
|
||||
void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) {
|
||||
std::lock_guard<std::mutex> lock(frame_mutex_);
|
||||
explain_url_ = url;
|
||||
explain_token_ = token;
|
||||
}
|
||||
|
||||
bool EspVideo::Capture() {
|
||||
return CaptureFrame(true);
|
||||
}
|
||||
|
||||
bool EspVideo::CaptureBackground() {
|
||||
return CaptureFrame(false);
|
||||
}
|
||||
|
||||
bool EspVideo::CaptureToJpeg(std::string& jpeg_data, bool show_preview) {
|
||||
jpeg_data.clear();
|
||||
if (!CaptureFrame(show_preview)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(frame_mutex_);
|
||||
if (frame_.data == nullptr || frame_.len == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint16_t w = frame_.width ? frame_.width : 320;
|
||||
uint16_t h = frame_.height ? frame_.height : 240;
|
||||
return image_to_jpeg_cb(
|
||||
frame_.data, frame_.len, w, h, frame_.format, 60,
|
||||
[](void* arg, size_t index, const void* data, size_t len) -> size_t {
|
||||
auto jpeg_data = static_cast<std::string*>(arg);
|
||||
if (data != nullptr && len > 0) {
|
||||
jpeg_data->append(static_cast<const char*>(data), len);
|
||||
}
|
||||
return len;
|
||||
},
|
||||
&jpeg_data);
|
||||
}
|
||||
|
||||
bool EspVideo::CaptureFrame(bool show_preview) {
|
||||
std::lock_guard<std::mutex> lock(frame_mutex_);
|
||||
|
||||
if (encoder_thread_.joinable()) {
|
||||
encoder_thread_.join();
|
||||
}
|
||||
@ -394,6 +432,10 @@ bool EspVideo::Capture() {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!show_preview && esp_timer_get_time() < foreground_capture_protected_until_us_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
struct v4l2_buffer buf = {};
|
||||
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
@ -729,9 +771,14 @@ bool EspVideo::Capture() {
|
||||
}
|
||||
}
|
||||
|
||||
// 显示预览图片
|
||||
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
|
||||
if (display != nullptr) {
|
||||
if (show_preview) {
|
||||
foreground_capture_protected_until_us_ = esp_timer_get_time() + FOREGROUND_CAPTURE_PROTECTION_US;
|
||||
}
|
||||
|
||||
if (show_preview) {
|
||||
// 显示预览图片
|
||||
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
|
||||
if (display != nullptr) {
|
||||
if (!frame_.data) {
|
||||
ESP_LOGE(TAG, "frame.data is null");
|
||||
return false;
|
||||
@ -836,6 +883,7 @@ bool EspVideo::Capture() {
|
||||
|
||||
auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
|
||||
display->SetPreviewImage(std::move(image));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -898,10 +946,16 @@ bool EspVideo::SetVFlip(bool enabled) {
|
||||
* @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息
|
||||
*/
|
||||
std::string EspVideo::Explain(const std::string& question) {
|
||||
std::lock_guard<std::mutex> lock(frame_mutex_);
|
||||
|
||||
if (explain_url_.empty()) {
|
||||
throw std::runtime_error("Image explain URL or token is not set");
|
||||
}
|
||||
|
||||
if (frame_.data == nullptr || frame_.len == 0) {
|
||||
throw std::runtime_error("No camera frame captured");
|
||||
}
|
||||
|
||||
// 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data
|
||||
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
|
||||
if (jpeg_queue == nullptr) {
|
||||
|
||||
@ -5,6 +5,8 @@
|
||||
#include <thread>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <cstdint>
|
||||
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/queue.h>
|
||||
@ -39,6 +41,10 @@ private:
|
||||
std::string explain_url_;
|
||||
std::string explain_token_;
|
||||
std::thread encoder_thread_;
|
||||
std::mutex frame_mutex_;
|
||||
int64_t foreground_capture_protected_until_us_ = 0;
|
||||
|
||||
bool CaptureFrame(bool show_preview);
|
||||
|
||||
public:
|
||||
EspVideo(const esp_video_init_config_t& config);
|
||||
@ -46,6 +52,8 @@ public:
|
||||
|
||||
virtual void SetExplainUrl(const std::string& url, const std::string& token);
|
||||
virtual bool Capture();
|
||||
virtual bool CaptureBackground() override;
|
||||
virtual bool CaptureToJpeg(std::string& jpeg_data, bool show_preview = false) override;
|
||||
// 翻转控制函数
|
||||
virtual bool SetHMirror(bool enabled) override;
|
||||
virtual bool SetVFlip(bool enabled) override;
|
||||
|
||||
@ -1,21 +1,23 @@
|
||||
#include "wifi_board.h"
|
||||
#include "application.h"
|
||||
#include "axp2101.h"
|
||||
#include "config.h"
|
||||
#include "cores3_audio_codec.h"
|
||||
#include "display/lcd_display.h"
|
||||
#include "application.h"
|
||||
#include "config.h"
|
||||
#include "power_save_timer.h"
|
||||
#include "i2c_device.h"
|
||||
#include "axp2101.h"
|
||||
#include "power_save_timer.h"
|
||||
#include "wifi_board.h"
|
||||
|
||||
#include <esp_log.h>
|
||||
#include <driver/i2c_master.h>
|
||||
#include <esp_lcd_ili9341.h>
|
||||
#include <esp_lcd_panel_io.h>
|
||||
#include <esp_lcd_panel_ops.h>
|
||||
#include <esp_lcd_ili9341.h>
|
||||
#include <esp_log.h>
|
||||
#include <esp_timer.h>
|
||||
#include "esp_video.h"
|
||||
|
||||
#define TAG "M5StackCoreS3Board"
|
||||
#define BACKGROUND_VISION_INITIAL_DELAY_MS 8000
|
||||
#define BACKGROUND_VISION_SAMPLE_INTERVAL_MS 100
|
||||
|
||||
class Pmic : public Axp2101 {
|
||||
public:
|
||||
@ -41,7 +43,7 @@ public:
|
||||
|
||||
class CustomBacklight : public Backlight {
|
||||
public:
|
||||
CustomBacklight(Pmic *pmic) : pmic_(pmic) {}
|
||||
CustomBacklight(Pmic* pmic) : pmic_(pmic) {}
|
||||
|
||||
void SetBrightnessImpl(uint8_t brightness) override {
|
||||
pmic_->SetBrightness(target_brightness_);
|
||||
@ -49,7 +51,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
Pmic *pmic_;
|
||||
Pmic* pmic_;
|
||||
};
|
||||
|
||||
class Aw9523 : public I2cDevice {
|
||||
@ -89,16 +91,14 @@ public:
|
||||
int x = -1;
|
||||
int y = -1;
|
||||
};
|
||||
|
||||
|
||||
// Reads register 0xA3, logs it as the chip ID, then allocates the 6-byte
// buffer that UpdateTouchPoint() fills via ReadRegs().
Ft6336(i2c_master_bus_handle_t i2c_bus, uint8_t addr) : I2cDevice(i2c_bus, addr) {
    // Keep the register read outside the log macro so it always executes.
    const uint8_t id = ReadReg(0xA3);
    ESP_LOGI(TAG, "Get chip ID: 0x%02X", id);

    read_buffer_ = new uint8_t[6];
}
|
||||
|
||||
~Ft6336() {
|
||||
delete[] read_buffer_;
|
||||
}
|
||||
~Ft6336() { delete[] read_buffer_; }
|
||||
|
||||
void UpdateTouchPoint() {
|
||||
ReadRegs(0x02, read_buffer_, 6);
|
||||
@ -107,9 +107,7 @@ public:
|
||||
tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4];
|
||||
}
|
||||
|
||||
inline const TouchPoint_t& GetTouchPoint() {
|
||||
return tp_;
|
||||
}
|
||||
inline const TouchPoint_t& GetTouchPoint() { return tp_; }
|
||||
|
||||
private:
|
||||
uint8_t* read_buffer_ = nullptr;
|
||||
@ -137,9 +135,7 @@ private:
|
||||
GetDisplay()->SetPowerSaveMode(false);
|
||||
GetBacklight()->RestoreBrightness();
|
||||
});
|
||||
power_save_timer_->OnShutdownRequest([this]() {
|
||||
pmic_->PowerOff();
|
||||
});
|
||||
power_save_timer_->OnShutdownRequest([this]() { pmic_->PowerOff(); });
|
||||
power_save_timer_->SetEnabled(true);
|
||||
}
|
||||
|
||||
@ -153,9 +149,10 @@ private:
|
||||
.glitch_ignore_cnt = 7,
|
||||
.intr_priority = 0,
|
||||
.trans_queue_depth = 0,
|
||||
.flags = {
|
||||
.enable_internal_pullup = 1,
|
||||
},
|
||||
.flags =
|
||||
{
|
||||
.enable_internal_pullup = 1,
|
||||
},
|
||||
};
|
||||
ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_));
|
||||
}
|
||||
@ -196,28 +193,32 @@ private:
|
||||
static bool was_touched = false;
|
||||
static int64_t touch_start_time = 0;
|
||||
const int64_t TOUCH_THRESHOLD_MS = 500; // 触摸时长阈值,超过500ms视为长按
|
||||
|
||||
|
||||
ft6336_->UpdateTouchPoint();
|
||||
auto& touch_point = ft6336_->GetTouchPoint();
|
||||
|
||||
|
||||
// 检测触摸开始
|
||||
if (touch_point.num > 0 && !was_touched) {
|
||||
was_touched = true;
|
||||
touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒
|
||||
}
|
||||
touch_start_time = esp_timer_get_time() / 1000; // 转换为毫秒
|
||||
}
|
||||
// 检测触摸释放
|
||||
else if (touch_point.num == 0 && was_touched) {
|
||||
was_touched = false;
|
||||
int64_t touch_duration = (esp_timer_get_time() / 1000) - touch_start_time;
|
||||
|
||||
// 只有短触才触发
|
||||
|
||||
if (touch_duration < TOUCH_THRESHOLD_MS) {
|
||||
auto& app = Application::GetInstance();
|
||||
if (app.GetDeviceState() == kDeviceStateStarting) {
|
||||
EnterWifiConfigMode();
|
||||
return;
|
||||
}
|
||||
ESP_LOGI(TAG, "Touch short: text-only mode");
|
||||
app.ToggleChatState();
|
||||
} else {
|
||||
auto& app = Application::GetInstance();
|
||||
ESP_LOGI(TAG, "Touch long: vision+text mode");
|
||||
app.ToggleChatStateWithVision();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -225,19 +226,20 @@ private:
|
||||
void InitializeFt6336TouchPad() {
|
||||
ESP_LOGI(TAG, "Init FT6336");
|
||||
ft6336_ = new Ft6336(i2c_bus_, 0x38);
|
||||
|
||||
|
||||
// 创建定时器,20ms 间隔
|
||||
esp_timer_create_args_t timer_args = {
|
||||
.callback = [](void* arg) {
|
||||
M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg;
|
||||
board->PollTouchpad();
|
||||
},
|
||||
.callback =
|
||||
[](void* arg) {
|
||||
M5StackCoreS3Board* board = (M5StackCoreS3Board*)arg;
|
||||
board->PollTouchpad();
|
||||
},
|
||||
.arg = this,
|
||||
.dispatch_method = ESP_TIMER_TASK,
|
||||
.name = "touchpad_timer",
|
||||
.skip_unhandled_events = true,
|
||||
};
|
||||
|
||||
|
||||
ESP_ERROR_CHECK(esp_timer_create(&timer_args, &touchpad_timer_));
|
||||
ESP_ERROR_CHECK(esp_timer_start_periodic(touchpad_timer_, 20 * 1000));
|
||||
}
|
||||
@ -276,7 +278,7 @@ private:
|
||||
panel_config.rgb_ele_order = LCD_RGB_ELEMENT_ORDER_BGR;
|
||||
panel_config.bits_per_pixel = 16;
|
||||
ESP_ERROR_CHECK(esp_lcd_new_panel_ili9341(panel_io, &panel_config, &panel));
|
||||
|
||||
|
||||
esp_lcd_panel_reset(panel);
|
||||
aw9523_->ResetIli9342();
|
||||
|
||||
@ -285,23 +287,25 @@ private:
|
||||
esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY);
|
||||
esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
|
||||
|
||||
display_ = new SpiLcdDisplay(panel_io, panel,
|
||||
DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
|
||||
display_ = new SpiLcdDisplay(panel_io, panel, DISPLAY_WIDTH, DISPLAY_HEIGHT,
|
||||
DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y, DISPLAY_MIRROR_X,
|
||||
DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
|
||||
}
|
||||
|
||||
void InitializeCamera() {
|
||||
void InitializeCamera() {
|
||||
static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = {
|
||||
.data_width = CAM_CTLR_DATA_WIDTH_8,
|
||||
.data_io = {
|
||||
[0] = CAMERA_PIN_D0,
|
||||
[1] = CAMERA_PIN_D1,
|
||||
[2] = CAMERA_PIN_D2,
|
||||
[3] = CAMERA_PIN_D3,
|
||||
[4] = CAMERA_PIN_D4,
|
||||
[5] = CAMERA_PIN_D5,
|
||||
[6] = CAMERA_PIN_D6,
|
||||
[7] = CAMERA_PIN_D7,
|
||||
},
|
||||
.data_io =
|
||||
{
|
||||
[0] = CAMERA_PIN_D0,
|
||||
[1] = CAMERA_PIN_D1,
|
||||
[2] = CAMERA_PIN_D2,
|
||||
[3] = CAMERA_PIN_D3,
|
||||
[4] = CAMERA_PIN_D4,
|
||||
[5] = CAMERA_PIN_D5,
|
||||
[6] = CAMERA_PIN_D6,
|
||||
[7] = CAMERA_PIN_D7,
|
||||
},
|
||||
.vsync_io = CAMERA_PIN_VSYNC,
|
||||
.de_io = CAMERA_PIN_HREF,
|
||||
.pclk_io = CAMERA_PIN_PCLK,
|
||||
@ -330,6 +334,37 @@ private:
|
||||
camera_->SetHMirror(false);
|
||||
}
|
||||
|
||||
void InitializeBackgroundVisionSampler() {
|
||||
xTaskCreate(
|
||||
[](void* arg) {
|
||||
auto board = static_cast<M5StackCoreS3Board*>(arg);
|
||||
bool has_logged_success = false;
|
||||
bool has_logged_failure = false;
|
||||
|
||||
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_INITIAL_DELAY_MS));
|
||||
|
||||
while (true) {
|
||||
if (board->camera_ == nullptr) {
|
||||
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (board->camera_->CaptureBackground()) {
|
||||
if (!has_logged_success) {
|
||||
ESP_LOGI(TAG, "Background vision sampler started");
|
||||
has_logged_success = true;
|
||||
}
|
||||
} else if (!has_logged_failure) {
|
||||
ESP_LOGW(TAG, "Background vision sampler is waiting for camera");
|
||||
has_logged_failure = true;
|
||||
}
|
||||
|
||||
vTaskDelay(pdMS_TO_TICKS(BACKGROUND_VISION_SAMPLE_INTERVAL_MS));
|
||||
}
|
||||
},
|
||||
"BgVisionSampler", 4096, this, 1, nullptr);
|
||||
}
|
||||
|
||||
public:
|
||||
M5StackCoreS3Board() {
|
||||
InitializePowerSaveTimer();
|
||||
@ -340,34 +375,24 @@ public:
|
||||
InitializeSpi();
|
||||
InitializeIli9342Display();
|
||||
InitializeCamera();
|
||||
InitializeBackgroundVisionSampler();
|
||||
InitializeFt6336TouchPad();
|
||||
GetBacklight()->RestoreBrightness();
|
||||
}
|
||||
|
||||
virtual AudioCodec* GetAudioCodec() override {
|
||||
static CoreS3AudioCodec audio_codec(i2c_bus_,
|
||||
AUDIO_INPUT_SAMPLE_RATE,
|
||||
AUDIO_OUTPUT_SAMPLE_RATE,
|
||||
AUDIO_I2S_GPIO_MCLK,
|
||||
AUDIO_I2S_GPIO_BCLK,
|
||||
AUDIO_I2S_GPIO_WS,
|
||||
AUDIO_I2S_GPIO_DOUT,
|
||||
AUDIO_I2S_GPIO_DIN,
|
||||
AUDIO_CODEC_AW88298_ADDR,
|
||||
AUDIO_CODEC_ES7210_ADDR,
|
||||
AUDIO_INPUT_REFERENCE);
|
||||
static CoreS3AudioCodec audio_codec(
|
||||
i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE, AUDIO_I2S_GPIO_MCLK,
|
||||
AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN,
|
||||
AUDIO_CODEC_AW88298_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE);
|
||||
return &audio_codec;
|
||||
}
|
||||
|
||||
virtual Display* GetDisplay() override {
|
||||
return display_;
|
||||
}
|
||||
virtual Display* GetDisplay() override { return display_; }
|
||||
|
||||
virtual Camera* GetCamera() override {
|
||||
return camera_;
|
||||
}
|
||||
virtual Camera* GetCamera() override { return camera_; }
|
||||
|
||||
virtual bool GetBatteryLevel(int &level, bool& charging, bool& discharging) override {
|
||||
virtual bool GetBatteryLevel(int& level, bool& charging, bool& discharging) override {
|
||||
static bool last_discharging = false;
|
||||
charging = pmic_->IsCharging();
|
||||
discharging = pmic_->IsDischarging();
|
||||
@ -387,7 +412,7 @@ public:
|
||||
WifiBoard::SetPowerSaveLevel(level);
|
||||
}
|
||||
|
||||
virtual Backlight *GetBacklight() override {
|
||||
virtual Backlight* GetBacklight() override {
|
||||
static CustomBacklight backlight(pmic_);
|
||||
return &backlight;
|
||||
}
|
||||
|
||||
@ -599,7 +599,7 @@ CONFIG_PARTITION_TABLE_MD5=y
|
||||
#
|
||||
CONFIG_OTA_URL="https://api.tenclass.net/xiaozhi/ota/"
|
||||
CONFIG_USE_DIRECT_WEBSOCKET=y
|
||||
CONFIG_WEBSOCKET_URL="ws://10.6.80.130:8080"
|
||||
CONFIG_WEBSOCKET_URL="ws://172.19.0.240:8080"
|
||||
CONFIG_WEBSOCKET_TOKEN=""
|
||||
CONFIG_WEBSOCKET_PROTOCOL_VERSION=1
|
||||
# CONFIG_FLASH_NONE_ASSETS is not set
|
||||
|
||||
Reference in New Issue
Block a user