Support both esp_video and esp32_camera (#1671)

* Update project version to 2.2.1 and refactor camera component handling

- Incremented project version from 2.2.0 to 2.2.1 in CMakeLists.txt.
- Removed legacy esp32_camera component and replaced it with esp_video for ESP32-S3 and ESP32-P4 boards.
- Updated board implementations to utilize the new esp_video component, ensuring compatibility and improved functionality.
- Cleaned up Kconfig options related to camera selection, streamlining the configuration process.
- Enhanced camera initialization logic across various board files to support the new component structure.

* Refactor camera handling in AtomS3R CAM/M12 EchoBase board

- Replaced the legacy EspVideo component with the new Esp32Camera class for improved camera functionality.
- Updated camera initialization logic to utilize a more structured configuration approach, enhancing clarity and maintainability.
- Removed outdated comments and code related to the previous camera implementation in the README file.

* Update camera configuration for atoms3r-cam-m12-echo-base

- Removed outdated camera configuration options from config.json to streamline the setup.
- Retained essential partition table configuration for improved clarity.

* Enhance Esp32Camera functionality and memory management

- Added esp_timer.h for improved timing functionality.
- Streamlined camera initialization by removing redundant frame buffer setup and logging.
- Improved memory allocation for JPEG encoding and added error handling for unsupported pixel formats.
- Updated comments for clarity and consistency, ensuring better understanding of the code flow.
This commit is contained in:
Xiaoxia
2026-01-20 22:44:37 +08:00
committed by GitHub
parent d5ec8f7081
commit 734b5b410a
41 changed files with 1348 additions and 1581 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,5 @@
#pragma once
#include "sdkconfig.h"
// esp32_camera (使用 esp_video 组件) 用于 ESP32-P4或 ESP32-S3 选择使用 esp_video 时
#if defined(CONFIG_IDF_TARGET_ESP32P4) || (defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_VIDEO))
#include <lvgl.h>
#include <thread>
@ -12,46 +10,31 @@
#include <freertos/queue.h>
#include "camera.h"
#include "esp_camera.h"
#include "jpg/image_to_jpeg.h"
#include "esp_video_init.h"
struct JpegChunk {
uint8_t* data;
struct JpegChunk
{
uint8_t *data;
size_t len;
};
class Esp32Camera : public Camera {
class Esp32Camera : public Camera
{
private:
struct FrameBuffer {
uint8_t *data = nullptr;
size_t len = 0;
uint16_t width = 0;
uint16_t height = 0;
v4l2_pix_fmt_t format = 0;
} frame_;
v4l2_pix_fmt_t sensor_format_ = 0;
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
uint16_t sensor_width_ = 0;
uint16_t sensor_height_ = 0;
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
int video_fd_ = -1;
bool streaming_on_ = false;
struct MmapBuffer { void *start = nullptr; size_t length = 0; };
std::vector<MmapBuffer> mmap_buffers_;
std::string explain_url_;
std::string explain_token_;
std::thread encoder_thread_;
camera_fb_t *current_fb_ = nullptr;
public:
Esp32Camera(const esp_video_init_config_t& config);
Esp32Camera(const camera_config_t &config);
~Esp32Camera();
virtual void SetExplainUrl(const std::string& url, const std::string& token);
virtual bool Capture();
// 翻转控制函数
virtual void SetExplainUrl(const std::string &url, const std::string &token) override;
virtual bool Capture() override;
virtual bool SetHMirror(bool enabled) override;
virtual bool SetVFlip(bool enabled) override;
virtual std::string Explain(const std::string& question);
virtual std::string Explain(const std::string &question) override;
};
#endif // ndef CONFIG_IDF_TARGET_ESP32

View File

@ -1,413 +0,0 @@
#include "sdkconfig.h"
// esp32s3_camera (built on the esp_camera component): used only on ESP32-S3 when esp_camera is selected
#if defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_CAMERA)
#include <esp_heap_caps.h>
#include <cstdio>
#include <cstring>
#include <esp_log.h>
#include "esp32s3_camera.h"
#include "board.h"
#include "display.h"
#include "lvgl_display.h"
#include "mcp_server.h"
#include "system_info.h"
#include "jpg/image_to_jpeg.h"
// Log tag passed to the ESP_LOG* macros throughout this file.
#define TAG "Esp32S3Camera"
// V4L2-compatible pixel format codes.
// Each value is a FourCC: the four ASCII characters noted on the right,
// packed with the first character in the least-significant byte.
#define V4L2_PIX_FMT_RGB565 0x50424752 // 'RGBP'
#define V4L2_PIX_FMT_YUYV 0x56595559 // 'YUYV'
#define V4L2_PIX_FMT_JPEG 0x4745504A // 'JPEG'
#define V4L2_PIX_FMT_RGB24 0x33424752 // 'RGB3'
#define V4L2_PIX_FMT_GREY 0x59455247 // 'GREY'
/**
 * Map an esp_camera pixel format onto the matching V4L2 FourCC code.
 *
 * @param fmt Pixel format as used by the esp_camera driver.
 * @return The corresponding V4L2_PIX_FMT_* value, or 0 when the format has
 *         no mapping in this table.
 */
static uint32_t pixformat_to_v4l2(pixformat_t fmt)
{
    if (fmt == PIXFORMAT_RGB565)
    {
        return V4L2_PIX_FMT_RGB565;
    }
    if (fmt == PIXFORMAT_YUV422)
    {
        return V4L2_PIX_FMT_YUYV;
    }
    if (fmt == PIXFORMAT_JPEG)
    {
        return V4L2_PIX_FMT_JPEG;
    }
    if (fmt == PIXFORMAT_RGB888)
    {
        return V4L2_PIX_FMT_RGB24;
    }
    if (fmt == PIXFORMAT_GRAYSCALE)
    {
        return V4L2_PIX_FMT_GREY;
    }
    // Anything else has no V4L2 equivalent here.
    return 0;
}
/**
 * Initialise the esp_camera driver with the given configuration.
 *
 * On driver failure the error is logged and the object is left with
 * streaming_on_ == false, so Capture() will refuse to run. On success the
 * expected frame geometry is derived from config.frame_size (falling back to
 * 320x240 for sizes not listed below) and streaming is marked as active.
 *
 * @param config esp_camera driver configuration (pins, frame size, pixel format, ...).
 */
Esp32S3Camera::Esp32S3Camera(const camera_config_t &config)
{
    const esp_err_t init_result = esp_camera_init(&config);
    if (init_result != ESP_OK)
    {
        ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", init_result);
        return;
    }

    // Frame metadata is only filled in when the driver exposes a sensor handle.
    if (esp_camera_sensor_get() != nullptr)
    {
        // QVGA and every frame size not listed in the switch use 320x240.
        uint16_t width = 320;
        uint16_t height = 240;
        switch (config.frame_size)
        {
        case FRAMESIZE_VGA:
            width = 640;
            height = 480;
            break;
        case FRAMESIZE_SVGA:
            width = 800;
            height = 600;
            break;
        case FRAMESIZE_XGA:
            width = 1024;
            height = 768;
            break;
        case FRAMESIZE_HD:
            width = 1280;
            height = 720;
            break;
        case FRAMESIZE_SXGA:
            width = 1280;
            height = 1024;
            break;
        case FRAMESIZE_UXGA:
            width = 1600;
            height = 1200;
            break;
        default:
            break;
        }
        frame_.width = width;
        frame_.height = height;
        frame_.format = config.pixel_format;
        ESP_LOGI(TAG, "Camera initialized: %dx%d, format=%d", frame_.width, frame_.height, config.pixel_format);
    }

    streaming_on_ = true;
    ESP_LOGI(TAG, "ESP32-S3 Camera init success");
}
/**
 * Shut the camera down and release everything this object owns.
 *
 * Order matters: the encoder thread is joined first because it reads
 * frame_.data, and because destroying a still-joinable std::thread would
 * call std::terminate.
 */
Esp32S3Camera::~Esp32S3Camera()
{
    // Wait for any in-flight JPEG encoding before touching the frame it reads.
    if (encoder_thread_.joinable())
    {
        encoder_thread_.join();
    }
    if (streaming_on_)
    {
        // Hand back a frame buffer still borrowed from the driver, if any.
        if (current_fb_)
        {
            esp_camera_fb_return(current_fb_);
            current_fb_ = nullptr;
        }
        esp_camera_deinit();
        streaming_on_ = false;
    }
    // Release the private copy of the last captured frame.
    if (frame_.data)
    {
        heap_caps_free(frame_.data);
        frame_.data = nullptr;
    }
}
/**
 * Store the endpoint and credential used by Explain().
 *
 * @param url   Address the captured image is POSTed to.
 * @param token Bearer token sent in the Authorization header; may be empty,
 *              in which case no Authorization header is sent.
 */
void Esp32S3Camera::SetExplainUrl(const std::string &url, const std::string &token)
{
    explain_url_.assign(url);
    explain_token_.assign(token);
}
/**
 * Grab a fresh frame from the sensor, keep a private copy in frame_, and
 * (when an LVGL display is present) show it as the preview image.
 *
 * RGB565 frames are byte-swapped in place inside the private copy, so the
 * preview and the later upload in Explain() both see the swapped data.
 *
 * @return true when a frame was captured, including the cases where the
 *         preview is skipped because the format cannot be displayed;
 *         false when the camera is not streaming, the driver returns no
 *         frame, or a buffer allocation fails.
 */
bool Esp32S3Camera::Capture()
{
    // The encoder thread started by Explain() reads frame_; let it finish
    // before the frame is replaced.
    if (encoder_thread_.joinable())
    {
        encoder_thread_.join();
    }
    if (!streaming_on_)
    {
        return false;
    }
    // Release the previously held frame, if any.
    if (current_fb_)
    {
        esp_camera_fb_return(current_fb_);
        current_fb_ = nullptr;
    }
    // Discard the first two frames and keep the third, i.e. the most recent one.
    for (int i = 0; i < 3; i++)
    {
        camera_fb_t *fb = esp_camera_fb_get();
        if (!fb)
        {
            ESP_LOGE(TAG, "Camera capture failed");
            return false;
        }
        if (i < 2)
        {
            esp_camera_fb_return(fb);
        }
        else
        {
            current_fb_ = fb;
        }
    }
    if (!current_fb_)
    {
        ESP_LOGE(TAG, "Failed to get frame buffer");
        return false;
    }
    // Keep a copy of the frame in PSRAM so the driver buffer can be returned immediately.
    if (frame_.data)
    {
        heap_caps_free(frame_.data);
        frame_.data = nullptr;
    }
    frame_.len = current_fb_->len;
    frame_.width = current_fb_->width;
    frame_.height = current_fb_->height;
    frame_.format = current_fb_->format;
    frame_.data = (uint8_t *)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
    if (!frame_.data)
    {
        ESP_LOGE(TAG, "Failed to allocate %zu bytes for frame copy", frame_.len);
        esp_camera_fb_return(current_fb_);
        current_fb_ = nullptr;
        return false;
    }
    memcpy(frame_.data, current_fb_->buf, frame_.len);
    // Give the original frame back to the driver.
    esp_camera_fb_return(current_fb_);
    current_fb_ = nullptr;
    // Swap the two bytes of every RGB565 pixel (big endian <-> little endian).
    // After this, frame_.data holds the swapped data used by both display and upload.
    if (frame_.format == PIXFORMAT_RGB565)
    {
        uint8_t *data = frame_.data;
        // Never walk past the bytes actually copied, even if the reported
        // geometry and the reported length disagree.
        const size_t nominal_pixels = static_cast<size_t>(frame_.width) * frame_.height;
        const size_t stored_pixels = frame_.len / 2;
        const size_t pixel_count = nominal_pixels < stored_pixels ? nominal_pixels : stored_pixels;
        for (size_t i = 0; i < pixel_count; i++)
        {
            uint8_t temp = data[2 * i];
            data[2 * i] = data[2 * i + 1];
            data[2 * i + 1] = temp;
        }
    }
    ESP_LOGD(TAG, "Captured frame: %dx%d, len=%zu, format=%d",
             frame_.width, frame_.height, frame_.len, frame_.format);
    // Show the preview image when the board has an LVGL display.
    auto display = dynamic_cast<LvglDisplay *>(Board::GetInstance().GetDisplay());
    if (display != nullptr)
    {
        uint16_t w = frame_.width;
        uint16_t h = frame_.height;
        size_t lvgl_image_size = frame_.len;
        // Row stride rounded up to a multiple of 4 bytes.
        // NOTE(review): the pixel data below is copied tightly packed (w * 2
        // bytes per row); confirm the two agree for widths where w * 2 is not
        // already a multiple of 4.
        size_t stride = ((w * 2) + 3) & ~3;
        lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565;
        uint8_t *data = nullptr;
        switch (frame_.format)
        {
        case PIXFORMAT_RGB565:
        {
            // frame_.data was already byte-swapped above, so a plain copy is enough.
            // The buffer is sized to hold both the nominal w*h*2 image and the
            // frame_.len bytes copied into it, so the memcpy can never overrun.
            const size_t nominal_size = static_cast<size_t>(w) * h * 2;
            const size_t alloc_size = nominal_size > frame_.len ? nominal_size : frame_.len;
            data = (uint8_t *)heap_caps_malloc(alloc_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
            if (data == nullptr)
            {
                ESP_LOGE(TAG, "Failed to allocate memory for preview image");
                return false;
            }
            memcpy(data, frame_.data, frame_.len);
            lvgl_image_size = frame_.len;
            break;
        }
        case PIXFORMAT_JPEG:
            // JPEG would need decoding first, so the preview is skipped.
            ESP_LOGD(TAG, "JPEG format preview not supported, skipping display");
            return true;
        default:
            ESP_LOGE(TAG, "Unsupported frame format for preview: %d", frame_.format);
            return true; // still a success: the capture itself worked
        }
        if (data)
        {
            // Ownership of `data` passes to the image object.
            auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
            display->SetPreviewImage(std::move(image));
        }
    }
    return true;
}
bool Esp32S3Camera::SetHMirror(bool enabled)
{
sensor_t *s = esp_camera_sensor_get();
if (!s)
{
return false;
}
s->set_hmirror(s, enabled ? 1 : 0);
return true;
}
bool Esp32S3Camera::SetVFlip(bool enabled)
{
sensor_t *s = esp_camera_sensor_get();
if (!s)
{
return false;
}
s->set_vflip(s, enabled ? 1 : 0);
return true;
}
/**
 * Encode the last captured frame as JPEG and upload it, together with a
 * question, to the configured explain URL as multipart/form-data.
 *
 * Encoding runs on encoder_thread_ and hands JPEG data to this function
 * through a FreeRTOS queue; this function streams each chunk to the HTTP
 * connection as it arrives. A queue entry with a null data pointer marks the
 * end of the stream.
 *
 * @param question Text sent in the "question" form field.
 * @return The response body returned by the server.
 * @throws std::runtime_error when the URL is not configured, the queue cannot
 *         be created, the connection fails, encoding produces no data, or the
 *         server answers with a status other than 200.
 */
std::string Esp32S3Camera::Explain(const std::string &question)
{
    if (explain_url_.empty())
    {
        throw std::runtime_error("Image explain URL or token is not set");
    }
    // Queue local to this call that carries JPEG chunks from the encoder thread.
    QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
    if (jpeg_queue == nullptr)
    {
        ESP_LOGE(TAG, "Failed to create JPEG queue");
        throw std::runtime_error("Failed to create JPEG queue");
    }
    // Convert the driver pixel format into the V4L2 code the encoder expects.
    uint32_t v4l2_format = pixformat_to_v4l2(frame_.format);
    // Start the encoder thread.
    encoder_thread_ = std::thread([this, jpeg_queue, v4l2_format]()
    {
        uint16_t w = frame_.width ? frame_.width : 320;
        uint16_t h = frame_.height ? frame_.height : 240;
        bool ok = image_to_jpeg_cb(
            frame_.data, frame_.len, w, h, static_cast<v4l2_pix_fmt_t>(v4l2_format), 80,
            [](void* arg, size_t index, const void* data, size_t len) -> size_t {
                auto jpeg_queue = static_cast<QueueHandle_t>(arg);
                JpegChunk chunk = {.data = nullptr, .len = len};
                // Only a callback with index == 0 and a non-empty payload is
                // forwarded as data; everything else is queued as an empty
                // chunk, which the consumer treats as end-of-stream.
                // NOTE(review): confirm against image_to_jpeg_cb how `index`
                // is assigned and whether it signals completion itself.
                if (index == 0 && data != nullptr && len > 0) {
                    chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
                    if (chunk.data == nullptr) {
                        ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len);
                        chunk.len = 0;
                    } else {
                        memcpy(chunk.data, data, len);
                    }
                } else {
                    chunk.len = 0;
                }
                xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
                return len;
            },
            jpeg_queue);
        if (!ok) {
            // Encoding failed: push a terminator so the consumer does not wait forever.
            JpegChunk chunk = {.data = nullptr, .len = 0};
            xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
        }
    });
    auto network = Board::GetInstance().GetNetwork();
    auto http = network->CreateHttp(3);
    std::string boundary = "----ESP32_CAMERA_BOUNDARY";
    http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
    http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
    if (!explain_token_.empty())
    {
        http->SetHeader("Authorization", "Bearer " + explain_token_);
    }
    http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
    http->SetHeader("Transfer-Encoding", "chunked");
    if (!http->Open("POST", explain_url_))
    {
        ESP_LOGE(TAG, "Failed to connect to explain URL");
        // Drain before joining: the encoder blocks in xQueueSend once the
        // queue is full, so joining first could wait forever on a producer
        // that is itself waiting for this consumer.
        JpegChunk chunk;
        while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS)
        {
            if (chunk.data == nullptr)
            {
                break; // terminator reached
            }
            heap_caps_free(chunk.data);
        }
        encoder_thread_.join();
        vQueueDelete(jpeg_queue);
        throw std::runtime_error("Failed to connect to explain URL");
    }
    {
        // First part: the question text.
        std::string question_field;
        question_field += "--" + boundary + "\r\n";
        question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
        question_field += "\r\n";
        question_field += question + "\r\n";
        http->Write(question_field.c_str(), question_field.size());
    }
    {
        // Second part: header of the JPEG file; the payload follows chunk by chunk.
        std::string file_header;
        file_header += "--" + boundary + "\r\n";
        file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
        file_header += "Content-Type: image/jpeg\r\n";
        file_header += "\r\n";
        http->Write(file_header.c_str(), file_header.size());
    }
    size_t total_sent = 0;
    bool saw_terminator = false;
    while (true)
    {
        JpegChunk chunk;
        if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS)
        {
            ESP_LOGE(TAG, "Failed to receive JPEG chunk");
            break;
        }
        if (chunk.data == nullptr)
        {
            saw_terminator = true;
            break;
        }
        http->Write((const char *)chunk.data, chunk.len);
        total_sent += chunk.len;
        heap_caps_free(chunk.data);
    }
    encoder_thread_.join();
    vQueueDelete(jpeg_queue);
    if (!saw_terminator || total_sent == 0)
    {
        ESP_LOGE(TAG, "JPEG encoder failed or produced empty output");
        throw std::runtime_error("Failed to encode image to JPEG");
    }
    {
        // Closing boundary of the multipart body.
        std::string multipart_footer;
        multipart_footer += "\r\n--" + boundary + "--\r\n";
        http->Write(multipart_footer.c_str(), multipart_footer.size());
    }
    // Zero-length write; presumably this finishes the chunked request body (confirm against the Http class).
    http->Write("", 0);
    if (http->GetStatusCode() != 200)
    {
        ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
        throw std::runtime_error("Failed to upload photo");
    }
    std::string result = http->ReadAll();
    http->Close();
    size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
    ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s",
             (int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str());
    return result;
}
#endif // CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_CAMERA

View File

@ -1,53 +0,0 @@
#pragma once
#include "sdkconfig.h"
// esp32s3_camera (built on the esp_camera component): used only on ESP32-S3 when esp_camera is selected
#if defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_CAMERA)
#include <lvgl.h>
#include <thread>
#include <memory>
#include <vector>
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>
#include "camera.h"
#include "esp_camera.h"
/// A buffer pointer together with its length in bytes.
/// Members default to an empty chunk (null pointer, zero length) so a
/// default-constructed instance never carries indeterminate values.
struct JpegChunk
{
    uint8_t *data = nullptr; ///< Start of the chunk's bytes; null when the chunk is empty.
    size_t len = 0;          ///< Number of bytes at `data`.
};
class Esp32S3Camera : public Camera
{
private:
struct FrameBuffer
{
uint8_t *data = nullptr;
size_t len = 0;
uint16_t width = 0;
uint16_t height = 0;
pixformat_t format = PIXFORMAT_RGB565;
} frame_;
bool streaming_on_ = false;
std::string explain_url_;
std::string explain_token_;
std::thread encoder_thread_;
camera_fb_t *current_fb_ = nullptr;
public:
Esp32S3Camera(const camera_config_t &config);
~Esp32S3Camera();
virtual void SetExplainUrl(const std::string &url, const std::string &token) override;
virtual bool Capture() override;
virtual bool SetHMirror(bool enabled) override;
virtual bool SetVFlip(bool enabled) override;
virtual std::string Explain(const std::string &question) override;
};
#endif // CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_CAMERA

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,53 @@
#pragma once
#include "sdkconfig.h"
#include <lvgl.h>
#include <thread>
#include <memory>
#include <vector>
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>
#include "camera.h"
#include "jpg/image_to_jpeg.h"
#include "esp_video_init.h"
/// A buffer pointer together with its length in bytes.
/// Members default to an empty chunk (null pointer, zero length) so a
/// default-constructed instance never carries indeterminate values.
struct JpegChunk {
    uint8_t* data = nullptr;  ///< Start of the chunk's bytes; null when the chunk is empty.
    size_t len = 0;           ///< Number of bytes at `data`.
};
class EspVideo : public Camera {
private:
struct FrameBuffer {
uint8_t *data = nullptr;
size_t len = 0;
uint16_t width = 0;
uint16_t height = 0;
v4l2_pix_fmt_t format = 0;
} frame_;
v4l2_pix_fmt_t sensor_format_ = 0;
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
uint16_t sensor_width_ = 0;
uint16_t sensor_height_ = 0;
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
int video_fd_ = -1;
bool streaming_on_ = false;
struct MmapBuffer { void *start = nullptr; size_t length = 0; };
std::vector<MmapBuffer> mmap_buffers_;
std::string explain_url_;
std::string explain_token_;
std::thread encoder_thread_;
public:
EspVideo(const esp_video_init_config_t& config);
~EspVideo();
virtual void SetExplainUrl(const std::string& url, const std::string& token);
virtual bool Capture();
// 翻转控制函数
virtual bool SetHMirror(bool enabled) override;
virtual bool SetVFlip(bool enabled) override;
virtual std::string Explain(const std::string& question);
};