Support both esp_video and esp32_camera (#1671)

* Update project version to 2.2.1 and refactor camera component handling - Incremented project version from 2.2.0 to 2.2.1 in CMakeLists.txt. - Removed legacy esp32_camera component and replaced it with esp_video for ESP32-S3 and ESP32-P4 boards. - Updated board implementations to utilize the new esp_video component, ensuring compatibility and improved functionality. - Cleaned up Kconfig options related to camera selection, streamlining the configuration process. - Enhanced camera initialization logic across various board files to support the new component structure. * Refactor camera handling in AtomS3R CAM/M12 EchoBase board - Replaced the legacy EspVideo component with the new Esp32Camera class for improved camera functionality. - Updated camera initialization logic to utilize a more structured configuration approach, enhancing clarity and maintainability. - Removed outdated comments and code related to the previous camera implementation in the README file. * Update camera configuration for atoms3r-cam-m12-echo-base - Removed outdated camera configuration options from config.json to streamline the setup. - Retained essential partition table configuration for improved clarity. * Enhance Esp32Camera functionality and memory management - Added esp_timer.h for improved timing functionality. - Streamlined camera initialization by removing redundant frame buffer setup and logging. - Improved memory allocation for JPEG encoding and added error handling for unsupported pixel formats. - Updated comments for clarity and consistency, ensuring better understanding of the code flow.
2026-01-20 22:44:37 +08:00
parent d5ec8f7081
commit 734b5b410a
41 changed files with 1348 additions and 1581 deletions
--- a/main/boards/common/esp32_camera.cc
+++ b/main/boards/common/esp32_camera.cc
--- a/main/boards/common/esp32_camera.h
+++ b/main/boards/common/esp32_camera.h
@ -1,7 +1,5 @@
 #pragma once
 #include "sdkconfig.h"
-// esp32_camera (使用 esp_video 组件) 用于 ESP32-P4，或 ESP32-S3 选择使用 esp_video 时
-#if defined(CONFIG_IDF_TARGET_ESP32P4) || (defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_VIDEO))

 #include <lvgl.h>
 #include <thread>
@ -12,46 +10,31 @@
 #include <freertos/queue.h>

 #include "camera.h"
+#include "esp_camera.h"
 #include "jpg/image_to_jpeg.h"
-#include "esp_video_init.h"

-struct JpegChunk {
-    uint8_t* data;
+struct JpegChunk
+{
+    uint8_t *data;
    size_t len;
 };

-class Esp32Camera : public Camera {
+class Esp32Camera : public Camera
+{
 private:
-    struct FrameBuffer {
-        uint8_t *data = nullptr;
-        size_t len = 0;
-        uint16_t width = 0;
-        uint16_t height = 0;
-        v4l2_pix_fmt_t format = 0;
-    } frame_;
-    v4l2_pix_fmt_t sensor_format_ = 0;
-#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
-    uint16_t sensor_width_ = 0;
-    uint16_t sensor_height_ = 0;
-#endif  // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
-    int video_fd_ = -1;
    bool streaming_on_ = false;
-    struct MmapBuffer { void *start = nullptr; size_t length = 0; };
-    std::vector<MmapBuffer> mmap_buffers_;
    std::string explain_url_;
    std::string explain_token_;
    std::thread encoder_thread_;
+    camera_fb_t *current_fb_ = nullptr;

 public:
-    Esp32Camera(const esp_video_init_config_t& config);
+    Esp32Camera(const camera_config_t &config);
    ~Esp32Camera();

-    virtual void SetExplainUrl(const std::string& url, const std::string& token);
-    virtual bool Capture();
-    // 翻转控制函数
+    virtual void SetExplainUrl(const std::string &url, const std::string &token) override;
+    virtual bool Capture() override;
    virtual bool SetHMirror(bool enabled) override;
    virtual bool SetVFlip(bool enabled) override;
-    virtual std::string Explain(const std::string& question);
+    virtual std::string Explain(const std::string &question) override;
 };
-
-#endif // ndef CONFIG_IDF_TARGET_ESP32
--- a/main/boards/common/esp32s3_camera.cc
+++ b/main/boards/common/esp32s3_camera.cc
@ -1,413 +0,0 @@
-#include "sdkconfig.h"
-
-// esp32s3_camera (使用 esp_camera 组件) 仅用于 ESP32-S3 且选择使用 esp_camera 时
-#if defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_CAMERA)
-
-#include <esp_heap_caps.h>
-#include <cstdio>
-#include <cstring>
-#include <esp_log.h>
-
-#include "esp32s3_camera.h"
-#include "board.h"
-#include "display.h"
-#include "lvgl_display.h"
-#include "mcp_server.h"
-#include "system_info.h"
-#include "jpg/image_to_jpeg.h"
-
-#define TAG "Esp32S3Camera"
-
-// V4L2 兼容的格式定义
-#define V4L2_PIX_FMT_RGB565 0x50424752 // 'RGBP'
-#define V4L2_PIX_FMT_YUYV 0x56595559   // 'YUYV'
-#define V4L2_PIX_FMT_JPEG 0x4745504A   // 'JPEG'
-#define V4L2_PIX_FMT_RGB24 0x33424752  // 'RGB3'
-#define V4L2_PIX_FMT_GREY 0x59455247   // 'GREY'
-
-static uint32_t pixformat_to_v4l2(pixformat_t fmt)
-{
-    switch (fmt)
-    {
-    case PIXFORMAT_RGB565:
-        return V4L2_PIX_FMT_RGB565;
-    case PIXFORMAT_YUV422:
-        return V4L2_PIX_FMT_YUYV;
-    case PIXFORMAT_JPEG:
-        return V4L2_PIX_FMT_JPEG;
-    case PIXFORMAT_RGB888:
-        return V4L2_PIX_FMT_RGB24;
-    case PIXFORMAT_GRAYSCALE:
-        return V4L2_PIX_FMT_GREY;
-    default:
-        return 0;
-    }
-}
-
-Esp32S3Camera::Esp32S3Camera(const camera_config_t &config)
-{
-    esp_err_t err = esp_camera_init(&config);
-    if (err != ESP_OK)
-    {
-        ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", err);
-        return;
-    }
-
-    sensor_t *s = esp_camera_sensor_get();
-    if (s)
-    {
-        frame_.width = config.frame_size == FRAMESIZE_QVGA ? 320 : config.frame_size == FRAMESIZE_VGA ? 640
-                                                               : config.frame_size == FRAMESIZE_SVGA  ? 800
-                                                               : config.frame_size == FRAMESIZE_XGA   ? 1024
-                                                               : config.frame_size == FRAMESIZE_HD    ? 1280
-                                                               : config.frame_size == FRAMESIZE_SXGA  ? 1280
-                                                               : config.frame_size == FRAMESIZE_UXGA  ? 1600
-                                                                                                      : 320;
-        frame_.height = config.frame_size == FRAMESIZE_QVGA ? 240 : config.frame_size == FRAMESIZE_VGA ? 480
-                                                                : config.frame_size == FRAMESIZE_SVGA  ? 600
-                                                                : config.frame_size == FRAMESIZE_XGA   ? 768
-                                                                : config.frame_size == FRAMESIZE_HD    ? 720
-                                                                : config.frame_size == FRAMESIZE_SXGA  ? 1024
-                                                                : config.frame_size == FRAMESIZE_UXGA  ? 1200
-                                                                                                       : 240;
-        frame_.format = config.pixel_format;
-        ESP_LOGI(TAG, "Camera initialized: %dx%d, format=%d", frame_.width, frame_.height, config.pixel_format);
-    }
-
-    streaming_on_ = true;
-    ESP_LOGI(TAG, "ESP32-S3 Camera init success");
-}
-
-Esp32S3Camera::~Esp32S3Camera()
-{
-    if (streaming_on_)
-    {
-        if (current_fb_)
-        {
-            esp_camera_fb_return(current_fb_);
-            current_fb_ = nullptr;
-        }
-        esp_camera_deinit();
-        streaming_on_ = false;
-    }
-    if (frame_.data)
-    {
-        heap_caps_free(frame_.data);
-        frame_.data = nullptr;
-    }
-}
-
-void Esp32S3Camera::SetExplainUrl(const std::string &url, const std::string &token)
-{
-    explain_url_ = url;
-    explain_token_ = token;
-}
-
-bool Esp32S3Camera::Capture()
-{
-    if (encoder_thread_.joinable())
-    {
-        encoder_thread_.join();
-    }
-
-    if (!streaming_on_)
-    {
-        return false;
-    }
-
-    // 释放之前的帧
-    if (current_fb_)
-    {
-        esp_camera_fb_return(current_fb_);
-        current_fb_ = nullptr;
-    }
-
-    // 丢弃前两帧，获取最新帧
-    for (int i = 0; i < 3; i++)
-    {
-        camera_fb_t *fb = esp_camera_fb_get();
-        if (!fb)
-        {
-            ESP_LOGE(TAG, "Camera capture failed");
-            return false;
-        }
-        if (i < 2)
-        {
-            esp_camera_fb_return(fb);
-        }
-        else
-        {
-            current_fb_ = fb;
-        }
-    }
-
-    if (!current_fb_)
-    {
-        ESP_LOGE(TAG, "Failed to get frame buffer");
-        return false;
-    }
-
-    // 保存帧副本到 PSRAM
-    if (frame_.data)
-    {
-        heap_caps_free(frame_.data);
-        frame_.data = nullptr;
-    }
-
-    frame_.len = current_fb_->len;
-    frame_.width = current_fb_->width;
-    frame_.height = current_fb_->height;
-    frame_.format = current_fb_->format;
-
-    frame_.data = (uint8_t *)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
-    if (!frame_.data)
-    {
-        ESP_LOGE(TAG, "Failed to allocate %zu bytes for frame copy", frame_.len);
-        esp_camera_fb_return(current_fb_);
-        current_fb_ = nullptr;
-        return false;
-    }
-    memcpy(frame_.data, current_fb_->buf, frame_.len);
-
-    // 释放原始帧
-    esp_camera_fb_return(current_fb_);
-    current_fb_ = nullptr;
-
-    // 对 RGB565 格式进行字节交换 (Big Endian <-> Little Endian)
-    // 这样 frame_.data 就是已交换的数据，显示和上传都使用相同的数据
-    if (frame_.format == PIXFORMAT_RGB565)
-    {
-        uint8_t *data = frame_.data;
-        size_t pixel_count = frame_.width * frame_.height;
-        for (size_t i = 0; i < pixel_count; i++)
-        {
-            uint8_t temp = data[2 * i];
-            data[2 * i] = data[2 * i + 1];
-            data[2 * i + 1] = temp;
-        }
-    }
-
-    ESP_LOGD(TAG, "Captured frame: %dx%d, len=%zu, format=%d",
-             frame_.width, frame_.height, frame_.len, frame_.format);
-
-    // 显示预览图片
-    auto display = dynamic_cast<LvglDisplay *>(Board::GetInstance().GetDisplay());
-    if (display != nullptr)
-    {
-        if (!frame_.data)
-        {
-            ESP_LOGE(TAG, "frame.data is null");
-            return false;
-        }
-
-        uint16_t w = frame_.width;
-        uint16_t h = frame_.height;
-        size_t lvgl_image_size = frame_.len;
-        size_t stride = ((w * 2) + 3) & ~3; // 4字节对齐
-        lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565;
-        uint8_t *data = nullptr;
-
-        switch (frame_.format)
-        {
-        case PIXFORMAT_RGB565:
-            // frame_.data 已经在捕获阶段完成了字节交换，直接复制即可
-            data = (uint8_t *)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
-            if (data == nullptr)
-            {
-                ESP_LOGE(TAG, "Failed to allocate memory for preview image");
-                return false;
-            }
-            memcpy(data, frame_.data, frame_.len);
-            lvgl_image_size = frame_.len;
-            break;
-
-        case PIXFORMAT_JPEG:
-            // JPEG 格式需要解码 - 跳过预览显示
-            ESP_LOGD(TAG, "JPEG format preview not supported, skipping display");
-            return true;
-
-        default:
-            ESP_LOGE(TAG, "Unsupported frame format for preview: %d", frame_.format);
-            return true; // 仍然返回 true，因为捕获成功
-        }
-
-        if (data)
-        {
-            auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
-            display->SetPreviewImage(std::move(image));
-        }
-    }
-    return true;
-}
-
-bool Esp32S3Camera::SetHMirror(bool enabled)
-{
-    sensor_t *s = esp_camera_sensor_get();
-    if (!s)
-    {
-        return false;
-    }
-    s->set_hmirror(s, enabled ? 1 : 0);
-    return true;
-}
-
-bool Esp32S3Camera::SetVFlip(bool enabled)
-{
-    sensor_t *s = esp_camera_sensor_get();
-    if (!s)
-    {
-        return false;
-    }
-    s->set_vflip(s, enabled ? 1 : 0);
-    return true;
-}
-
-std::string Esp32S3Camera::Explain(const std::string &question)
-{
-    if (explain_url_.empty())
-    {
-        throw std::runtime_error("Image explain URL or token is not set");
-    }
-
-    // 创建局部的 JPEG 队列
-    QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
-    if (jpeg_queue == nullptr)
-    {
-        ESP_LOGE(TAG, "Failed to create JPEG queue");
-        throw std::runtime_error("Failed to create JPEG queue");
-    }
-
-    // 转换格式为 v4l2 兼容格式
-    uint32_t v4l2_format = pixformat_to_v4l2(frame_.format);
-
-    // 启动编码线程
-    encoder_thread_ = std::thread([this, jpeg_queue, v4l2_format]()
-                                  {
-        uint16_t w = frame_.width ? frame_.width : 320;
-        uint16_t h = frame_.height ? frame_.height : 240;
-        bool ok = image_to_jpeg_cb(
-            frame_.data, frame_.len, w, h, static_cast<v4l2_pix_fmt_t>(v4l2_format), 80,
-            [](void* arg, size_t index, const void* data, size_t len) -> size_t {
-                auto jpeg_queue = static_cast<QueueHandle_t>(arg);
-                JpegChunk chunk = {.data = nullptr, .len = len};
-                if (index == 0 && data != nullptr && len > 0) {
-                    chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
-                    if (chunk.data == nullptr) {
-                        ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len);
-                        chunk.len = 0;
-                    } else {
-                        memcpy(chunk.data, data, len);
-                    }
-                } else {
-                    chunk.len = 0;
-                }
-                xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
-                return len;
-            },
-            jpeg_queue);
-
-        if (!ok) {
-            JpegChunk chunk = {.data = nullptr, .len = 0};
-            xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
-        } });
-
-    auto network = Board::GetInstance().GetNetwork();
-    auto http = network->CreateHttp(3);
-    std::string boundary = "----ESP32_CAMERA_BOUNDARY";
-
-    http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
-    http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
-    if (!explain_token_.empty())
-    {
-        http->SetHeader("Authorization", "Bearer " + explain_token_);
-    }
-    http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
-    http->SetHeader("Transfer-Encoding", "chunked");
-    if (!http->Open("POST", explain_url_))
-    {
-        ESP_LOGE(TAG, "Failed to connect to explain URL");
-        encoder_thread_.join();
-        JpegChunk chunk;
-        while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS)
-        {
-            if (chunk.data != nullptr)
-            {
-                heap_caps_free(chunk.data);
-            }
-            else
-            {
-                break;
-            }
-        }
-        vQueueDelete(jpeg_queue);
-        throw std::runtime_error("Failed to connect to explain URL");
-    }
-
-    {
-        std::string question_field;
-        question_field += "--" + boundary + "\r\n";
-        question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
-        question_field += "\r\n";
-        question_field += question + "\r\n";
-        http->Write(question_field.c_str(), question_field.size());
-    }
-    {
-        std::string file_header;
-        file_header += "--" + boundary + "\r\n";
-        file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
-        file_header += "Content-Type: image/jpeg\r\n";
-        file_header += "\r\n";
-        http->Write(file_header.c_str(), file_header.size());
-    }
-
-    size_t total_sent = 0;
-    bool saw_terminator = false;
-    while (true)
-    {
-        JpegChunk chunk;
-        if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS)
-        {
-            ESP_LOGE(TAG, "Failed to receive JPEG chunk");
-            break;
-        }
-        if (chunk.data == nullptr)
-        {
-            saw_terminator = true;
-            break;
-        }
-        http->Write((const char *)chunk.data, chunk.len);
-        total_sent += chunk.len;
-        heap_caps_free(chunk.data);
-    }
-    encoder_thread_.join();
-    vQueueDelete(jpeg_queue);
-
-    if (!saw_terminator || total_sent == 0)
-    {
-        ESP_LOGE(TAG, "JPEG encoder failed or produced empty output");
-        throw std::runtime_error("Failed to encode image to JPEG");
-    }
-
-    {
-        std::string multipart_footer;
-        multipart_footer += "\r\n--" + boundary + "--\r\n";
-        http->Write(multipart_footer.c_str(), multipart_footer.size());
-    }
-    http->Write("", 0);
-
-    if (http->GetStatusCode() != 200)
-    {
-        ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
-        throw std::runtime_error("Failed to upload photo");
-    }
-
-    std::string result = http->ReadAll();
-    http->Close();
-
-    size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
-    ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s",
-             (int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str());
-    return result;
-}
-
-#endif // CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_CAMERA
--- a/main/boards/common/esp32s3_camera.h
+++ b/main/boards/common/esp32s3_camera.h
@ -1,53 +0,0 @@
-#pragma once
-#include "sdkconfig.h"
-
-// esp32s3_camera (使用 esp_camera 组件) 仅用于 ESP32-S3 且选择使用 esp_camera 时
-#if defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_CAMERA)
-
-#include <lvgl.h>
-#include <thread>
-#include <memory>
-#include <vector>
-
-#include <freertos/FreeRTOS.h>
-#include <freertos/queue.h>
-
-#include "camera.h"
-#include "esp_camera.h"
-
-struct JpegChunk
-{
-    uint8_t *data;
-    size_t len;
-};
-
-class Esp32S3Camera : public Camera
-{
-private:
-    struct FrameBuffer
-    {
-        uint8_t *data = nullptr;
-        size_t len = 0;
-        uint16_t width = 0;
-        uint16_t height = 0;
-        pixformat_t format = PIXFORMAT_RGB565;
-    } frame_;
-
-    bool streaming_on_ = false;
-    std::string explain_url_;
-    std::string explain_token_;
-    std::thread encoder_thread_;
-    camera_fb_t *current_fb_ = nullptr;
-
-public:
-    Esp32S3Camera(const camera_config_t &config);
-    ~Esp32S3Camera();
-
-    virtual void SetExplainUrl(const std::string &url, const std::string &token) override;
-    virtual bool Capture() override;
-    virtual bool SetHMirror(bool enabled) override;
-    virtual bool SetVFlip(bool enabled) override;
-    virtual std::string Explain(const std::string &question) override;
-};
-
-#endif // CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_CAMERA
--- a/main/boards/common/esp_video.cc
+++ b/main/boards/common/esp_video.cc
--- a/main/boards/common/esp_video.h
+++ b/main/boards/common/esp_video.h
@ -0,0 +1,53 @@
+#pragma once
+#include "sdkconfig.h"
+
+#include <lvgl.h>
+#include <thread>
+#include <memory>
+#include <vector>
+
+#include <freertos/FreeRTOS.h>
+#include <freertos/queue.h>
+
+#include "camera.h"
+#include "jpg/image_to_jpeg.h"
+#include "esp_video_init.h"
+
+struct JpegChunk {
+    uint8_t* data;
+    size_t len;
+};
+
+class EspVideo : public Camera {
+private:
+    struct FrameBuffer {
+        uint8_t *data = nullptr;
+        size_t len = 0;
+        uint16_t width = 0;
+        uint16_t height = 0;
+        v4l2_pix_fmt_t format = 0;
+    } frame_;
+    v4l2_pix_fmt_t sensor_format_ = 0;
+#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
+    uint16_t sensor_width_ = 0;
+    uint16_t sensor_height_ = 0;
+#endif  // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
+    int video_fd_ = -1;
+    bool streaming_on_ = false;
+    struct MmapBuffer { void *start = nullptr; size_t length = 0; };
+    std::vector<MmapBuffer> mmap_buffers_;
+    std::string explain_url_;
+    std::string explain_token_;
+    std::thread encoder_thread_;
+
+public:
+    EspVideo(const esp_video_init_config_t& config);
+    ~EspVideo();
+
+    virtual void SetExplainUrl(const std::string& url, const std::string& token);
+    virtual bool Capture();
+    // 翻转控制函数
+    virtual bool SetHMirror(bool enabled) override;
+    virtual bool SetVFlip(bool enabled) override;
+    virtual std::string Explain(const std::string& question);
+};