Files
xiaozhi-esp32/main/boards/common/esp32_camera.cc
Xiaoxia 734b5b410a Support both esp_video and esp32_camera (#1671)
* Update project version to 2.2.1 and refactor camera component handling

- Incremented project version from 2.2.0 to 2.2.1 in CMakeLists.txt.
- Removed legacy esp32_camera component and replaced it with esp_video for ESP32-S3 and ESP32-P4 boards.
- Updated board implementations to utilize the new esp_video component, ensuring compatibility and improved functionality.
- Cleaned up Kconfig options related to camera selection, streamlining the configuration process.
- Enhanced camera initialization logic across various board files to support the new component structure.

* Refactor camera handling in AtomS3R CAM/M12 EchoBase board

- Replaced the legacy EspVideo component with the new Esp32Camera class for improved camera functionality.
- Updated camera initialization logic to utilize a more structured configuration approach, enhancing clarity and maintainability.
- Removed outdated comments and code related to the previous camera implementation in the README file.

* Update camera configuration for atoms3r-cam-m12-echo-base

- Removed outdated camera configuration options from config.json to streamline the setup.
- Retained essential partition table configuration for improved clarity.

* Enhance Esp32Camera functionality and memory management

- Added esp_timer.h for improved timing functionality.
- Streamlined camera initialization by removing redundant frame buffer setup and logging.
- Improved memory allocation for JPEG encoding and added error handling for unsupported pixel formats.
- Updated comments for clarity and consistency, ensuring better understanding of the code flow.
2026-01-20 22:44:37 +08:00

289 lines
9.8 KiB
C++

#include "sdkconfig.h"
#include <esp_heap_caps.h>
#include <cstdio>
#include <cstring>
#include <esp_log.h>
#include <img_converters.h>
#include "esp32_camera.h"
#include "board.h"
#include "display.h"
#include "lvgl_display.h"
#include "mcp_server.h"
#include "system_info.h"
#include "jpg/image_to_jpeg.h"
#include "esp_timer.h"
#define TAG "Esp32Camera"
Esp32Camera::Esp32Camera(const camera_config_t &config) {
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", err);
return;
}
sensor_t *s = esp_camera_sensor_get();
if (s) {
if (s->id.PID == GC0308_PID) {
s->set_hmirror(s, 0); // Control camera mirror: 1 for mirror, 0 for normal
}
ESP_LOGI(TAG, "Camera initialized: format=%d", config.pixel_format);
}
streaming_on_ = true;
}
Esp32Camera::~Esp32Camera() {
if (streaming_on_) {
if (current_fb_) {
esp_camera_fb_return(current_fb_);
current_fb_ = nullptr;
}
esp_camera_deinit();
streaming_on_ = false;
}
}
void Esp32Camera::SetExplainUrl(const std::string &url, const std::string &token) {
explain_url_ = url;
explain_token_ = token;
}
bool Esp32Camera::Capture() {
if (encoder_thread_.joinable()) {
encoder_thread_.join();
}
if (!streaming_on_) {
return false;
}
// Get the latest frame, discard old frames for real-time performance
for (int i = 0; i < 2; i++) {
if (current_fb_) {
esp_camera_fb_return(current_fb_);
}
current_fb_ = esp_camera_fb_get();
if (!current_fb_) {
ESP_LOGE(TAG, "Camera capture failed");
return false;
}
}
// Perform byte swapping for RGB565 format and prepare preview image
if (current_fb_->format == PIXFORMAT_RGB565) {
size_t pixel_count = current_fb_->width * current_fb_->height;
size_t data_size = pixel_count * 2;
uint8_t *preview_data = (uint8_t *)heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (preview_data == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
return false;
}
uint16_t *src = (uint16_t *)current_fb_->buf;
uint16_t *dst = (uint16_t *)preview_data;
for (size_t i = 0; i < pixel_count; i++) {
// Copy data from driver buffer to preview buffer with byte swapping
dst[i] = __builtin_bswap16(src[i]);
}
// Display preview image
auto display = dynamic_cast<LvglDisplay *>(Board::GetInstance().GetDisplay());
if (display != nullptr) {
display->SetPreviewImage(std::make_unique<LvglAllocatedImage>(preview_data, data_size, current_fb_->width, current_fb_->height, current_fb_->width * 2, LV_COLOR_FORMAT_RGB565));
} else {
heap_caps_free(preview_data);
}
} else if (current_fb_->format == PIXFORMAT_JPEG) {
// JPEG format preview usually requires decoding, skip preview display for now, just log
ESP_LOGW(TAG, "JPEG capture success, len=%zu, but not supported for preview", current_fb_->len);
}
ESP_LOGI(TAG, "Captured frame: %dx%d, len=%zu, format=%d",
current_fb_->width, current_fb_->height, current_fb_->len, current_fb_->format);
return true;
}
bool Esp32Camera::SetHMirror(bool enabled) {
sensor_t *s = esp_camera_sensor_get();
if (!s) {
return false;
}
s->set_hmirror(s, enabled ? 1 : 0);
return true;
}
bool Esp32Camera::SetVFlip(bool enabled) {
sensor_t *s = esp_camera_sensor_get();
if (!s) {
return false;
}
s->set_vflip(s, enabled ? 1 : 0);
return true;
}
std::string Esp32Camera::Explain(const std::string &question) {
if (explain_url_.empty()) {
throw std::runtime_error("Image explain URL or token is not set");
}
if (current_fb_ == nullptr) {
throw std::runtime_error("No camera frame captured");
}
// Create local JPEG queue
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
if (jpeg_queue == nullptr) {
ESP_LOGE(TAG, "Failed to create JPEG queue");
throw std::runtime_error("Failed to create JPEG queue");
}
// Start encoding thread
encoder_thread_ = std::thread([this, jpeg_queue]() {
int64_t start_time = esp_timer_get_time();
uint16_t w = current_fb_->width;
uint16_t h = current_fb_->height;
v4l2_pix_fmt_t enc_fmt;
switch (current_fb_->format) {
case PIXFORMAT_RGB565:
enc_fmt = V4L2_PIX_FMT_RGB565;
break;
case PIXFORMAT_YUV422:
enc_fmt = V4L2_PIX_FMT_YUYV; // YUV422 is actually YUYV format
break;
case PIXFORMAT_YUV420:
enc_fmt = V4L2_PIX_FMT_YUV420;
break;
case PIXFORMAT_GRAYSCALE:
enc_fmt = V4L2_PIX_FMT_GREY;
break;
case PIXFORMAT_JPEG:
enc_fmt = V4L2_PIX_FMT_JPEG;
break;
case PIXFORMAT_RGB888:
enc_fmt = V4L2_PIX_FMT_RGB24;
break;
default:
ESP_LOGE(TAG, "Unsupported pixel format: %d", current_fb_->format);
return;
}
bool ok = image_to_jpeg_cb(current_fb_->buf, current_fb_->len, w, h, enc_fmt, 80,
[](void* arg, size_t index, const void* data, size_t len) -> size_t {
auto jpeg_queue = static_cast<QueueHandle_t>(arg);
JpegChunk chunk = {.data = nullptr, .len = len};
if (index == 0 && data != nullptr && len > 0) {
chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (chunk.data == nullptr) {
ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len);
chunk.len = 0;
} else {
memcpy(chunk.data, data, len);
}
} else {
chunk.len = 0; // Sentinel or error
}
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
return len;
}, jpeg_queue);
if (!ok) {
JpegChunk chunk = {.data = nullptr, .len = 0};
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
}
int64_t end_time = esp_timer_get_time();
ESP_LOGI(TAG, "JPEG encoding time: %ld ms", int((end_time - start_time) / 1000));
});
auto network = Board::GetInstance().GetNetwork();
auto http = network->CreateHttp(3);
std::string boundary = "----ESP32_CAMERA_BOUNDARY";
http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
if (!explain_token_.empty()) {
http->SetHeader("Authorization", "Bearer " + explain_token_);
}
http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
http->SetHeader("Transfer-Encoding", "chunked");
if (!http->Open("POST", explain_url_)) {
ESP_LOGE(TAG, "Failed to connect to explain URL");
encoder_thread_.join();
JpegChunk chunk;
while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) {
if (chunk.data != nullptr) {
heap_caps_free(chunk.data);
} else {
break;
}
}
vQueueDelete(jpeg_queue);
throw std::runtime_error("Failed to connect to explain URL");
}
{
std::string question_field;
question_field += "--" + boundary + "\r\n";
question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
question_field += "\r\n";
question_field += question + "\r\n";
http->Write(question_field.c_str(), question_field.size());
}
{
std::string file_header;
file_header += "--" + boundary + "\r\n";
file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
file_header += "Content-Type: image/jpeg\r\n";
file_header += "\r\n";
http->Write(file_header.c_str(), file_header.size());
}
size_t total_sent = 0;
bool saw_terminator = false;
while (true) {
JpegChunk chunk;
if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) {
ESP_LOGE(TAG, "Failed to receive JPEG chunk");
break;
}
if (chunk.data == nullptr) {
saw_terminator = true;
break;
}
http->Write((const char *)chunk.data, chunk.len);
total_sent += chunk.len;
heap_caps_free(chunk.data);
}
encoder_thread_.join();
vQueueDelete(jpeg_queue);
if (!saw_terminator || total_sent == 0) {
ESP_LOGE(TAG, "JPEG encoder failed or produced empty output");
throw std::runtime_error("Failed to encode image to JPEG");
}
{
std::string multipart_footer;
multipart_footer += "\r\n--" + boundary + "--\r\n";
http->Write(multipart_footer.c_str(), multipart_footer.size());
}
http->Write("", 0);
if (http->GetStatusCode() != 200) {
ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
throw std::runtime_error("Failed to upload photo");
}
std::string result = http->ReadAll();
http->Close();
size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
ESP_LOGI(TAG, "Explain image size=%dx%d, compressed size=%d, remain stack size=%d, question=%s\n%s",
current_fb_->width, current_fb_->height, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str());
return result;
}