Refactor: Use esp_video component (#1245)
* refactor: migrate camera module to esp-video library * refactor: migrate boards to esp-video API (1/2) * refactor: migrate boards to esp-video API (2/2) * fix: use ESP-IDF 5.5 * refactor: migrate the JPEG encoder to `esp_new_jpeg` * feat: add YUV422 support * feat: improve pixelformat and device selection process * feat: use ESP32-P4 Hardware JPEG Encoder
This commit is contained in:
@ -1,37 +1,307 @@
|
||||
#include "esp32_camera.h"
|
||||
#include "mcp_server.h"
|
||||
#include "display.h"
|
||||
#include <fcntl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <unistd.h>
|
||||
#include "board.h"
|
||||
#include "system_info.h"
|
||||
#include "lvgl_display.h"
|
||||
#include "display.h"
|
||||
#include "esp_video_device.h"
|
||||
#include "esp_video_init.h"
|
||||
#include "jpg/image_to_jpeg.h"
|
||||
#include "linux/videodev2.h"
|
||||
#include "lvgl_display.h"
|
||||
#include "mcp_server.h"
|
||||
#include "system_info.h"
|
||||
|
||||
#include <esp_log.h>
|
||||
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
#undef LOG_LOCAL_LEVEL
|
||||
#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG)
|
||||
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
|
||||
#include <errno.h>
|
||||
#include <esp_heap_caps.h>
|
||||
#include <esp_log.h>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#define TAG "Esp32Camera"
|
||||
|
||||
Esp32Camera::Esp32Camera(const camera_config_t& config) {
|
||||
// camera init
|
||||
esp_err_t err = esp_camera_init(&config); // 配置上面定义的参数
|
||||
if (err != ESP_OK) {
|
||||
ESP_LOGE(TAG, "Camera init failed with error 0x%x", err);
|
||||
#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
|
||||
#warning "CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER or CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP is enabled, which may cause image corruption in YUV422 format!"
|
||||
#endif
|
||||
|
||||
#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
#define CAM_PRINT_FOURCC(pixelformat) \
|
||||
char fourcc[5]; \
|
||||
fourcc[0] = pixelformat & 0xFF; \
|
||||
fourcc[1] = (pixelformat >> 8) & 0xFF; \
|
||||
fourcc[2] = (pixelformat >> 16) & 0xFF; \
|
||||
fourcc[3] = (pixelformat >> 24) & 0xFF; \
|
||||
fourcc[4] = '\0'; \
|
||||
ESP_LOGD(TAG, "FOURCC: '%c%c%c%c'", fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
|
||||
|
||||
static void log_available_video_devices() {
|
||||
for (int i = 0; i < 50; i++) {
|
||||
char path[16];
|
||||
snprintf(path, sizeof(path), "/dev/video%d", i);
|
||||
int fd = open(path, O_RDONLY);
|
||||
if (fd >= 0) {
|
||||
ESP_LOGD(TAG, "found video device: %s", path);
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define CAM_PRINT_FOURCC(pixelformat) (void)0;
|
||||
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
|
||||
Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) {
|
||||
if (esp_video_init(&config) != ESP_OK) {
|
||||
ESP_LOGE(TAG, "esp_video_init failed");
|
||||
return;
|
||||
}
|
||||
|
||||
sensor_t *s = esp_camera_sensor_get(); // 获取摄像头型号
|
||||
if (s->id.PID == GC0308_PID) {
|
||||
s->set_hmirror(s, 0); // 这里控制摄像头镜像 写1镜像 写0不镜像
|
||||
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
esp_log_level_set(TAG, ESP_LOG_DEBUG);
|
||||
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
|
||||
const char* video_device_name = nullptr;
|
||||
|
||||
if (false) { /* 用于构建 else if */
|
||||
}
|
||||
#if CONFIG_ESP_VIDEO_ENABLE_MIPI_CSI_VIDEO_DEVICE
|
||||
else if (config.csi != nullptr) {
|
||||
video_device_name = ESP_VIDEO_MIPI_CSI_DEVICE_NAME;
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_ESP_VIDEO_ENABLE_DVP_VIDEO_DEVICE
|
||||
else if (config.dvp != nullptr) {
|
||||
video_device_name = ESP_VIDEO_DVP_DEVICE_NAME;
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_ESP_VIDEO_ENABLE_HW_JPEG_VIDEO_DEVICE
|
||||
else if (config.jpeg != nullptr) {
|
||||
video_device_name = ESP_VIDEO_JPEG_DEVICE_NAME;
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_ESP_VIDEO_ENABLE_SPI_VIDEO_DEVICE
|
||||
else if (config.spi != nullptr) {
|
||||
video_device_name = ESP_VIDEO_SPI_DEVICE_NAME;
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE
|
||||
else if (config.usb_uvc != nullptr) {
|
||||
video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(config.usb_uvc->uvc.uvc_dev_num);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (video_device_name == nullptr) {
|
||||
ESP_LOGE(TAG, "no video device is enabled");
|
||||
return;
|
||||
}
|
||||
|
||||
video_fd_ = open(video_device_name, O_RDWR);
|
||||
|
||||
if (video_fd_ < 0) {
|
||||
ESP_LOGE(TAG, "open %s failed, errno=%d(%s)", video_device_name, errno, strerror(errno));
|
||||
#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
log_available_video_devices();
|
||||
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
|
||||
return;
|
||||
}
|
||||
|
||||
struct v4l2_capability cap = {};
|
||||
if (ioctl(video_fd_, VIDIOC_QUERYCAP, &cap) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_QUERYCAP failed, errno=%d(%s)", errno, strerror(errno));
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
ESP_LOGD(
|
||||
TAG,
|
||||
"VIDIOC_QUERYCAP: driver=%s, card=%s, bus_info=%s, version=0x%08x, capabilities=0x%08x, device_caps=0x%08x",
|
||||
cap.driver, cap.card, cap.bus_info, cap.version, cap.capabilities, cap.device_caps);
|
||||
|
||||
struct v4l2_format format = {};
|
||||
format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
if (ioctl(video_fd_, VIDIOC_G_FMT, &format) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_G_FMT failed, errno=%d(%s)", errno, strerror(errno));
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
return;
|
||||
}
|
||||
ESP_LOGD(TAG, "VIDIOC_G_FMT: pixelformat=0x%08x, width=%d, height=%d", format.fmt.pix.pixelformat,
|
||||
format.fmt.pix.width, format.fmt.pix.height);
|
||||
CAM_PRINT_FOURCC(format.fmt.pix.pixelformat);
|
||||
|
||||
struct v4l2_format setformat = {};
|
||||
setformat.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
setformat.fmt.pix.width = format.fmt.pix.width;
|
||||
setformat.fmt.pix.height = format.fmt.pix.height;
|
||||
|
||||
struct v4l2_fmtdesc fmtdesc = {};
|
||||
fmtdesc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
fmtdesc.index = 0;
|
||||
uint32_t best_fmt = 0;
|
||||
int best_rank = 1 << 30; // large number
|
||||
// 优先级: YUV422P > RGB565 > RGB24 > GREY
|
||||
// 注: 当前版本中 YUV422P 实际输出为 YUYV。YUYV 色彩格式在后续的处理中更节省内存空间。
|
||||
auto get_rank = [](uint32_t fmt) -> int {
|
||||
switch (fmt) {
|
||||
case V4L2_PIX_FMT_YUV422P:
|
||||
return 0;
|
||||
case V4L2_PIX_FMT_RGB565:
|
||||
return 1;
|
||||
case V4L2_PIX_FMT_RGB24:
|
||||
return 2;
|
||||
case V4L2_PIX_FMT_GREY:
|
||||
return 3;
|
||||
default:
|
||||
return 1 << 29; // unsupported
|
||||
}
|
||||
};
|
||||
while (ioctl(video_fd_, VIDIOC_ENUM_FMT, &fmtdesc) == 0) {
|
||||
ESP_LOGD(TAG, "VIDIOC_ENUM_FMT: pixelformat=0x%08x, description=%s", fmtdesc.pixelformat, fmtdesc.description);
|
||||
CAM_PRINT_FOURCC(fmtdesc.pixelformat);
|
||||
int rank = get_rank(fmtdesc.pixelformat);
|
||||
if (rank < best_rank) {
|
||||
best_rank = rank;
|
||||
best_fmt = fmtdesc.pixelformat;
|
||||
}
|
||||
fmtdesc.index++;
|
||||
}
|
||||
if (best_rank < (1 << 29)) {
|
||||
setformat.fmt.pix.pixelformat = best_fmt;
|
||||
sensor_format_ = best_fmt;
|
||||
}
|
||||
|
||||
if (!setformat.fmt.pix.pixelformat) {
|
||||
ESP_LOGE(TAG, "no supported pixel format found");
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
ESP_LOGD(TAG, "selected pixel format: 0x%08x", setformat.fmt.pix.pixelformat);
|
||||
|
||||
if (ioctl(video_fd_, VIDIOC_S_FMT, &setformat) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_S_FMT failed, errno=%d(%s)", errno, strerror(errno));
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
frame_.width = setformat.fmt.pix.width;
|
||||
frame_.height = setformat.fmt.pix.height;
|
||||
|
||||
// 申请缓冲并mmap
|
||||
struct v4l2_requestbuffers req = {};
|
||||
req.count = strcmp(video_device_name, ESP_VIDEO_MIPI_CSI_DEVICE_NAME) == 0 ? 2 : 1;
|
||||
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
req.memory = V4L2_MEMORY_MMAP;
|
||||
if (ioctl(video_fd_, VIDIOC_REQBUFS, &req) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_REQBUFS failed");
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
mmap_buffers_.resize(req.count);
|
||||
for (uint32_t i = 0; i < req.count; i++) {
|
||||
struct v4l2_buffer buf = {};
|
||||
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
buf.memory = V4L2_MEMORY_MMAP;
|
||||
buf.index = i;
|
||||
if (ioctl(video_fd_, VIDIOC_QUERYBUF, &buf) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_QUERYBUF failed");
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
void* start = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, video_fd_, buf.m.offset);
|
||||
if (start == MAP_FAILED) {
|
||||
ESP_LOGE(TAG, "mmap failed");
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
mmap_buffers_[i].start = start;
|
||||
mmap_buffers_[i].length = buf.length;
|
||||
|
||||
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_QBUF failed");
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
if (ioctl(video_fd_, VIDIOC_STREAMON, &type) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_STREAMON failed");
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
sensor_format_ = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE
|
||||
// 当启用 ISP 时,ISP 需要一些照片来初始化参数,因此开启后后台拍摄5s照片并丢弃
|
||||
xTaskCreate(
|
||||
[](void* arg) {
|
||||
Esp32Camera* self = static_cast<Esp32Camera*>(arg);
|
||||
uint16_t capture_count = 0;
|
||||
TickType_t start = xTaskGetTickCount();
|
||||
TickType_t duration = 5000 / portTICK_PERIOD_MS; // 5s
|
||||
while ((xTaskGetTickCount() - start) < duration) {
|
||||
struct v4l2_buffer buf = {};
|
||||
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
buf.memory = V4L2_MEMORY_MMAP;
|
||||
if (ioctl(self->video_fd_, VIDIOC_DQBUF, &buf) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_DQBUF failed during init");
|
||||
vTaskDelay(10 / portTICK_PERIOD_MS);
|
||||
continue;
|
||||
}
|
||||
if (ioctl(self->video_fd_, VIDIOC_QBUF, &buf) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_QBUF failed during init");
|
||||
}
|
||||
capture_count++;
|
||||
}
|
||||
ESP_LOGI(TAG, "Camera init success, captured %d frames in %dms", capture_count,
|
||||
(xTaskGetTickCount() - start) * portTICK_PERIOD_MS);
|
||||
self->streaming_on_ = true;
|
||||
vTaskDelete(NULL);
|
||||
},
|
||||
"CameraInitTask", 4096, this, 5, nullptr);
|
||||
#else
|
||||
ESP_LOGI(TAG, "Camera init success");
|
||||
streaming_on_ = true;
|
||||
#endif // CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE
|
||||
}
|
||||
|
||||
Esp32Camera::~Esp32Camera() {
|
||||
if (fb_) {
|
||||
esp_camera_fb_return(fb_);
|
||||
fb_ = nullptr;
|
||||
if (streaming_on_ && video_fd_ >= 0) {
|
||||
int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
ioctl(video_fd_, VIDIOC_STREAMOFF, &type);
|
||||
}
|
||||
esp_camera_deinit();
|
||||
for (auto& b : mmap_buffers_) {
|
||||
if (b.start && b.length) {
|
||||
munmap(b.start, b.length);
|
||||
}
|
||||
}
|
||||
if (video_fd_ >= 0) {
|
||||
close(video_fd_);
|
||||
video_fd_ = -1;
|
||||
}
|
||||
sensor_format_ = 0;
|
||||
esp_video_deinit();
|
||||
}
|
||||
|
||||
void Esp32Camera::SetExplainUrl(const std::string& url, const std::string& token) {
|
||||
@ -44,98 +314,266 @@ bool Esp32Camera::Capture() {
|
||||
encoder_thread_.join();
|
||||
}
|
||||
|
||||
auto start_time = esp_timer_get_time();
|
||||
int frames_to_get = 2;
|
||||
// Try to get a stable frame
|
||||
for (int i = 0; i < frames_to_get; i++) {
|
||||
if (fb_ != nullptr) {
|
||||
esp_camera_fb_return(fb_);
|
||||
}
|
||||
fb_ = esp_camera_fb_get();
|
||||
if (fb_ == nullptr) {
|
||||
ESP_LOGE(TAG, "Camera capture failed");
|
||||
if (!streaming_on_ || video_fd_ < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
struct v4l2_buffer buf = {};
|
||||
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
buf.memory = V4L2_MEMORY_MMAP;
|
||||
if (ioctl(video_fd_, VIDIOC_DQBUF, &buf) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_DQBUF failed");
|
||||
return false;
|
||||
}
|
||||
if (i == 2) {
|
||||
// 保存帧副本到PSRAM
|
||||
if (frame_.data) {
|
||||
heap_caps_free(frame_.data);
|
||||
frame_.data = nullptr;
|
||||
frame_.format = 0;
|
||||
}
|
||||
frame_.len = buf.bytesused;
|
||||
frame_.data = (uint8_t*)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
||||
if (!frame_.data) {
|
||||
ESP_LOGE(TAG, "alloc frame copy failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
ESP_LOGD(TAG, "frame.len = %d, frame.width = %d, frame.height = %d", frame_.len, frame_.width,
|
||||
frame_.height);
|
||||
ESP_LOG_BUFFER_HEXDUMP(TAG, frame_.data, MIN(frame_.len, 256), ESP_LOG_DEBUG);
|
||||
|
||||
switch (sensor_format_) {
|
||||
case V4L2_PIX_FMT_RGB565:
|
||||
case V4L2_PIX_FMT_RGB24:
|
||||
case V4L2_PIX_FMT_YUYV:
|
||||
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
|
||||
{
|
||||
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
|
||||
auto dst16 = (uint16_t*)frame_.data;
|
||||
size_t count = (size_t)mmap_buffers_[buf.index].length / 2;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
dst16[i] = __builtin_bswap16(src16[i]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
memcpy(frame_.data, mmap_buffers_[buf.index].start, frame_.len);
|
||||
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
|
||||
frame_.format = sensor_format_;
|
||||
break;
|
||||
case V4L2_PIX_FMT_YUV422P: {
|
||||
// 这个格式是 422 YUYV,不是 planer
|
||||
frame_.format = V4L2_PIX_FMT_YUYV;
|
||||
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
|
||||
{
|
||||
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
|
||||
auto dst16 = (uint16_t*)frame_.data;
|
||||
size_t count = (size_t)mmap_buffers_[buf.index].length / 2;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
dst16[i] = __builtin_bswap16(src16[i]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
memcpy(frame_.data, mmap_buffers_[buf.index].start, frame_.len);
|
||||
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
|
||||
break;
|
||||
}
|
||||
case V4L2_PIX_FMT_RGB565X: {
|
||||
// 大端序的 RGB565 需要转换为小端序
|
||||
// 目前 esp_video 的大小端都会返回格式为 RGB565,不会返回格式为 RGB565X,此 case 用于未来版本兼容
|
||||
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
|
||||
auto dst16 = (uint16_t*)frame_.data;
|
||||
size_t pixel_count = (size_t)frame_.width * (size_t)frame_.height;
|
||||
for (size_t i = 0; i < pixel_count; i++) {
|
||||
dst16[i] = __builtin_bswap16(src16[i]);
|
||||
}
|
||||
frame_.format = V4L2_PIX_FMT_RGB565;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ESP_LOGE(TAG, "unsupported sensor format: 0x%08x", sensor_format_);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
|
||||
ESP_LOGE(TAG, "VIDIOC_QBUF failed");
|
||||
}
|
||||
}
|
||||
auto end_time = esp_timer_get_time();
|
||||
ESP_LOGI(TAG, "Camera captured %d frames in %d ms", frames_to_get, int((end_time - start_time) / 1000));
|
||||
|
||||
// 显示预览图片
|
||||
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
|
||||
if (display != nullptr) {
|
||||
auto data = (uint8_t*)heap_caps_malloc(fb_->len, MALLOC_CAP_SPIRAM);
|
||||
if (data == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
|
||||
if (!frame_.data) {
|
||||
return false;
|
||||
}
|
||||
uint16_t w = frame_.width;
|
||||
uint16_t h = frame_.height;
|
||||
size_t lvgl_image_size = frame_.len;
|
||||
size_t stride = ((w * 2) + 3) & ~3; // 4字节对齐
|
||||
lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565;
|
||||
uint8_t* data = nullptr;
|
||||
|
||||
auto src = (uint16_t*)fb_->buf;
|
||||
auto dst = (uint16_t*)data;
|
||||
size_t pixel_count = fb_->len / 2;
|
||||
for (size_t i = 0; i < pixel_count; i++) {
|
||||
// 交换每个16位字内的字节
|
||||
dst[i] = __builtin_bswap16(src[i]);
|
||||
switch (frame_.format) {
|
||||
case V4L2_PIX_FMT_YUYV:
|
||||
// color_format = LV_COLOR_FORMAT_YUY2;
|
||||
// [[fallthrough]];
|
||||
// LV_COLOR_FORMAT_YUY2 的显示似乎有问题,暂时转换为 RGB565 显示
|
||||
{
|
||||
color_format = LV_COLOR_FORMAT_RGB565;
|
||||
data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
||||
lvgl_image_size = w * h * 2;
|
||||
if (data == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
|
||||
return false;
|
||||
}
|
||||
const uint8_t* src = (const uint8_t*)frame_.data;
|
||||
size_t src_len = frame_.len;
|
||||
size_t dst_off = 0;
|
||||
|
||||
auto clamp = [](int v) -> uint8_t {
|
||||
if (v < 0) return 0;
|
||||
if (v > 255) return 255;
|
||||
return (uint8_t)v;
|
||||
};
|
||||
|
||||
// 每 4 字节处理两个像素: Y0 U Y1 V
|
||||
for (size_t i = 0; i + 3 < src_len; i += 4) {
|
||||
int y0 = (int)src[i + 0];
|
||||
int u = (int)src[i + 1];
|
||||
int y1 = (int)src[i + 2];
|
||||
int v = (int)src[i + 3];
|
||||
|
||||
int c0 = y0 - 16;
|
||||
int c1 = y1 - 16;
|
||||
int d = u - 128;
|
||||
int e = v - 128;
|
||||
|
||||
// 常用整数近似转换
|
||||
int r0 = (298 * c0 + 409 * e + 128) >> 8;
|
||||
int g0 = (298 * c0 - 100 * d - 208 * e + 128) >> 8;
|
||||
int b0 = (298 * c0 + 516 * d + 128) >> 8;
|
||||
|
||||
int r1 = (298 * c1 + 409 * e + 128) >> 8;
|
||||
int g1 = (298 * c1 - 100 * d - 208 * e + 128) >> 8;
|
||||
int b1 = (298 * c1 + 516 * d + 128) >> 8;
|
||||
|
||||
uint8_t cr0 = clamp(r0);
|
||||
uint8_t cg0 = clamp(g0);
|
||||
uint8_t cb0 = clamp(b0);
|
||||
|
||||
uint8_t cr1 = clamp(r1);
|
||||
uint8_t cg1 = clamp(g1);
|
||||
uint8_t cb1 = clamp(b1);
|
||||
|
||||
// RGB565 打包
|
||||
uint16_t pix0 = (uint16_t)(((cr0 >> 3) << 11) | ((cg0 >> 2) << 5) | (cb0 >> 3));
|
||||
uint16_t pix1 = (uint16_t)(((cr1 >> 3) << 11) | ((cg1 >> 2) << 5) | (cb1 >> 3));
|
||||
|
||||
// 小端序:低字节先写入
|
||||
data[dst_off++] = (uint8_t)(pix0 & 0xFF);
|
||||
data[dst_off++] = (uint8_t)((pix0 >> 8) & 0xFF);
|
||||
|
||||
data[dst_off++] = (uint8_t)(pix1 & 0xFF);
|
||||
data[dst_off++] = (uint8_t)((pix1 >> 8) & 0xFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case V4L2_PIX_FMT_RGB565:
|
||||
// 默认的 color_format 就是 LV_COLOR_FORMAT_RGB565
|
||||
data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
||||
if (data == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
|
||||
return false;
|
||||
}
|
||||
memcpy(data, frame_.data, frame_.len);
|
||||
lvgl_image_size = frame_.len; // fallthrough 时兼顾 YUYV 与 RGB565
|
||||
break;
|
||||
|
||||
case V4L2_PIX_FMT_RGB24: {
|
||||
// RGB888 需要转换为 RGB565
|
||||
color_format = LV_COLOR_FORMAT_RGB565;
|
||||
data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
||||
uint16_t* dst16 = (uint16_t*)data;
|
||||
if (data == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
|
||||
return false;
|
||||
}
|
||||
const uint8_t* src = frame_.data;
|
||||
size_t pixel_count = (size_t)w * (size_t)h;
|
||||
for (size_t i = 0; i < pixel_count; i++) {
|
||||
uint8_t r = src[i * 3 + 0];
|
||||
uint8_t g = src[i * 3 + 1];
|
||||
uint8_t b = src[i * 3 + 2];
|
||||
dst16[i] = (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
|
||||
}
|
||||
lvgl_image_size = w * h * 2;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ESP_LOGE(TAG, "unsupported frame format: 0x%08x", frame_.format);
|
||||
return false;
|
||||
}
|
||||
|
||||
auto image = std::make_unique<LvglAllocatedImage>(data, fb_->len, fb_->width, fb_->height, fb_->width * 2, LV_COLOR_FORMAT_RGB565);
|
||||
auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
|
||||
display->SetPreviewImage(std::move(image));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Esp32Camera::SetHMirror(bool enabled) {
|
||||
sensor_t *s = esp_camera_sensor_get();
|
||||
if (s == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to get camera sensor");
|
||||
if (video_fd_ < 0)
|
||||
return false;
|
||||
struct v4l2_ext_controls ctrls = {};
|
||||
struct v4l2_ext_control ctrl = {};
|
||||
ctrl.id = V4L2_CID_HFLIP;
|
||||
ctrl.value = enabled ? 1 : 0;
|
||||
ctrls.ctrl_class = V4L2_CTRL_CLASS_USER;
|
||||
ctrls.count = 1;
|
||||
ctrls.controls = &ctrl;
|
||||
if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) {
|
||||
ESP_LOGE(TAG, "set HFLIP failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
esp_err_t err = s->set_hmirror(s, enabled);
|
||||
if (err != ESP_OK) {
|
||||
ESP_LOGE(TAG, "Failed to set horizontal mirror: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
ESP_LOGI(TAG, "Camera horizontal mirror set to: %s", enabled ? "enabled" : "disabled");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Esp32Camera::SetVFlip(bool enabled) {
|
||||
sensor_t *s = esp_camera_sensor_get();
|
||||
if (s == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to get camera sensor");
|
||||
if (video_fd_ < 0)
|
||||
return false;
|
||||
struct v4l2_ext_controls ctrls = {};
|
||||
struct v4l2_ext_control ctrl = {};
|
||||
ctrl.id = V4L2_CID_VFLIP;
|
||||
ctrl.value = enabled ? 1 : 0;
|
||||
ctrls.ctrl_class = V4L2_CTRL_CLASS_USER;
|
||||
ctrls.count = 1;
|
||||
ctrls.controls = &ctrl;
|
||||
if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) {
|
||||
ESP_LOGE(TAG, "set VFLIP failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
esp_err_t err = s->set_vflip(s, enabled);
|
||||
if (err != ESP_OK) {
|
||||
ESP_LOGE(TAG, "Failed to set vertical flip: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
ESP_LOGI(TAG, "Camera vertical flip set to: %s", enabled ? "enabled" : "disabled");
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief 将摄像头捕获的图像发送到远程服务器进行AI分析和解释
|
||||
*
|
||||
*
|
||||
* 该函数将当前摄像头缓冲区中的图像编码为JPEG格式,并通过HTTP POST请求
|
||||
* 以multipart/form-data的形式发送到指定的解释服务器。服务器将根据提供的
|
||||
* 问题对图像进行AI分析并返回结果。
|
||||
*
|
||||
*
|
||||
* 实现特点:
|
||||
* - 使用独立线程编码JPEG,与主线程分离
|
||||
* - 采用分块传输编码(chunked transfer encoding)优化内存使用
|
||||
* - 通过队列机制实现编码线程和发送线程的数据同步
|
||||
* - 支持设备ID、客户端ID和认证令牌的HTTP头部配置
|
||||
*
|
||||
*
|
||||
* @param question 要向AI提出的关于图像的问题,将作为表单字段发送
|
||||
* @return std::string 服务器返回的JSON格式响应字符串
|
||||
* 成功时包含AI分析结果,失败时包含错误信息
|
||||
* 格式示例:{"success": true, "result": "分析结果"}
|
||||
* {"success": false, "message": "错误信息"}
|
||||
*
|
||||
*
|
||||
* @note 调用此函数前必须先调用SetExplainUrl()设置服务器URL
|
||||
* @note 函数会等待之前的编码线程完成后再开始新的处理
|
||||
* @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息
|
||||
@ -154,17 +592,19 @@ std::string Esp32Camera::Explain(const std::string& question) {
|
||||
|
||||
// We spawn a thread to encode the image to JPEG using optimized encoder (cost about 500ms and 8KB SRAM)
|
||||
encoder_thread_ = std::thread([this, jpeg_queue]() {
|
||||
image_to_jpeg_cb(fb_->buf, fb_->len, fb_->width, fb_->height, fb_->format, 80,
|
||||
uint16_t w = frame_.width ? frame_.width : 320;
|
||||
uint16_t h = frame_.height ? frame_.height : 240;
|
||||
v4l2_pix_fmt_t enc_fmt = frame_.format;
|
||||
image_to_jpeg_cb(
|
||||
frame_.data, frame_.len, w, h, enc_fmt, 80,
|
||||
[](void* arg, size_t index, const void* data, size_t len) -> size_t {
|
||||
auto jpeg_queue = (QueueHandle_t)arg;
|
||||
JpegChunk chunk = {
|
||||
.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM),
|
||||
.len = len
|
||||
};
|
||||
memcpy(chunk.data, data, len);
|
||||
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
|
||||
return len;
|
||||
}, jpeg_queue);
|
||||
auto jpeg_queue = (QueueHandle_t)arg;
|
||||
JpegChunk chunk = {.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM), .len = len};
|
||||
memcpy(chunk.data, data, len);
|
||||
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
|
||||
return len;
|
||||
},
|
||||
jpeg_queue);
|
||||
});
|
||||
|
||||
auto network = Board::GetInstance().GetNetwork();
|
||||
@ -195,7 +635,7 @@ std::string Esp32Camera::Explain(const std::string& question) {
|
||||
vQueueDelete(jpeg_queue);
|
||||
throw std::runtime_error("Failed to connect to explain URL");
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
// 第一块:question字段
|
||||
std::string question_field;
|
||||
@ -224,7 +664,7 @@ std::string Esp32Camera::Explain(const std::string& question) {
|
||||
break;
|
||||
}
|
||||
if (chunk.data == nullptr) {
|
||||
break; // The last chunk
|
||||
break; // The last chunk
|
||||
}
|
||||
http->Write((const char*)chunk.data, chunk.len);
|
||||
total_sent += chunk.len;
|
||||
@ -254,7 +694,7 @@ std::string Esp32Camera::Explain(const std::string& question) {
|
||||
|
||||
// Get remain task stack size
|
||||
size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
|
||||
ESP_LOGI(TAG, "Explain image size=%dx%d, compressed size=%d, remain stack size=%d, question=%s\n%s",
|
||||
fb_->width, fb_->height, total_sent, remain_stack_size, question.c_str(), result.c_str());
|
||||
ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s",
|
||||
(int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str());
|
||||
return result;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user