462 lines
16 KiB
C++
462 lines
16 KiB
C++
#include "sdkconfig.h"
|
|
|
|
#include <esp_heap_caps.h>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <esp_log.h>
|
|
#include <img_converters.h>
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
|
|
#include "esp32_camera.h"
|
|
#include "board.h"
|
|
#include "display.h"
|
|
#include "lvgl_display.h"
|
|
#include "mcp_server.h"
|
|
#include "system_info.h"
|
|
#include "jpg/image_to_jpeg.h"
|
|
#include "esp_timer.h"
|
|
|
|
#define TAG "Esp32Camera"
|
|
|
|
namespace {
|
|
constexpr float kClosedEyeScoreRatio = 0.58f;
|
|
constexpr int kEyeSampleStep = 4;
|
|
|
|
uint8_t Rgb565ToLuma(uint16_t pixel) {
|
|
uint8_t r = ((pixel >> 11) & 0x1f) << 3;
|
|
uint8_t g = ((pixel >> 5) & 0x3f) << 2;
|
|
uint8_t b = (pixel & 0x1f) << 3;
|
|
return static_cast<uint8_t>((static_cast<uint16_t>(r) * 30 +
|
|
static_cast<uint16_t>(g) * 59 +
|
|
static_cast<uint16_t>(b) * 11) / 100);
|
|
}
|
|
|
|
void DrawRectRgb565(uint16_t* pixels, int width, int height, int x0, int y0, int x1, int y1, uint16_t color) {
|
|
if (pixels == nullptr || width <= 0 || height <= 0) {
|
|
return;
|
|
}
|
|
x0 = std::clamp(x0, 0, width - 1);
|
|
x1 = std::clamp(x1, 0, width - 1);
|
|
y0 = std::clamp(y0, 0, height - 1);
|
|
y1 = std::clamp(y1, 0, height - 1);
|
|
for (int x = x0; x <= x1; x++) {
|
|
pixels[y0 * width + x] = color;
|
|
pixels[y1 * width + x] = color;
|
|
}
|
|
for (int y = y0; y <= y1; y++) {
|
|
pixels[y * width + x0] = color;
|
|
pixels[y * width + x1] = color;
|
|
}
|
|
}
|
|
} // namespace
|
|
|
|
Esp32Camera::Esp32Camera(const camera_config_t &config) {
|
|
esp_err_t err = esp_camera_init(&config);
|
|
if (err != ESP_OK) {
|
|
ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", err);
|
|
return;
|
|
}
|
|
|
|
sensor_t *s = esp_camera_sensor_get();
|
|
if (s) {
|
|
if (s->id.PID == GC0308_PID) {
|
|
s->set_hmirror(s, 0); // Control camera mirror: 1 for mirror, 0 for normal
|
|
}
|
|
ESP_LOGI(TAG, "Camera initialized: format=%d", config.pixel_format);
|
|
}
|
|
|
|
streaming_on_ = true;
|
|
}
|
|
|
|
Esp32Camera::~Esp32Camera() {
|
|
if (streaming_on_) {
|
|
if (current_fb_) {
|
|
esp_camera_fb_return(current_fb_);
|
|
current_fb_ = nullptr;
|
|
}
|
|
if (encode_buf_) {
|
|
heap_caps_free(encode_buf_);
|
|
encode_buf_ = nullptr;
|
|
encode_buf_size_ = 0;
|
|
}
|
|
esp_camera_deinit();
|
|
streaming_on_ = false;
|
|
}
|
|
}
|
|
|
|
void Esp32Camera::SetExplainUrl(const std::string &url, const std::string &token) {
|
|
explain_url_ = url;
|
|
explain_token_ = token;
|
|
}
|
|
|
|
bool Esp32Camera::Capture() {
|
|
if (encoder_thread_.joinable()) {
|
|
encoder_thread_.join();
|
|
}
|
|
|
|
if (!streaming_on_) {
|
|
return false;
|
|
}
|
|
|
|
// Get the latest frame, discard old frames for real-time performance
|
|
for (int i = 0; i < 2; i++) {
|
|
if (current_fb_) {
|
|
esp_camera_fb_return(current_fb_);
|
|
}
|
|
current_fb_ = esp_camera_fb_get();
|
|
if (!current_fb_) {
|
|
ESP_LOGE(TAG, "Camera capture failed");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Prepare encode buffer for RGB565 format (with optional byte swapping)
|
|
if (current_fb_->format == PIXFORMAT_RGB565) {
|
|
size_t pixel_count = current_fb_->width * current_fb_->height;
|
|
size_t data_size = pixel_count * 2;
|
|
|
|
// Allocate or reallocate encode buffer if needed
|
|
if (encode_buf_size_ < data_size) {
|
|
if (encode_buf_) {
|
|
heap_caps_free(encode_buf_);
|
|
}
|
|
encode_buf_ = (uint8_t *)heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
|
if (encode_buf_ == nullptr) {
|
|
ESP_LOGE(TAG, "Failed to allocate memory for encode buffer");
|
|
encode_buf_size_ = 0;
|
|
return false;
|
|
}
|
|
encode_buf_size_ = data_size;
|
|
}
|
|
|
|
// Copy data to encode buffer with optional byte swapping
|
|
uint16_t *src = (uint16_t *)current_fb_->buf;
|
|
uint16_t *dst = (uint16_t *)encode_buf_;
|
|
if (swap_bytes_enabled_) {
|
|
for (size_t i = 0; i < pixel_count; i++) {
|
|
dst[i] = __builtin_bswap16(src[i]);
|
|
}
|
|
} else {
|
|
memcpy(encode_buf_, current_fb_->buf, data_size);
|
|
}
|
|
|
|
// Allocate separate buffer for preview display
|
|
uint8_t *preview_data = (uint8_t *)heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
|
if (preview_data != nullptr) {
|
|
memcpy(preview_data, encode_buf_, data_size);
|
|
auto display = dynamic_cast<LvglDisplay *>(Board::GetInstance().GetDisplay());
|
|
if (display != nullptr) {
|
|
display->SetPreviewImage(std::make_unique<LvglAllocatedImage>(preview_data, data_size, current_fb_->width, current_fb_->height, current_fb_->width * 2, LV_COLOR_FORMAT_RGB565));
|
|
} else {
|
|
heap_caps_free(preview_data);
|
|
}
|
|
}
|
|
} else if (current_fb_->format == PIXFORMAT_JPEG) {
|
|
// JPEG format preview usually requires decoding, skip preview display for now, just log
|
|
ESP_LOGW(TAG, "JPEG capture success, len=%zu, but not supported for preview", current_fb_->len);
|
|
}
|
|
|
|
ESP_LOGI(TAG, "Captured frame: %dx%d, len=%zu, format=%d",
|
|
current_fb_->width, current_fb_->height, current_fb_->len, current_fb_->format);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Esp32Camera::SetHMirror(bool enabled) {
|
|
sensor_t *s = esp_camera_sensor_get();
|
|
if (!s) {
|
|
return false;
|
|
}
|
|
s->set_hmirror(s, enabled ? 1 : 0);
|
|
return true;
|
|
}
|
|
|
|
bool Esp32Camera::SetVFlip(bool enabled) {
|
|
sensor_t *s = esp_camera_sensor_get();
|
|
if (!s) {
|
|
return false;
|
|
}
|
|
s->set_vflip(s, enabled ? 1 : 0);
|
|
return true;
|
|
}
|
|
|
|
bool Esp32Camera::SetSwapBytes(bool enabled) {
|
|
swap_bytes_enabled_ = enabled;
|
|
return true;
|
|
}
|
|
|
|
bool Esp32Camera::DetectDrowsiness(CameraDrowsinessResult& result, bool show_debug_preview) {
|
|
result = {};
|
|
|
|
if (encoder_thread_.joinable()) {
|
|
encoder_thread_.join();
|
|
}
|
|
if (!streaming_on_) {
|
|
return false;
|
|
}
|
|
|
|
if (current_fb_) {
|
|
esp_camera_fb_return(current_fb_);
|
|
current_fb_ = nullptr;
|
|
}
|
|
|
|
camera_fb_t* fb = esp_camera_fb_get();
|
|
if (!fb) {
|
|
ESP_LOGW(TAG, "Camera drowsiness capture failed");
|
|
return false;
|
|
}
|
|
|
|
if (fb->format != PIXFORMAT_RGB565 || fb->width < 80 || fb->height < 80) {
|
|
ESP_LOGW(TAG, "Unsupported drowsiness frame: %dx%d format=%d",
|
|
fb->width, fb->height, fb->format);
|
|
esp_camera_fb_return(fb);
|
|
return false;
|
|
}
|
|
|
|
const int width = fb->width;
|
|
const int height = fb->height;
|
|
const int x0 = width * 22 / 100;
|
|
const int x1 = width * 78 / 100;
|
|
const int y0 = height * 24 / 100;
|
|
const int y1 = height * 46 / 100;
|
|
const uint16_t* pixels = reinterpret_cast<const uint16_t*>(fb->buf);
|
|
|
|
if (show_debug_preview) {
|
|
size_t data_size = static_cast<size_t>(width) * static_cast<size_t>(height) * 2;
|
|
auto* preview_data = static_cast<uint8_t*>(heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
|
|
if (preview_data != nullptr) {
|
|
auto* dst = reinterpret_cast<uint16_t*>(preview_data);
|
|
const auto* src = reinterpret_cast<const uint16_t*>(fb->buf);
|
|
size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
|
|
for (size_t i = 0; i < pixel_count; i++) {
|
|
dst[i] = swap_bytes_enabled_ ? __builtin_bswap16(src[i]) : src[i];
|
|
}
|
|
DrawRectRgb565(dst, width, height, x0, y0, x1, y1, 0xF800);
|
|
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
|
|
if (display != nullptr) {
|
|
display->SetPreviewImage(std::make_unique<LvglAllocatedImage>(
|
|
preview_data, data_size, width, height, width * 2, LV_COLOR_FORMAT_RGB565));
|
|
} else {
|
|
heap_caps_free(preview_data);
|
|
}
|
|
}
|
|
}
|
|
|
|
float vertical_edge_sum = 0.0f;
|
|
float horizontal_edge_sum = 0.0f;
|
|
int samples = 0;
|
|
for (int y = y0; y + kEyeSampleStep < y1; y += kEyeSampleStep) {
|
|
for (int x = x0; x + kEyeSampleStep < x1; x += kEyeSampleStep) {
|
|
uint16_t p = pixels[y * width + x];
|
|
uint16_t px = pixels[y * width + x + kEyeSampleStep];
|
|
uint16_t py = pixels[(y + kEyeSampleStep) * width + x];
|
|
if (swap_bytes_enabled_) {
|
|
p = __builtin_bswap16(p);
|
|
px = __builtin_bswap16(px);
|
|
py = __builtin_bswap16(py);
|
|
}
|
|
uint8_t l = Rgb565ToLuma(p);
|
|
vertical_edge_sum += std::abs(static_cast<int>(l) - static_cast<int>(Rgb565ToLuma(py)));
|
|
horizontal_edge_sum += std::abs(static_cast<int>(l) - static_cast<int>(Rgb565ToLuma(px)));
|
|
samples++;
|
|
}
|
|
}
|
|
|
|
esp_camera_fb_return(fb);
|
|
|
|
if (samples == 0) {
|
|
return false;
|
|
}
|
|
|
|
// Open eyes usually keep more vertical texture in the fixed eye band.
|
|
// This is a lightweight central-face heuristic, not a landmark model.
|
|
float score = (vertical_edge_sum + horizontal_edge_sum * 0.35f) / samples;
|
|
if (eye_openness_baseline_ <= 0.0f) {
|
|
eye_openness_baseline_ = score;
|
|
} else if (score > eye_openness_baseline_ * 0.85f) {
|
|
eye_openness_baseline_ = eye_openness_baseline_ * 0.90f + score * 0.10f;
|
|
} else {
|
|
eye_openness_baseline_ = eye_openness_baseline_ * 0.995f + score * 0.005f;
|
|
}
|
|
|
|
result.valid = eye_openness_baseline_ > 1.0f;
|
|
result.eye_openness_score = score;
|
|
result.baseline_score = eye_openness_baseline_;
|
|
result.eyes_closed = result.valid && score < eye_openness_baseline_ * kClosedEyeScoreRatio;
|
|
|
|
ESP_LOGI(TAG, "Drowsiness frame=%dx%d eye_roi=(%d,%d)-(%d,%d) score=%.2f baseline=%.2f closed=%d",
|
|
width, height, x0, y0, x1, y1, result.eye_openness_score, result.baseline_score,
|
|
result.eyes_closed ? 1 : 0);
|
|
return result.valid;
|
|
}
|
|
|
|
std::string Esp32Camera::Explain(const std::string &question) {
|
|
if (explain_url_.empty()) {
|
|
throw std::runtime_error("Image explain URL or token is not set");
|
|
}
|
|
|
|
if (current_fb_ == nullptr) {
|
|
throw std::runtime_error("No camera frame captured");
|
|
}
|
|
|
|
// Create local JPEG queue
|
|
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
|
|
if (jpeg_queue == nullptr) {
|
|
ESP_LOGE(TAG, "Failed to create JPEG queue");
|
|
throw std::runtime_error("Failed to create JPEG queue");
|
|
}
|
|
|
|
// Start encoding thread
|
|
encoder_thread_ = std::thread([this, jpeg_queue]() {
|
|
int64_t start_time = esp_timer_get_time();
|
|
uint16_t w = current_fb_->width;
|
|
uint16_t h = current_fb_->height;
|
|
v4l2_pix_fmt_t enc_fmt;
|
|
switch (current_fb_->format) {
|
|
case PIXFORMAT_RGB565:
|
|
enc_fmt = V4L2_PIX_FMT_RGB565;
|
|
break;
|
|
case PIXFORMAT_YUV422:
|
|
enc_fmt = V4L2_PIX_FMT_YUYV; // YUV422 is actually YUYV format
|
|
break;
|
|
case PIXFORMAT_YUV420:
|
|
enc_fmt = V4L2_PIX_FMT_YUV420;
|
|
break;
|
|
case PIXFORMAT_GRAYSCALE:
|
|
enc_fmt = V4L2_PIX_FMT_GREY;
|
|
break;
|
|
case PIXFORMAT_JPEG:
|
|
enc_fmt = V4L2_PIX_FMT_JPEG;
|
|
break;
|
|
case PIXFORMAT_RGB888:
|
|
enc_fmt = V4L2_PIX_FMT_RGB24;
|
|
break;
|
|
default:
|
|
ESP_LOGE(TAG, "Unsupported pixel format: %d", current_fb_->format);
|
|
return;
|
|
}
|
|
|
|
// Use encode buffer for RGB565, otherwise use original frame buffer
|
|
uint8_t *jpeg_src_buf = current_fb_->buf;
|
|
size_t jpeg_src_len = current_fb_->len;
|
|
if (current_fb_->format == PIXFORMAT_RGB565 && encode_buf_ != nullptr) {
|
|
jpeg_src_buf = encode_buf_;
|
|
jpeg_src_len = encode_buf_size_;
|
|
}
|
|
|
|
bool ok = image_to_jpeg_cb(jpeg_src_buf, jpeg_src_len, w, h, enc_fmt, 80,
|
|
[](void* arg, size_t index, const void* data, size_t len) -> size_t {
|
|
auto jpeg_queue = static_cast<QueueHandle_t>(arg);
|
|
JpegChunk chunk = {.data = nullptr, .len = len};
|
|
if (index == 0 && data != nullptr && len > 0) {
|
|
chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
|
if (chunk.data == nullptr) {
|
|
ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len);
|
|
chunk.len = 0;
|
|
} else {
|
|
memcpy(chunk.data, data, len);
|
|
}
|
|
} else {
|
|
chunk.len = 0; // Sentinel or error
|
|
}
|
|
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
|
|
return len;
|
|
}, jpeg_queue);
|
|
|
|
if (!ok) {
|
|
JpegChunk chunk = {.data = nullptr, .len = 0};
|
|
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
|
|
}
|
|
int64_t end_time = esp_timer_get_time();
|
|
ESP_LOGI(TAG, "JPEG encoding time: %ld ms", int((end_time - start_time) / 1000));
|
|
});
|
|
|
|
auto network = Board::GetInstance().GetNetwork();
|
|
auto http = network->CreateHttp(3);
|
|
std::string boundary = "----ESP32_CAMERA_BOUNDARY";
|
|
|
|
http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
|
|
http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
|
|
if (!explain_token_.empty()) {
|
|
http->SetHeader("Authorization", "Bearer " + explain_token_);
|
|
}
|
|
http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
|
|
http->SetHeader("Transfer-Encoding", "chunked");
|
|
if (!http->Open("POST", explain_url_)) {
|
|
ESP_LOGE(TAG, "Failed to connect to explain URL");
|
|
encoder_thread_.join();
|
|
JpegChunk chunk;
|
|
while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) {
|
|
if (chunk.data != nullptr) {
|
|
heap_caps_free(chunk.data);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
vQueueDelete(jpeg_queue);
|
|
throw std::runtime_error("Failed to connect to explain URL");
|
|
}
|
|
|
|
{
|
|
std::string question_field;
|
|
question_field += "--" + boundary + "\r\n";
|
|
question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
|
|
question_field += "\r\n";
|
|
question_field += question + "\r\n";
|
|
http->Write(question_field.c_str(), question_field.size());
|
|
}
|
|
{
|
|
std::string file_header;
|
|
file_header += "--" + boundary + "\r\n";
|
|
file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
|
|
file_header += "Content-Type: image/jpeg\r\n";
|
|
file_header += "\r\n";
|
|
http->Write(file_header.c_str(), file_header.size());
|
|
}
|
|
|
|
size_t total_sent = 0;
|
|
bool saw_terminator = false;
|
|
while (true) {
|
|
JpegChunk chunk;
|
|
if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) {
|
|
ESP_LOGE(TAG, "Failed to receive JPEG chunk");
|
|
break;
|
|
}
|
|
if (chunk.data == nullptr) {
|
|
saw_terminator = true;
|
|
break;
|
|
}
|
|
http->Write((const char *)chunk.data, chunk.len);
|
|
total_sent += chunk.len;
|
|
heap_caps_free(chunk.data);
|
|
}
|
|
encoder_thread_.join();
|
|
vQueueDelete(jpeg_queue);
|
|
|
|
if (!saw_terminator || total_sent == 0) {
|
|
ESP_LOGE(TAG, "JPEG encoder failed or produced empty output");
|
|
throw std::runtime_error("Failed to encode image to JPEG");
|
|
}
|
|
|
|
{
|
|
std::string multipart_footer;
|
|
multipart_footer += "\r\n--" + boundary + "--\r\n";
|
|
http->Write(multipart_footer.c_str(), multipart_footer.size());
|
|
}
|
|
http->Write("", 0);
|
|
|
|
if (http->GetStatusCode() != 200) {
|
|
ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
|
|
throw std::runtime_error("Failed to upload photo");
|
|
}
|
|
|
|
std::string result = http->ReadAll();
|
|
http->Close();
|
|
|
|
size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
|
|
ESP_LOGI(TAG, "Explain image size=%dx%d, compressed size=%d, remain stack size=%d, question=%s\n%s",
|
|
current_fb_->width, current_fb_->height, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str());
|
|
return result;
|
|
}
|