#include "sdkconfig.h"

// NOTE(review): the angle-bracket header names and the cast/template target
// types in this file were not legible in the reviewed copy; they have been
// reconstructed from how each name is used. Confirm against the project tree.
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>

#include "esp32_camera.h"
#include "board.h"
#include "display.h"
#include "lvgl_display.h"
#include "mcp_server.h"
#include "system_info.h"
#include "jpg/image_to_jpeg.h"
#include "esp_timer.h"

#define TAG "Esp32Camera"

namespace {

// A frame counts as "eyes closed" when its score is below this fraction of the
// running baseline.
constexpr float kClosedEyeScoreRatio = 0.58f;

// Pixel stride used both to step through the eye band and as the distance
// between the two pixels of each luma difference.
constexpr int kEyeSampleStep = 4;

/// Converts one RGB565 pixel to an 8-bit luma value.
///
/// Each channel is expanded to 8 bits by a left shift and the channels are
/// combined with integer weights 30 / 59 / 11 (out of 100).
uint8_t Rgb565ToLuma(uint16_t pixel) {
    const uint8_t r = ((pixel >> 11) & 0x1f) << 3;
    const uint8_t g = ((pixel >> 5) & 0x3f) << 2;
    const uint8_t b = (pixel & 0x1f) << 3;
    return static_cast<uint8_t>(
        (static_cast<int>(r) * 30 + static_cast<int>(g) * 59 + static_cast<int>(b) * 11) / 100);
}

/// Draws a one-pixel rectangle outline into a row-major RGB565 image.
///
/// Corner coordinates are clamped to the image, so out-of-range values are
/// safe. Nothing is drawn when `pixels` is null or a dimension is not positive.
void DrawRectRgb565(uint16_t* pixels, int width, int height, int x0, int y0, int x1, int y1,
                    uint16_t color) {
    if (pixels == nullptr || width <= 0 || height <= 0) {
        return;
    }
    x0 = std::clamp(x0, 0, width - 1);
    x1 = std::clamp(x1, 0, width - 1);
    y0 = std::clamp(y0, 0, height - 1);
    y1 = std::clamp(y1, 0, height - 1);

    // Top and bottom edges.
    for (int x = x0; x <= x1; x++) {
        pixels[y0 * width + x] = color;
        pixels[y1 * width + x] = color;
    }
    // Left and right edges.
    for (int y = y0; y <= y1; y++) {
        pixels[y * width + x0] = color;
        pixels[y * width + x1] = color;
    }
}

}  // namespace

/// Initializes the camera driver with `config`.
///
/// On failure the error is logged and the object is left with streaming
/// disabled, which makes Capture() and DetectDrowsiness() return false.
Esp32Camera::Esp32Camera(const camera_config_t &config) {
    esp_err_t err = esp_camera_init(&config);
    if (err != ESP_OK) {
        ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", err);
        return;
    }

    sensor_t *s = esp_camera_sensor_get();
    if (s) {
        if (s->id.PID == GC0308_PID) {
            s->set_hmirror(s, 0);  // Control camera mirror: 1 for mirror, 0 for normal
        }
        ESP_LOGI(TAG, "Camera initialized: format=%d", config.pixel_format);
    }

    streaming_on_ = true;
}

/// Releases the held frame, the encode buffer and the camera driver.
Esp32Camera::~Esp32Camera() {
    // Destroying a std::thread that is still joinable calls std::terminate, and
    // the encoder thread reads current_fb_ / encode_buf_, so wait for it first.
    if (encoder_thread_.joinable()) {
        encoder_thread_.join();
    }

    if (streaming_on_) {
        if (current_fb_) {
            esp_camera_fb_return(current_fb_);
            current_fb_ = nullptr;
        }
        if (encode_buf_) {
            heap_caps_free(encode_buf_);
            encode_buf_ = nullptr;
            encode_buf_size_ = 0;
        }
        esp_camera_deinit();
        streaming_on_ = false;
    }
}

/// Stores the endpoint and bearer token used by Explain().
void Esp32Camera::SetExplainUrl(const std::string &url, const std::string &token) {
    explain_url_ = url;
    explain_token_ = token;
}

/// Grabs a fresh frame and keeps it in `current_fb_` for a later Explain().
///
/// For RGB565 frames the pixels are also copied (optionally byte-swapped) into
/// `encode_buf_`, and a second copy is handed to the display as a preview.
///
/// @return false when streaming is off, the capture fails, or the encode
///         buffer cannot be allocated; true otherwise.
bool Esp32Camera::Capture() {
    // The encoder thread reads current_fb_ / encode_buf_; let it finish first.
    if (encoder_thread_.joinable()) {
        encoder_thread_.join();
    }

    if (!streaming_on_) {
        return false;
    }

    // Get the latest frame, discard old frames for real-time performance
    for (int i = 0; i < 2; i++) {
        if (current_fb_) {
            esp_camera_fb_return(current_fb_);
        }
        current_fb_ = esp_camera_fb_get();
        if (!current_fb_) {
            ESP_LOGE(TAG, "Camera capture failed");
            return false;
        }
    }

    // Prepare encode buffer for RGB565 format (with optional byte swapping)
    if (current_fb_->format == PIXFORMAT_RGB565) {
        size_t pixel_count = current_fb_->width * current_fb_->height;
        size_t data_size = pixel_count * 2;

        // Allocate or reallocate encode buffer if needed
        if (encode_buf_size_ < data_size) {
            if (encode_buf_) {
                heap_caps_free(encode_buf_);
            }
            encode_buf_ = static_cast<uint8_t *>(
                heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
            if (encode_buf_ == nullptr) {
                ESP_LOGE(TAG, "Failed to allocate memory for encode buffer");
                encode_buf_size_ = 0;
                return false;
            }
            encode_buf_size_ = data_size;
        }

        // Copy data to encode buffer with optional byte swapping
        const uint16_t *src = reinterpret_cast<const uint16_t *>(current_fb_->buf);
        uint16_t *dst = reinterpret_cast<uint16_t *>(encode_buf_);
        if (swap_bytes_enabled_) {
            for (size_t i = 0; i < pixel_count; i++) {
                dst[i] = __builtin_bswap16(src[i]);
            }
        } else {
            memcpy(encode_buf_, current_fb_->buf, data_size);
        }

        // Allocate separate buffer for preview display
        uint8_t *preview_data = static_cast<uint8_t *>(
            heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
        if (preview_data != nullptr) {
            memcpy(preview_data, encode_buf_, data_size);
            auto display = dynamic_cast<LvglDisplay *>(Board::GetInstance().GetDisplay());
            if (display != nullptr) {
                // The image object receives preview_data; it is only freed here
                // when there is no display to hand it to.
                display->SetPreviewImage(std::make_unique<LvglAllocatedImage>(
                    preview_data, data_size, current_fb_->width, current_fb_->height,
                    current_fb_->width * 2, LV_COLOR_FORMAT_RGB565));
            } else {
                heap_caps_free(preview_data);
            }
        }
    } else if (current_fb_->format == PIXFORMAT_JPEG) {
        // JPEG format preview usually requires decoding, skip preview display for now, just log
        ESP_LOGW(TAG, "JPEG capture success, len=%zu, but not supported for preview",
                 current_fb_->len);
    }

    ESP_LOGI(TAG, "Captured frame: %dx%d, len=%zu, format=%d", current_fb_->width,
             current_fb_->height, current_fb_->len, current_fb_->format);
    return true;
}

/// Enables or disables horizontal mirroring on the sensor.
/// @return false when no sensor is available.
bool Esp32Camera::SetHMirror(bool enabled) {
    sensor_t *s = esp_camera_sensor_get();
    if (!s) {
        return false;
    }
    s->set_hmirror(s, enabled ? 1 : 0);
    return true;
}

/// Enables or disables vertical flipping on the sensor.
/// @return false when no sensor is available.
bool Esp32Camera::SetVFlip(bool enabled) {
    sensor_t *s = esp_camera_sensor_get();
    if (!s) {
        return false;
    }
    s->set_vflip(s, enabled ? 1 : 0);
    return true;
}

/// Selects whether RGB565 pixels are byte-swapped when copied or sampled.
/// @return always true.
bool Esp32Camera::SetSwapBytes(bool enabled) {
    swap_bytes_enabled_ = enabled;
    return true;
}

/// Estimates whether the eyes are closed from one RGB565 frame.
///
/// A fixed band of the frame (22-78 % of the width, 24-46 % of the height) is
/// sampled every `kEyeSampleStep` pixels and the mean absolute luma difference
/// to the pixel below and to the right is turned into a score. The score is
/// compared against a slowly adapting baseline kept in
/// `eye_openness_baseline_`.
///
/// Any frame previously held in `current_fb_` is returned to the driver, so a
/// following Explain() needs a new Capture().
///
/// @param result              Reset on entry; filled in when a score was computed.
/// @param show_debug_preview  When true, a copy of the frame with the sampled
///                            band outlined is sent to the display.
/// @return `result.valid`, or false when no usable frame could be analysed.
bool Esp32Camera::DetectDrowsiness(CameraDrowsinessResult& result, bool show_debug_preview) {
    result = {};

    if (encoder_thread_.joinable()) {
        encoder_thread_.join();
    }
    if (!streaming_on_) {
        return false;
    }
    if (current_fb_) {
        esp_camera_fb_return(current_fb_);
        current_fb_ = nullptr;
    }

    camera_fb_t* fb = esp_camera_fb_get();
    if (!fb) {
        ESP_LOGW(TAG, "Camera drowsiness capture failed");
        return false;
    }
    if (fb->format != PIXFORMAT_RGB565 || fb->width < 80 || fb->height < 80) {
        ESP_LOGW(TAG, "Unsupported drowsiness frame: %dx%d format=%d", fb->width, fb->height,
                 fb->format);
        esp_camera_fb_return(fb);
        return false;
    }

    const int width = fb->width;
    const int height = fb->height;
    // Fixed region of interest, expressed as percentages of the frame size.
    const int x0 = width * 22 / 100;
    const int x1 = width * 78 / 100;
    const int y0 = height * 24 / 100;
    const int y1 = height * 46 / 100;
    const uint16_t* pixels = reinterpret_cast<const uint16_t*>(fb->buf);

    if (show_debug_preview) {
        const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
        const size_t data_size = pixel_count * 2;
        auto* preview_data = static_cast<uint8_t*>(
            heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
        if (preview_data != nullptr) {
            auto* dst = reinterpret_cast<uint16_t*>(preview_data);
            for (size_t i = 0; i < pixel_count; i++) {
                dst[i] = swap_bytes_enabled_ ? __builtin_bswap16(pixels[i]) : pixels[i];
            }
            // Outline the sampled band (0xF800 = all red bits set in RGB565).
            DrawRectRgb565(dst, width, height, x0, y0, x1, y1, 0xF800);

            auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
            if (display != nullptr) {
                display->SetPreviewImage(std::make_unique<LvglAllocatedImage>(
                    preview_data, data_size, width, height, width * 2, LV_COLOR_FORMAT_RGB565));
            } else {
                heap_caps_free(preview_data);
            }
        }
    }

    float vertical_edge_sum = 0.0f;
    float horizontal_edge_sum = 0.0f;
    int samples = 0;
    // The "+ kEyeSampleStep <" bounds keep the neighbour pixel inside the band.
    for (int y = y0; y + kEyeSampleStep < y1; y += kEyeSampleStep) {
        for (int x = x0; x + kEyeSampleStep < x1; x += kEyeSampleStep) {
            uint16_t p = pixels[y * width + x];
            uint16_t px = pixels[y * width + x + kEyeSampleStep];
            uint16_t py = pixels[(y + kEyeSampleStep) * width + x];
            if (swap_bytes_enabled_) {
                p = __builtin_bswap16(p);
                px = __builtin_bswap16(px);
                py = __builtin_bswap16(py);
            }
            const uint8_t l = Rgb565ToLuma(p);
            vertical_edge_sum += std::abs(static_cast<int>(l) - static_cast<int>(Rgb565ToLuma(py)));
            horizontal_edge_sum += std::abs(static_cast<int>(l) - static_cast<int>(Rgb565ToLuma(px)));
            samples++;
        }
    }
    esp_camera_fb_return(fb);

    if (samples == 0) {
        return false;
    }

    // Open eyes usually keep more vertical texture in the fixed eye band.
    // This is a lightweight central-face heuristic, not a landmark model.
    float score = (vertical_edge_sum + horizontal_edge_sum * 0.35f) / samples;

    // Baseline update: the first score seeds it; scores above 85 % of the
    // baseline are blended in at 10 %, lower scores at only 0.5 %.
    if (eye_openness_baseline_ <= 0.0f) {
        eye_openness_baseline_ = score;
    } else if (score > eye_openness_baseline_ * 0.85f) {
        eye_openness_baseline_ = eye_openness_baseline_ * 0.90f + score * 0.10f;
    } else {
        eye_openness_baseline_ = eye_openness_baseline_ * 0.995f + score * 0.005f;
    }

    result.valid = eye_openness_baseline_ > 1.0f;
    result.eye_openness_score = score;
    result.baseline_score = eye_openness_baseline_;
    result.eyes_closed = result.valid && score < eye_openness_baseline_ * kClosedEyeScoreRatio;

    ESP_LOGI(TAG,
             "Drowsiness frame=%dx%d eye_roi=(%d,%d)-(%d,%d) score=%.2f baseline=%.2f closed=%d",
             width, height, x0, y0, x1, y1, result.eye_openness_score, result.baseline_score,
             result.eyes_closed ? 1 : 0);
    return result.valid;
}

/// Uploads the last captured frame as a JPEG together with `question` and
/// returns the response body.
///
/// The frame is JPEG-encoded on `encoder_thread_`, which passes chunks through
/// a FreeRTOS queue while this function writes them into a chunked
/// multipart/form-data POST to `explain_url_`. A chunk with a null `data`
/// pointer marks the end of the stream (or an encoder error).
///
/// @param question  Text sent as the "question" form field.
/// @return The HTTP response body.
/// @throws std::runtime_error when the URL is not set, no frame is held, the
///         queue cannot be created, the connection fails, encoding fails or
///         yields no data, or the status code is not 200.
std::string Esp32Camera::Explain(const std::string &question) {
    if (explain_url_.empty()) {
        throw std::runtime_error("Image explain URL or token is not set");
    }
    if (current_fb_ == nullptr) {
        throw std::runtime_error("No camera frame captured");
    }

    // Assigning to a std::thread that is still joinable calls std::terminate.
    if (encoder_thread_.joinable()) {
        encoder_thread_.join();
    }

    // Create local JPEG queue
    QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
    if (jpeg_queue == nullptr) {
        ESP_LOGE(TAG, "Failed to create JPEG queue");
        throw std::runtime_error("Failed to create JPEG queue");
    }

    // Frees every chunk still in the queue without blocking. Only called after
    // the encoder thread has been joined, so nothing new can arrive.
    auto discard_pending = [jpeg_queue]() {
        JpegChunk pending;
        while (xQueueReceive(jpeg_queue, &pending, 0) == pdPASS) {
            if (pending.data != nullptr) {
                heap_caps_free(pending.data);
            }
        }
    };

    // Start encoding thread
    encoder_thread_ = std::thread([this, jpeg_queue]() {
        int64_t start_time = esp_timer_get_time();
        uint16_t w = current_fb_->width;
        uint16_t h = current_fb_->height;

        v4l2_pix_fmt_t enc_fmt;
        switch (current_fb_->format) {
            case PIXFORMAT_RGB565:
                enc_fmt = V4L2_PIX_FMT_RGB565;
                break;
            case PIXFORMAT_YUV422:
                enc_fmt = V4L2_PIX_FMT_YUYV;  // YUV422 is actually YUYV format
                break;
            case PIXFORMAT_YUV420:
                enc_fmt = V4L2_PIX_FMT_YUV420;
                break;
            case PIXFORMAT_GRAYSCALE:
                enc_fmt = V4L2_PIX_FMT_GREY;
                break;
            case PIXFORMAT_JPEG:
                enc_fmt = V4L2_PIX_FMT_JPEG;
                break;
            case PIXFORMAT_RGB888:
                enc_fmt = V4L2_PIX_FMT_RGB24;
                break;
            default: {
                ESP_LOGE(TAG, "Unsupported pixel format: %d", current_fb_->format);
                // The consumer waits on the queue with no timeout, so a
                // terminator must be sent on this path too.
                JpegChunk terminator = {.data = nullptr, .len = 0};
                xQueueSend(jpeg_queue, &terminator, portMAX_DELAY);
                return;
            }
        }

        // Use encode buffer for RGB565, otherwise use original frame buffer
        uint8_t *jpeg_src_buf = current_fb_->buf;
        size_t jpeg_src_len = current_fb_->len;
        if (current_fb_->format == PIXFORMAT_RGB565 && encode_buf_ != nullptr) {
            jpeg_src_buf = encode_buf_;
            // encode_buf_size_ is the buffer capacity and only ever grows;
            // Capture() copies exactly width * height * 2 bytes into it.
            jpeg_src_len = static_cast<size_t>(w) * static_cast<size_t>(h) * 2;
        }

        bool ok = image_to_jpeg_cb(
            jpeg_src_buf, jpeg_src_len, w, h, enc_fmt, 80,
            [](void* arg, size_t index, const void* data, size_t len) -> size_t {
                auto jpeg_queue = static_cast<QueueHandle_t>(arg);
                JpegChunk chunk = {.data = nullptr, .len = len};
                if (index == 0 && data != nullptr && len > 0) {
                    chunk.data = static_cast<uint8_t*>(
                        heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
                    if (chunk.data == nullptr) {
                        ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len);
                        chunk.len = 0;
                    } else {
                        memcpy(chunk.data, data, len);
                    }
                } else {
                    chunk.len = 0;  // Sentinel or error
                }
                xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
                return len;
            },
            jpeg_queue);

        if (!ok) {
            JpegChunk chunk = {.data = nullptr, .len = 0};
            xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
        }

        int64_t end_time = esp_timer_get_time();
        ESP_LOGI(TAG, "JPEG encoding time: %ld ms",
                 static_cast<long>((end_time - start_time) / 1000));
    });

    auto network = Board::GetInstance().GetNetwork();
    auto http = network->CreateHttp(3);

    std::string boundary = "----ESP32_CAMERA_BOUNDARY";

    http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
    http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
    if (!explain_token_.empty()) {
        http->SetHeader("Authorization", "Bearer " + explain_token_);
    }
    http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
    http->SetHeader("Transfer-Encoding", "chunked");

    if (!http->Open("POST", explain_url_)) {
        ESP_LOGE(TAG, "Failed to connect to explain URL");
        // Drain up to the terminator BEFORE joining: the encoder blocks in
        // xQueueSend when the queue is full, so joining first could deadlock.
        JpegChunk chunk;
        while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) {
            if (chunk.data == nullptr) {
                break;
            }
            heap_caps_free(chunk.data);
        }
        encoder_thread_.join();
        discard_pending();
        vQueueDelete(jpeg_queue);
        throw std::runtime_error("Failed to connect to explain URL");
    }

    {
        // First part: the question text field.
        std::string question_field;
        question_field += "--" + boundary + "\r\n";
        question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
        question_field += "\r\n";
        question_field += question + "\r\n";
        http->Write(question_field.c_str(), question_field.size());
    }
    {
        // Second part: headers of the JPEG file field; the body follows below.
        std::string file_header;
        file_header += "--" + boundary + "\r\n";
        file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
        file_header += "Content-Type: image/jpeg\r\n";
        file_header += "\r\n";
        http->Write(file_header.c_str(), file_header.size());
    }

    // Forward encoder output until the terminator chunk arrives.
    size_t total_sent = 0;
    bool saw_terminator = false;
    while (true) {
        JpegChunk chunk;
        if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) {
            ESP_LOGE(TAG, "Failed to receive JPEG chunk");
            break;
        }
        if (chunk.data == nullptr) {
            saw_terminator = true;
            break;
        }
        http->Write(reinterpret_cast<const char *>(chunk.data), chunk.len);
        total_sent += chunk.len;
        heap_caps_free(chunk.data);
    }

    encoder_thread_.join();
    // Free anything the encoder queued after the terminator we stopped at.
    discard_pending();
    vQueueDelete(jpeg_queue);

    if (!saw_terminator || total_sent == 0) {
        ESP_LOGE(TAG, "JPEG encoder failed or produced empty output");
        throw std::runtime_error("Failed to encode image to JPEG");
    }

    {
        std::string multipart_footer;
        multipart_footer += "\r\n--" + boundary + "--\r\n";
        http->Write(multipart_footer.c_str(), multipart_footer.size());
    }
    // Zero-length write; presumably ends the chunked body — TODO confirm
    // against the Http implementation.
    http->Write("", 0);

    if (http->GetStatusCode() != 200) {
        ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
        throw std::runtime_error("Failed to upload photo");
    }

    std::string result = http->ReadAll();
    http->Close();

    size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
    ESP_LOGI(TAG,
             "Explain image size=%dx%d, compressed size=%d, remain stack size=%d, question=%s\n%s",
             current_fb_->width, current_fb_->height, (int)total_sent, (int)remain_stack_size,
             question.c_str(), result.c_str());
    return result;
}