feat: cam and silence detect sleep

This commit is contained in:
0Xiao0
2026-06-17 15:02:16 +08:00
parent 154ce461d7
commit a8c6c62c92
9 changed files with 564 additions and 163 deletions

View File

@ -5,6 +5,8 @@
#include <cstring>
#include <esp_log.h>
#include <img_converters.h>
#include <algorithm>
#include <cmath>
#include "esp32_camera.h"
#include "board.h"
@ -17,6 +19,38 @@
#define TAG "Esp32Camera"
namespace {
// A frame is reported as "eyes closed" when its eye-band score drops below
// this fraction of the running baseline score.
constexpr float kClosedEyeScoreRatio{0.58f};
// Pixel stride used when sampling the eye band; it is also the distance to the
// right-hand and lower neighbour each sample is compared against.
constexpr int kEyeSampleStep{4};
// Converts one RGB565 pixel to an 8-bit luma value.
//
// Each channel is widened to 8 bits by a plain left shift (the low bits stay
// zero) and the channels are mixed with integer weights 30/59/11 out of 100.
// Because of the plain shift, pure white (0xFFFF) maps to 250 rather than 255.
uint8_t Rgb565ToLuma(uint16_t pixel) {
    const unsigned red = ((pixel >> 11) & 0x1fu) << 3;
    const unsigned green = ((pixel >> 5) & 0x3fu) << 2;
    const unsigned blue = (pixel & 0x1fu) << 3;

    const unsigned weighted = red * 30u + green * 59u + blue * 11u;
    return static_cast<uint8_t>(weighted / 100u);
}
// Draws a one-pixel-wide rectangle outline into a tightly packed RGB565 image
// (row stride == width pixels).
//
// pixels        Destination image; nothing is drawn when null.
// width/height  Image size in pixels; nothing is drawn when either is <= 0.
// x0,y0,x1,y1   Two opposite corners (inclusive). They may be given in any
//               order and may lie outside the image; they are ordered and then
//               clamped to the image bounds before drawing.
// color         Value written to every outline pixel.
void DrawRectRgb565(uint16_t* pixels, int width, int height, int x0, int y0, int x1, int y1, uint16_t color) {
    if (pixels == nullptr || width <= 0 || height <= 0) {
        return;
    }

    // Order the corners first so a reversed rectangle still produces a full
    // outline instead of silently skipping one or both edge loops.
    const int left = std::clamp(std::min(x0, x1), 0, width - 1);
    const int right = std::clamp(std::max(x0, x1), 0, width - 1);
    const int top = std::clamp(std::min(y0, y1), 0, height - 1);
    const int bottom = std::clamp(std::max(y0, y1), 0, height - 1);

    // Top and bottom edges.
    for (int x = left; x <= right; x++) {
        pixels[top * width + x] = color;
        pixels[bottom * width + x] = color;
    }
    // Left and right edges.
    for (int y = top; y <= bottom; y++) {
        pixels[y * width + left] = color;
        pixels[y * width + right] = color;
    }
}
} // namespace
Esp32Camera::Esp32Camera(const camera_config_t &config) {
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
@ -152,6 +186,111 @@ bool Esp32Camera::SetSwapBytes(bool enabled) {
return true;
}
// Captures one camera frame and estimates, from a fixed band of the image,
// whether the eyes look closed.
//
// The score is the mean luma difference between sampled pixels and their
// lower/right neighbours inside the band. It is compared against a running
// baseline kept in eye_openness_baseline_, which this call also updates.
//
// result              Reset on entry. On the normal path it receives the
//                     frame score, the updated baseline, the validity flag and
//                     the closed-eye verdict.
// show_debug_preview  When true and an LvglDisplay is available, a copy of the
//                     frame with the eye band outlined in red is pushed to the
//                     display as the preview image.
//
// Returns true only when a frame was analysed and the baseline is above the
// minimum considered meaningful; false on any capture/format failure, when
// streaming is off, or while the baseline is still too low.
//
// Side effects: joins encoder_thread_ if joinable, and returns current_fb_ to
// the driver (so any previously held frame is no longer available).
bool Esp32Camera::DetectDrowsiness(CameraDrowsinessResult& result, bool show_debug_preview) {
    // Smallest frame side accepted for analysis.
    constexpr size_t kMinFrameSide = 80;
    // Eye band expressed as percentages of the frame width/height.
    constexpr int kRoiLeftPercent = 22;
    constexpr int kRoiRightPercent = 78;
    constexpr int kRoiTopPercent = 24;
    constexpr int kRoiBottomPercent = 46;
    // Outline colour of the debug rectangle (pure red in RGB565).
    constexpr uint16_t kDebugRectColor = 0xF800;
    // Weight of horizontal differences relative to vertical ones in the score.
    constexpr float kHorizontalEdgeWeight = 0.35f;
    // Baseline tracking: scores above this fraction of the baseline pull the
    // baseline quickly, lower scores only nudge it.
    constexpr float kFastAdaptThreshold = 0.85f;
    constexpr float kFastKeep = 0.90f;
    constexpr float kFastBlend = 0.10f;
    constexpr float kSlowKeep = 0.995f;
    constexpr float kSlowBlend = 0.005f;
    // Baselines at or below this value are treated as not yet usable.
    constexpr float kMinValidBaseline = 1.0f;

    result = {};

    // NOTE(review): presumably the encoder thread may still be using the
    // previous frame; it is joined before any frame buffer is released.
    if (encoder_thread_.joinable()) {
        encoder_thread_.join();
    }

    if (!streaming_on_) {
        return false;
    }

    // Hand back the previously held frame before requesting a new one.
    if (current_fb_) {
        esp_camera_fb_return(current_fb_);
        current_fb_ = nullptr;
    }

    camera_fb_t* fb = esp_camera_fb_get();
    if (!fb) {
        ESP_LOGW(TAG, "Camera drowsiness capture failed");
        return false;
    }

    if (fb->format != PIXFORMAT_RGB565 || fb->width < kMinFrameSide || fb->height < kMinFrameSide) {
        // Cast explicitly so the arguments match the %d conversions.
        ESP_LOGW(TAG, "Unsupported drowsiness frame: %dx%d format=%d",
                 static_cast<int>(fb->width), static_cast<int>(fb->height), static_cast<int>(fb->format));
        esp_camera_fb_return(fb);
        return false;
    }

    const int width = static_cast<int>(fb->width);
    const int height = static_cast<int>(fb->height);
    const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
    const size_t data_size = pixel_count * sizeof(uint16_t);

    // Everything below indexes the buffer as width*height 16-bit pixels, so
    // refuse frames whose buffer is missing or shorter than that.
    if (fb->buf == nullptr || fb->len < data_size) {
        ESP_LOGW(TAG, "Drowsiness frame buffer too small: %u < %u",
                 static_cast<unsigned>(fb->len), static_cast<unsigned>(data_size));
        esp_camera_fb_return(fb);
        return false;
    }

    const int x0 = width * kRoiLeftPercent / 100;
    const int x1 = width * kRoiRightPercent / 100;
    const int y0 = height * kRoiTopPercent / 100;
    const int y1 = height * kRoiBottomPercent / 100;
    const uint16_t* pixels = reinterpret_cast<const uint16_t*>(fb->buf);

    if (show_debug_preview) {
        // Look the display up first so no frame-sized buffer is allocated and
        // filled when there is nowhere to show it.
        auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
        if (display != nullptr) {
            auto* preview_data = static_cast<uint8_t*>(
                heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
            if (preview_data != nullptr) {
                auto* dst = reinterpret_cast<uint16_t*>(preview_data);
                for (size_t i = 0; i < pixel_count; i++) {
                    dst[i] = swap_bytes_enabled_ ? __builtin_bswap16(pixels[i]) : pixels[i];
                }
                DrawRectRgb565(dst, width, height, x0, y0, x1, y1, kDebugRectColor);
                // The image object receives preview_data; it is not freed here.
                display->SetPreviewImage(std::make_unique<LvglAllocatedImage>(
                    preview_data, data_size, width, height, width * 2, LV_COLOR_FORMAT_RGB565));
            } else {
                ESP_LOGW(TAG, "Failed to allocate drowsiness preview buffer");
            }
        }
    }

    float vertical_edge_sum = 0.0f;
    float horizontal_edge_sum = 0.0f;
    int samples = 0;
    // Sample the band on a coarse grid; the loop bounds keep both the right
    // and the lower neighbour inside the band.
    for (int y = y0; y + kEyeSampleStep < y1; y += kEyeSampleStep) {
        for (int x = x0; x + kEyeSampleStep < x1; x += kEyeSampleStep) {
            uint16_t p = pixels[y * width + x];
            uint16_t px = pixels[y * width + x + kEyeSampleStep];
            uint16_t py = pixels[(y + kEyeSampleStep) * width + x];
            if (swap_bytes_enabled_) {
                p = __builtin_bswap16(p);
                px = __builtin_bswap16(px);
                py = __builtin_bswap16(py);
            }

            const int luma = static_cast<int>(Rgb565ToLuma(p));
            vertical_edge_sum += std::abs(luma - static_cast<int>(Rgb565ToLuma(py)));
            horizontal_edge_sum += std::abs(luma - static_cast<int>(Rgb565ToLuma(px)));
            samples++;
        }
    }

    // The frame is no longer read past this point.
    esp_camera_fb_return(fb);

    if (samples == 0) {
        return false;
    }

    // Open eyes usually keep more vertical texture in the fixed eye band.
    // This is a lightweight central-face heuristic, not a landmark model.
    const float score = (vertical_edge_sum + horizontal_edge_sum * kHorizontalEdgeWeight) / samples;

    if (eye_openness_baseline_ <= 0.0f) {
        // First usable frame seeds the baseline.
        eye_openness_baseline_ = score;
    } else if (score > eye_openness_baseline_ * kFastAdaptThreshold) {
        eye_openness_baseline_ = eye_openness_baseline_ * kFastKeep + score * kFastBlend;
    } else {
        // Low scores move the baseline only slowly.
        eye_openness_baseline_ = eye_openness_baseline_ * kSlowKeep + score * kSlowBlend;
    }

    result.valid = eye_openness_baseline_ > kMinValidBaseline;
    result.eye_openness_score = score;
    result.baseline_score = eye_openness_baseline_;
    result.eyes_closed = result.valid && score < eye_openness_baseline_ * kClosedEyeScoreRatio;

    ESP_LOGI(TAG, "Drowsiness frame=%dx%d eye_roi=(%d,%d)-(%d,%d) score=%.2f baseline=%.2f closed=%d",
             width, height, x0, y0, x1, y1, result.eye_openness_score, result.baseline_score,
             result.eyes_closed ? 1 : 0);
    return result.valid;
}
std::string Esp32Camera::Explain(const std::string &question) {
if (explain_url_.empty()) {
throw std::runtime_error("Image explain URL or token is not set");