chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/core/observability/init.py
+++ b/src/everos/core/observability/init.py
--- a/src/everos/core/observability/logging/init.py
+++ b/src/everos/core/observability/logging/init.py
@ -0,0 +1,13 @@
+"""structlog-based logging factory.
+
+External usage:
+    from everos.core.observability.logging import get_logger, configure_logging
+
+    logger = get_logger(__name__)
+    logger.info("event_name", key=value)
+"""
+
+from .factory import configure_logging as configure_logging
+from .factory import get_logger as get_logger
+
+__all__ = ["configure_logging", "get_logger"]
--- a/src/everos/core/observability/logging/factory.py
+++ b/src/everos/core/observability/logging/factory.py
@ -0,0 +1,117 @@
+"""structlog logger factory.
+
+Provides ``get_logger(__name__)`` for module-level logger acquisition.
+``configure_logging()`` is called once at process startup (run.py / lifespan)
+to set up the structlog processor chain and route stdlib logging through
+the same formatter so output stays uniform regardless of the caller.
+
+The configuration follows structlog's official "Foreign Log Integration"
+recipe: a single ``ProcessorFormatter`` renders both everos's own
+``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
+made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
+That way all three of the previously divergent prefixes — ``INFO:``,
+``[warning  ]``, plus the unconfigured no-prefix output — collapse to
+one ``[level] event key=value`` shape.
+
+Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
+crate and emit straight to stderr without going through Python; this
+module cannot reach them. Control their level with ``RUST_LOG`` env.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from typing import Any
+
+import structlog
+
+
+def get_logger(name: str) -> Any:
+    """Look up the structlog logger registered under ``name``.
+
+    Args:
+        name: Logger name, conventionally the caller's ``__name__``.
+
+    Returns:
+        Whatever ``structlog.get_logger(name)`` hands back.
+    """
+    logger = structlog.get_logger(name)
+    return logger
+
+
+def configure_logging(level: str = "INFO") -> None:
+    """Configure structlog and stdlib logging once at process startup.
+
+    After this call:
+
+    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
+      message flows through the same ``ProcessorFormatter``, so output
+      format is identical regardless of which logging API the caller used.
+    * The root logger carries one ``StreamHandler`` of ours pointing at
+      ``sys.stdout``. Handlers left behind by an earlier
+      ``configure_logging`` call (recognised by their
+      ``ProcessorFormatter``) are dropped so output is not duplicated;
+      handlers attached by other parties (pytest's ``caplog`` handler,
+      ``logging.basicConfig``, ...) are kept.
+    * The ``httpx`` / ``httpcore`` / ``urllib3`` loggers are set to
+      ``WARNING`` to silence their per-request INFO lines.
+
+    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
+    on the server entry point — without it, uvicorn re-installs its own
+    handlers on every startup and overrides what we set here.
+
+    Args:
+        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``),
+            matched case-insensitively. Unknown names, and names that
+            resolve to a non-numeric attribute of the ``logging`` module,
+            silently fall back to ``INFO``.
+    """
+    # A bare ``getattr`` would hand back *any* upper-case attribute of the
+    # ``logging`` module (e.g. the ``BASIC_FORMAT`` string), so only accept
+    # the lookup when it is a real numeric level.
+    resolved = getattr(logging, level.upper(), None)
+    log_level = resolved if isinstance(resolved, int) else logging.INFO
+
+    shared_processors: list[Any] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+    ]
+
+    # structlog's own loggers feed into stdlib's logging, so the root
+    # logger handler decides where output lands and how it's rendered.
+    structlog.configure(
+        processors=[
+            *shared_processors,
+            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
+        ],
+        wrapper_class=structlog.make_filtering_bound_logger(log_level),
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    # The single formatter shared by both pipelines:
+    # * structlog events arrive already wrapped via ``wrap_for_formatter``;
+    # * foreign records (stdlib LogRecord) get pushed through
+    #   ``foreign_pre_chain`` so they pick up the same level / timestamp
+    #   fields before hitting ``ConsoleRenderer``.
+    formatter = structlog.stdlib.ProcessorFormatter(
+        foreign_pre_chain=shared_processors,
+        processors=[
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            structlog.dev.ConsoleRenderer(),
+        ],
+    )
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+
+    # Drop any handler we installed on a previous ``configure_logging``
+    # call (identified by formatter type) so repeated invocations don't
+    # produce duplicate output, but keep handlers other parties have
+    # attached — pytest's caplog handler in particular has to survive,
+    # otherwise tests using the ``caplog`` fixture can't see records
+    # that flow through structlog.
+    root = logging.getLogger()
+    root.handlers = [
+        h
+        for h in root.handlers
+        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
+    ]
+    root.addHandler(handler)
+    root.setLevel(log_level)
+
+    # Third-party HTTP clients log every successful request at INFO level —
+    # `httpx` is the worst offender (one line per call, called once per
+    # LLM / embedding / rerank request). A single LoCoMo conv run easily
+    # produces a thousand such lines, drowning everos's own events. They
+    # are useful for debugging API failures, but failures already surface
+    # via exceptions + status codes — so demote the success path to WARNING
+    # and let real errors still come through.
+    for noisy in ("httpx", "httpcore", "urllib3"):
+        logging.getLogger(noisy).setLevel(logging.WARNING)
--- a/src/everos/core/observability/metrics/init.py
+++ b/src/everos/core/observability/metrics/init.py
@ -0,0 +1,34 @@
+"""Prometheus-style metrics primitives + registry.
+
+External usage:
+    from everos.core.observability.metrics import (
+        Counter, Gauge, Histogram, HistogramBuckets,
+        get_metrics_registry, generate_metrics_response,
+    )
+"""
+
+from .counter import Counter as Counter
+from .counter import LabeledCounter as LabeledCounter
+from .gauge import Gauge as Gauge
+from .gauge import LabeledGauge as LabeledGauge
+from .histogram import Histogram as Histogram
+from .histogram import HistogramBuckets as HistogramBuckets
+from .histogram import LabeledHistogram as LabeledHistogram
+from .registry import generate_metrics_response as generate_metrics_response
+from .registry import get_metrics_registry as get_metrics_registry
+from .registry import reset_metrics_registry as reset_metrics_registry
+from .registry import set_metrics_registry as set_metrics_registry
+
+__all__ = [
+    "Counter",
+    "Gauge",
+    "Histogram",
+    "HistogramBuckets",
+    "LabeledCounter",
+    "LabeledGauge",
+    "LabeledHistogram",
+    "generate_metrics_response",
+    "get_metrics_registry",
+    "reset_metrics_registry",
+    "set_metrics_registry",
+]
--- a/src/everos/core/observability/metrics/counter.py
+++ b/src/everos/core/observability/metrics/counter.py
@ -0,0 +1,50 @@
+"""Counter wrapper around ``prometheus_client.Counter``."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Counter as PromCounter
+
+from .registry import get_metrics_registry
+
+
+class Counter:
+    """Ever-growing tally, e.g. request totals or error counts.
+
+    Thin facade over ``prometheus_client.Counter`` that registers itself
+    with the registry returned by ``get_metrics_registry()`` at
+    construction time.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+    ) -> None:
+        """Create the underlying prometheus counter.
+
+        Args:
+            name: Metric name.
+            description: Help text, forwarded as ``documentation``.
+            labelnames: Label names declared for this metric.
+            namespace: Optional metric-name prefix component.
+            subsystem: Optional metric-name prefix component.
+            unit: Optional unit, forwarded unchanged.
+        """
+        active_registry = get_metrics_registry()
+        self._counter = PromCounter(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            registry=active_registry,
+        )
+        # Kept as an immutable snapshot of the declared label names.
+        self._labelnames = tuple(labelnames)
+
+    def labels(self, **labels: str) -> LabeledCounter:
+        """Return the counter slice selected by the given label values."""
+        child = self._counter.labels(**labels)
+        return LabeledCounter(child)
+
+    def inc(self, amount: float = 1.0) -> None:
+        """Increase the counter by ``amount`` (default ``1.0``)."""
+        self._counter.inc(amount)
+
+
+class LabeledCounter:
+    """View of a counter with its label values already bound."""
+
+    def __init__(self, labeled: Any) -> None:
+        """Wrap the labelled child object that increments are forwarded to."""
+        self._labeled = labeled
+
+    def inc(self, amount: float = 1.0) -> None:
+        """Increase the bound counter slice by ``amount`` (default ``1.0``)."""
+        target = self._labeled
+        target.inc(amount)
--- a/src/everos/core/observability/metrics/gauge.py
+++ b/src/everos/core/observability/metrics/gauge.py
@ -0,0 +1,66 @@
+"""Gauge wrapper around ``prometheus_client.Gauge``.
+
+Async auto-refresh is intentionally not included in v0.1; subclass
+:class:`Gauge` and call :meth:`set` from your own scheduling logic when
+needed.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Gauge as PromGauge
+
+from .registry import get_metrics_registry
+
+
+class Gauge:
+    """Point-in-time numeric reading, e.g. queue depth or cache size.
+
+    Thin facade over ``prometheus_client.Gauge`` that registers itself
+    with the registry returned by ``get_metrics_registry()`` at
+    construction time.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+    ) -> None:
+        """Create the underlying prometheus gauge.
+
+        Args:
+            name: Metric name.
+            description: Help text, forwarded as ``documentation``.
+            labelnames: Label names declared for this metric.
+            namespace: Optional metric-name prefix component.
+            subsystem: Optional metric-name prefix component.
+            unit: Optional unit, forwarded unchanged.
+        """
+        active_registry = get_metrics_registry()
+        self._gauge = PromGauge(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            registry=active_registry,
+        )
+
+    def labels(self, **labels: str) -> LabeledGauge:
+        """Return the gauge slice selected by the given label values."""
+        child = self._gauge.labels(**labels)
+        return LabeledGauge(child)
+
+    def set(self, value: float) -> None:
+        """Overwrite the gauge with ``value``."""
+        self._gauge.set(value)
+
+    def inc(self, amount: float = 1.0) -> None:
+        """Raise the gauge by ``amount`` (default ``1.0``)."""
+        self._gauge.inc(amount)
+
+    def dec(self, amount: float = 1.0) -> None:
+        """Lower the gauge by ``amount`` (default ``1.0``)."""
+        self._gauge.dec(amount)
+
+
+class LabeledGauge:
+    """View of a gauge with its label values already bound."""
+
+    def __init__(self, labeled: Any) -> None:
+        """Wrap the labelled child object that updates are forwarded to."""
+        self._labeled = labeled
+
+    def set(self, value: float) -> None:
+        """Overwrite the bound gauge slice with ``value``."""
+        target = self._labeled
+        target.set(value)
+
+    def inc(self, amount: float = 1.0) -> None:
+        """Raise the bound gauge slice by ``amount`` (default ``1.0``)."""
+        target = self._labeled
+        target.inc(amount)
+
+    def dec(self, amount: float = 1.0) -> None:
+        """Lower the bound gauge slice by ``amount`` (default ``1.0``)."""
+        target = self._labeled
+        target.dec(amount)
--- a/src/everos/core/observability/metrics/histogram.py
+++ b/src/everos/core/observability/metrics/histogram.py
@ -0,0 +1,102 @@
+"""Histogram wrapper around ``prometheus_client.Histogram``."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Histogram as PromHistogram
+
+from .registry import get_metrics_registry
+
+
+class HistogramBuckets:
+    """Predefined bucket configurations for common workloads.
+
+    Each attribute is an ascending tuple of bucket upper bounds that can be
+    passed as the ``buckets`` argument of :class:`Histogram`.
+    NOTE(review): the values look like durations in seconds — confirm
+    against the call sites before relying on that.
+    """
+
+    # General-purpose spread: 0.005 up to 10.0. Used as the default
+    # ``buckets`` value of ``Histogram``.
+    DEFAULT: tuple[float, ...] = (
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+        10.0,
+    )
+    # Finer resolution at the low end: 0.001 up to 0.5.
+    FAST: tuple[float, ...] = (0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5)
+    # Wider range topping out at 30.0 (named for outbound API calls).
+    API_CALL: tuple[float, ...] = (
+        0.01,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.0,
+        5.0,
+        10.0,
+        30.0,
+    )
+    # Coarse buckets from 0.1 up to 60.0 (named for batch jobs).
+    BATCH: tuple[float, ...] = (0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0)
+    # 0.001 up to 5.0 (named for database operations).
+    DATABASE: tuple[float, ...] = (
+        0.001,
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+    )
+
+
+class Histogram:
+    """Bucketed distribution of observed values, e.g. latencies or sizes.
+
+    Thin facade over ``prometheus_client.Histogram`` that registers itself
+    with the registry returned by ``get_metrics_registry()`` at
+    construction time.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
+    ) -> None:
+        """Create the underlying prometheus histogram.
+
+        Args:
+            name: Metric name.
+            description: Help text, forwarded as ``documentation``.
+            labelnames: Label names declared for this metric.
+            namespace: Optional metric-name prefix component.
+            subsystem: Optional metric-name prefix component.
+            unit: Optional unit, forwarded unchanged.
+            buckets: Bucket upper bounds; copied into a tuple before being
+                handed to prometheus. Defaults to
+                ``HistogramBuckets.DEFAULT``.
+        """
+        bucket_bounds = tuple(buckets)
+        active_registry = get_metrics_registry()
+        self._histogram = PromHistogram(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            buckets=bucket_bounds,
+            registry=active_registry,
+        )
+
+    def labels(self, **labels: str) -> LabeledHistogram:
+        """Return the histogram slice selected by the given label values."""
+        child = self._histogram.labels(**labels)
+        return LabeledHistogram(child)
+
+    def observe(self, amount: float) -> None:
+        """Record a single observation of ``amount``."""
+        self._histogram.observe(amount)
+
+    def time(self) -> Any:
+        """Return the timer object produced by the wrapped histogram's ``time()``."""
+        timer = self._histogram.time()
+        return timer
+
+
+class LabeledHistogram:
+    """View of a histogram with its label values already bound."""
+
+    def __init__(self, labeled: Any) -> None:
+        """Wrap the labelled child object that calls are forwarded to."""
+        self._labeled = labeled
+
+    def observe(self, amount: float) -> None:
+        """Record a single observation of ``amount`` on the bound slice."""
+        target = self._labeled
+        target.observe(amount)
+
+    def time(self) -> Any:
+        """Return the timer object produced by the bound slice's ``time()``."""
+        timer = self._labeled.time()
+        return timer
--- a/src/everos/core/observability/metrics/registry.py
+++ b/src/everos/core/observability/metrics/registry.py
@ -0,0 +1,35 @@
+"""Prometheus metrics registry singleton."""
+
+from __future__ import annotations
+
+from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
+
+_registry: CollectorRegistry | None = None
+
+
+def get_metrics_registry() -> CollectorRegistry:
+    """Return the process-wide metrics registry.
+
+    When no override has been installed, the module-level slot is
+    populated with ``prometheus_client.REGISTRY`` on first access and that
+    object is returned from then on.
+    """
+    global _registry
+    if _registry is not None:
+        return _registry
+    # First access without an override: fall back to the library default.
+    _registry = REGISTRY
+    return _registry
+
+
+def set_metrics_registry(registry: CollectorRegistry) -> None:
+    """Override the global registry (mainly for tests).
+
+    Args:
+        registry: Registry that subsequent ``get_metrics_registry()`` calls
+            return, until it is replaced again or reset.
+    """
+    global _registry
+    _registry = registry
+
+
+def generate_metrics_response() -> bytes:
+    """Serialise the active registry via ``generate_latest``.
+
+    Returns:
+        The Prometheus exposition-format payload as bytes.
+    """
+    active_registry = get_metrics_registry()
+    return generate_latest(active_registry)
+
+
+def reset_metrics_registry() -> None:
+    """Reset the global registry override (mainly for tests).
+
+    Clears the module-level slot, so the next ``get_metrics_registry()``
+    call falls back to ``prometheus_client.REGISTRY`` again.
+    """
+    global _registry
+    _registry = None
--- a/src/everos/core/observability/tracing/init.py
+++ b/src/everos/core/observability/tracing/init.py
@ -0,0 +1,32 @@
+"""Tracing utilities — W3C-compatible request id generation.
+
+External usage::
+
+    from everos.core.observability.tracing import gen_request_id
+"""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+
+def gen_request_id() -> str:
+    """Mint a fresh request id in W3C trace-context ``trace_id`` format.
+
+    The id is the hex form of a random UUID4: 32 lowercase hex characters
+    (128 bits) with no prefix or dashes, which is also the shape of an
+    OpenTelemetry trace identifier. Routes and services that need a new
+    request id (because upstream middleware did not inject one) should
+    call this helper instead of inventing their own uuid / prefix scheme,
+    so ids stay compatible with OpenTelemetry exporters and standard APM
+    tooling.
+
+    Example::
+
+        >>> rid = gen_request_id()
+        >>> len(rid)
+        32
+    """
+    fresh_uuid = uuid4()
+    return fresh_uuid.hex
+
+
+__all__ = ["gen_request_id"]