chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,13 @@
"""structlog-based logging factory.
External usage:
from everos.core.observability.logging import get_logger, configure_logging
logger = get_logger(__name__)
logger.info("event_name", key=value)
"""
from .factory import configure_logging as configure_logging
from .factory import get_logger as get_logger
__all__ = ["configure_logging", "get_logger"]

View File

@ -0,0 +1,117 @@
"""structlog logger factory.
Provides ``get_logger(__name__)`` for module-level logger acquisition.
``configure_logging()`` is called once at process startup (run.py / lifespan)
to set up the structlog processor chain and route stdlib logging through
the same formatter so output stays uniform regardless of the caller.
The configuration follows structlog's official "Foreign Log Integration"
recipe: a single ``ProcessorFormatter`` renders both everos's own
``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
That way all three of the previously divergent prefixes — ``INFO:``,
``[warning ]``, plus the unconfigured no-prefix output — collapse to
one ``[level] event key=value`` shape.
Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
crate and emit straight to stderr without going through Python; this
module cannot reach them. Control their level with ``RUST_LOG`` env.
"""
from __future__ import annotations
import logging
import sys
from typing import Any
import structlog
def get_logger(name: str) -> Any:
    """Hand back a structlog logger for ``name``.

    This is a thin pass-through to ``structlog.get_logger``; modules are
    expected to call it as ``get_logger(__name__)``.

    Args:
        name: Logger name, forwarded positionally to structlog.

    Returns:
        Whatever ``structlog.get_logger`` returns for that name.
    """
    bound_logger = structlog.get_logger(name)
    return bound_logger
def configure_logging(level: str = "INFO") -> None:
    """Configure structlog and stdlib logging once at process startup.

    After this call:

    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
      message flows through the same ``ProcessorFormatter``, so output
      format is identical regardless of which logging API the caller used.
    * The root logger carries one fresh ``StreamHandler`` pointing at
      ``sys.stdout``. Root handlers whose formatter is already a
      ``ProcessorFormatter`` (left over from an earlier call) are dropped
      first, so repeated calls don't duplicate output; root handlers
      installed by anyone else (pytest's ``caplog``, for example) are
      kept.
    * The ``httpx`` / ``httpcore`` / ``urllib3`` loggers are raised to
      ``WARNING`` to silence their per-request INFO lines.

    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
    on the server entry point — without it, uvicorn re-installs its own
    handlers on every startup and overrides what we set here.

    Args:
        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``),
            case-insensitive. Names that don't resolve to an integer
            level constant on the ``logging`` module fall back to ``INFO``.
    """
    # ``logging`` also exposes upper-case attributes that are not levels
    # (``BASIC_FORMAT`` is a str), so a bare ``getattr`` default is not
    # enough: only accept the lookup when it yields an int.
    resolved = getattr(logging, level.upper(), None)
    log_level = resolved if isinstance(resolved, int) else logging.INFO

    shared_processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    # structlog's own loggers feed into stdlib's logging, so the root
    # logger handler decides where output lands and how it's rendered.
    structlog.configure(
        processors=[
            *shared_processors,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        wrapper_class=structlog.make_filtering_bound_logger(log_level),
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # The single formatter shared by both pipelines:
    #   * structlog events arrive already wrapped via ``wrap_for_formatter``;
    #   * foreign records (stdlib LogRecord) get pushed through
    #     ``foreign_pre_chain`` so they pick up the same level / timestamp
    #     fields before hitting ``ConsoleRenderer``.
    formatter = structlog.stdlib.ProcessorFormatter(
        foreign_pre_chain=shared_processors,
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            structlog.dev.ConsoleRenderer(),
        ],
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Drop any handler we installed on a previous ``configure_logging``
    # call (identified by formatter type) so repeated invocations don't
    # produce duplicate output, but keep handlers other parties have
    # attached — pytest's caplog handler in particular has to survive,
    # otherwise tests using the ``caplog`` fixture can't see records
    # that flow through structlog.
    root = logging.getLogger()
    root.handlers = [
        h
        for h in root.handlers
        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
    ]
    root.addHandler(handler)
    root.setLevel(log_level)

    # Third-party HTTP clients log every successful request at INFO level —
    # `httpx` is the worst offender (one line per call, called once per
    # LLM / embedding / rerank request). A single LoCoMo conv run easily
    # produces a thousand such lines, drowning everos's own events. They
    # are useful for debugging API failures, but failures already surface
    # via exceptions + status codes — so demote the success path to WARNING
    # and let real errors still come through.
    for noisy in ("httpx", "httpcore", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

View File

@ -0,0 +1,34 @@
"""Prometheus-style metrics primitives + registry.
External usage:
from everos.core.observability.metrics import (
Counter, Gauge, Histogram, HistogramBuckets,
get_metrics_registry, generate_metrics_response,
)
"""
from .counter import Counter as Counter
from .counter import LabeledCounter as LabeledCounter
from .gauge import Gauge as Gauge
from .gauge import LabeledGauge as LabeledGauge
from .histogram import Histogram as Histogram
from .histogram import HistogramBuckets as HistogramBuckets
from .histogram import LabeledHistogram as LabeledHistogram
from .registry import generate_metrics_response as generate_metrics_response
from .registry import get_metrics_registry as get_metrics_registry
from .registry import reset_metrics_registry as reset_metrics_registry
from .registry import set_metrics_registry as set_metrics_registry
__all__ = [
"Counter",
"Gauge",
"Histogram",
"HistogramBuckets",
"LabeledCounter",
"LabeledGauge",
"LabeledHistogram",
"generate_metrics_response",
"get_metrics_registry",
"reset_metrics_registry",
"set_metrics_registry",
]

View File

@ -0,0 +1,50 @@
"""Counter wrapper around ``prometheus_client.Counter``."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Counter as PromCounter
from .registry import get_metrics_registry
class Counter:
    """Monotonically-increasing counter (totals, error counts).

    Wraps a ``prometheus_client.Counter`` that is created against the
    registry returned by ``get_metrics_registry()`` at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create the underlying Prometheus counter.

        Args:
            name: Metric name.
            description: Passed to Prometheus as the ``documentation`` text.
            labelnames: Label names the metric is declared with.
            namespace: Forwarded unchanged to ``prometheus_client``.
            subsystem: Forwarded unchanged to ``prometheus_client``.
            unit: Forwarded unchanged to ``prometheus_client``.
        """
        prom_kwargs: dict[str, Any] = {
            "name": name,
            "documentation": description,
            "labelnames": labelnames,
            "namespace": namespace,
            "subsystem": subsystem,
            "unit": unit,
            "registry": get_metrics_registry(),
        }
        self._counter = PromCounter(**prom_kwargs)
        # Snapshot of the declared label names, kept on the wrapper.
        self._labelnames = tuple(labelnames)

    def labels(self, **labels: str) -> LabeledCounter:
        """Return the counter slice selected by the given label values."""
        child = self._counter.labels(**labels)
        return LabeledCounter(child)

    def inc(self, amount: float = 1.0) -> None:
        """Increase the counter by ``amount`` (default ``1.0``)."""
        self._counter.inc(amount)
class LabeledCounter:
    """Counter slice with labels applied.

    Holds the object handed to the constructor and forwards ``inc`` to it.
    """

    def __init__(self, labeled: Any) -> None:
        """Store the already-labeled child metric."""
        self._labeled = labeled

    def inc(self, amount: float = 1.0) -> None:
        """Increase the labeled counter by ``amount`` (default ``1.0``)."""
        child = self._labeled
        child.inc(amount)

View File

@ -0,0 +1,66 @@
"""Gauge wrapper around ``prometheus_client.Gauge``.
Async auto-refresh is intentionally not included in v0.1; subclass
:class:`Gauge` and call :meth:`set` from your own scheduling logic when
needed.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Gauge as PromGauge
from .registry import get_metrics_registry
class Gauge:
    """Instantaneous numeric value (queue depth, cache size).

    Wraps a ``prometheus_client.Gauge`` that is created against the
    registry returned by ``get_metrics_registry()`` at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create the underlying Prometheus gauge.

        Args:
            name: Metric name.
            description: Passed to Prometheus as the ``documentation`` text.
            labelnames: Label names the metric is declared with.
            namespace: Forwarded unchanged to ``prometheus_client``.
            subsystem: Forwarded unchanged to ``prometheus_client``.
            unit: Forwarded unchanged to ``prometheus_client``.
        """
        prom_kwargs: dict[str, Any] = {
            "name": name,
            "documentation": description,
            "labelnames": labelnames,
            "namespace": namespace,
            "subsystem": subsystem,
            "unit": unit,
            "registry": get_metrics_registry(),
        }
        self._gauge = PromGauge(**prom_kwargs)

    def labels(self, **labels: str) -> LabeledGauge:
        """Return the gauge slice selected by the given label values."""
        child = self._gauge.labels(**labels)
        return LabeledGauge(child)

    def set(self, value: float) -> None:
        """Set the gauge to ``value``."""
        self._gauge.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.dec(amount)
class LabeledGauge:
    """Gauge slice with labels applied.

    Holds the object handed to the constructor and forwards ``set`` /
    ``inc`` / ``dec`` to it.
    """

    def __init__(self, labeled: Any) -> None:
        """Store the already-labeled child metric."""
        self._labeled = labeled

    def set(self, value: float) -> None:
        """Set the labeled gauge to ``value``."""
        child = self._labeled
        child.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise the labeled gauge by ``amount`` (default ``1.0``)."""
        child = self._labeled
        child.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower the labeled gauge by ``amount`` (default ``1.0``)."""
        child = self._labeled
        child.dec(amount)

View File

@ -0,0 +1,102 @@
"""Histogram wrapper around ``prometheus_client.Histogram``."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Histogram as PromHistogram
from .registry import get_metrics_registry
class HistogramBuckets:
    """Predefined bucket configurations for common workloads.

    Each attribute is an ascending tuple of bucket boundaries that can be
    passed as the ``buckets`` argument of a histogram.
    """

    # General-purpose spread: 0.005 up to 10.0.
    DEFAULT: tuple[float, ...] = (
        0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
    )

    # Finer low end, capped at 0.5.
    FAST: tuple[float, ...] = (
        0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5,
    )

    # Wider tail: 0.01 up to 30.0.
    API_CALL: tuple[float, ...] = (
        0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0,
    )

    # Coarse spread for long-running work: 0.1 up to 60.0.
    BATCH: tuple[float, ...] = (
        0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0,
    )

    # 0.001 up to 5.0.
    DATABASE: tuple[float, ...] = (
        0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
    )
class Histogram:
    """Distribution of observed values (latency, sizes).

    Wraps a ``prometheus_client.Histogram`` that is created against the
    registry returned by ``get_metrics_registry()`` at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
    ) -> None:
        """Create the underlying Prometheus histogram.

        Args:
            name: Metric name.
            description: Passed to Prometheus as the ``documentation`` text.
            labelnames: Label names the metric is declared with.
            namespace: Forwarded unchanged to ``prometheus_client``.
            subsystem: Forwarded unchanged to ``prometheus_client``.
            unit: Forwarded unchanged to ``prometheus_client``.
            buckets: Bucket boundaries; converted to a tuple before being
                handed over. Defaults to ``HistogramBuckets.DEFAULT``.
        """
        prom_kwargs: dict[str, Any] = {
            "name": name,
            "documentation": description,
            "labelnames": labelnames,
            "namespace": namespace,
            "subsystem": subsystem,
            "unit": unit,
            "buckets": tuple(buckets),
            "registry": get_metrics_registry(),
        }
        self._histogram = PromHistogram(**prom_kwargs)

    def labels(self, **labels: str) -> LabeledHistogram:
        """Return the histogram slice selected by the given label values."""
        child = self._histogram.labels(**labels)
        return LabeledHistogram(child)

    def observe(self, amount: float) -> None:
        """Record a single observation of ``amount``."""
        self._histogram.observe(amount)

    def time(self) -> Any:
        """Return whatever the wrapped histogram's ``time()`` returns."""
        timer = self._histogram.time()
        return timer
class LabeledHistogram:
    """Histogram slice with labels applied.

    Holds the object handed to the constructor and forwards ``observe`` /
    ``time`` to it.
    """

    def __init__(self, labeled: Any) -> None:
        """Store the already-labeled child metric."""
        self._labeled = labeled

    def observe(self, amount: float) -> None:
        """Record a single observation of ``amount`` on the labeled slice."""
        child = self._labeled
        child.observe(amount)

    def time(self) -> Any:
        """Return whatever the labeled child's ``time()`` returns."""
        timer = self._labeled.time()
        return timer

View File

@ -0,0 +1,35 @@
"""Prometheus metrics registry singleton."""
from __future__ import annotations
from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
# Module-level registry slot. ``None`` means nothing has been selected yet;
# ``get_metrics_registry`` then fills it with ``prometheus_client.REGISTRY``.
_registry: CollectorRegistry | None = None
def get_metrics_registry() -> CollectorRegistry:
    """Return the global metrics registry.

    When no registry has been selected yet, ``prometheus_client.REGISTRY``
    is stored in the module-level slot and returned.

    Returns:
        The registry currently held in the module-level slot.
    """
    global _registry
    if _registry is not None:
        return _registry
    # First access without an override: pin the prometheus_client default.
    _registry = REGISTRY
    return _registry
def set_metrics_registry(registry: CollectorRegistry) -> None:
    """Override the global registry (mainly for tests).

    Args:
        registry: Registry to store in the module-level slot; later calls
            to ``get_metrics_registry`` return it.
    """
    global _registry
    _registry = registry
def generate_metrics_response() -> bytes:
    """Render the current registry into Prometheus exposition format.

    Returns:
        The output of ``prometheus_client.generate_latest`` for the
        registry returned by ``get_metrics_registry()``.
    """
    active_registry = get_metrics_registry()
    payload = generate_latest(active_registry)
    return payload
def reset_metrics_registry() -> None:
    """Reset the global registry override (mainly for tests).

    Clears the module-level slot back to ``None``, so the next
    ``get_metrics_registry`` call selects the default registry again.
    """
    global _registry
    _registry = None

View File

@ -0,0 +1,32 @@
"""Tracing utilities — W3C-compatible request id generation.
External usage::
from everos.core.observability.tracing import gen_request_id
"""
from __future__ import annotations
from uuid import uuid4
def gen_request_id() -> str:
    """Mint a request id in W3C trace-context ``trace_id`` format.

    The id is a random UUID4 rendered as 32 lowercase hex characters
    (128 bits wide, no dashes, no prefix), the same shape as an
    OpenTelemetry trace identifier. Routes and services that need a fresh
    request id (because upstream middleware did not inject one) should
    call this helper instead of inventing their own uuid / prefix format,
    so ids stay compatible with OpenTelemetry exporters and standard APM
    tooling.

    Example::

        >>> rid = gen_request_id()
        >>> len(rid)
        32
    """
    # Zero-padded lowercase hex of the 128-bit integer value; this is the
    # same string ``UUID.hex`` produces.
    fresh = uuid4()
    return format(fresh.int, "032x")
__all__ = ["gen_request_id"]