chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

350
scripts/check_consistency.py Executable file
View File

@ -0,0 +1,350 @@
#!/usr/bin/env python
"""Check md ↔ LanceDB consistency for an everos corpus.
Three checks per kind:
1. id set equality — md entry ids == LanceDB row entry_ids
2. content_sha256 equality — every shared id matches on both sides
3. id monotonicity (md-only) — within each daily-log md, the numeric
counter at the end of entry.id ascends
from 1 with no gap and no dupe
Two modes:
--mode lifespan (default) Full strict check through the everos app
lifespan stack (sqlite + lance + cascade +
ome). Safe ONLY on an idle corpus (no live
server writing). Covers every kind in
KIND_REGISTRY.
--mode readonly Bypass the lifespan stack, open LanceDB with
a fresh read connection, read md directly.
Safe even on an active corpus, but only
covers the three daily-log kinds (episode /
atomic_fact / foresight).
Examples:
scripts/check_consistency.py ~/.everos-locomo-all-kv-fast
scripts/check_consistency.py ~/.everos-locomo-all-kv-fast --mode readonly
scripts/check_consistency.py ~/.everos-locomo-all-kv-fast --owners joanna,nate
"""
# This script must mutate sys.path before importing everos/tests, and
# uses synchronous pathlib because it's a one-shot CLI, not server code.
# ruff: noqa: E402, ASYNC240
from __future__ import annotations
import argparse
import asyncio
import dataclasses
import os
import re
import sys
from collections import Counter
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(ROOT / "src"))
from dotenv import load_dotenv
load_dotenv(ROOT / ".env")
# ── shared: id counter parsing ──────────────────────────────────────────
_ID_NUM_RE = re.compile(r"_(\d+)$")
def _entry_counter(entry_id: str) -> int | None:
m = _ID_NUM_RE.search(entry_id)
return int(m.group(1)) if m else None
@dataclasses.dataclass
class MonotonicityReport:
path: str
total: int
not_sorted: bool
starts_at_1: bool
gaps: list[int]
dupes: list[int]
bad_format: list[str]
@property
def ok(self) -> bool:
return self.total == 0 or (
not self.not_sorted
and self.starts_at_1
and not self.gaps
and not self.dupes
and not self.bad_format
)
async def _scan_monotonicity(corpus: Path) -> list[MonotonicityReport]:
"""Walk all daily-log md files; report id-counter monotonicity per file."""
from everos.core.persistence import MarkdownReader
daily_dirs = ("/episodes/", "/.atomic_facts/", "/.foresights/", "/.agent_cases/")
reports: list[MonotonicityReport] = []
for md in sorted(corpus.rglob("*.md")):
rel = md.relative_to(corpus).as_posix()
if not (rel.startswith("users/") or rel.startswith("agents/")):
continue
if not any(d in "/" + rel for d in daily_dirs):
continue
parsed = await MarkdownReader.read(md)
counters: list[int] = []
bad_format: list[str] = []
for entry in parsed.entries:
c = _entry_counter(entry.id)
if c is None:
bad_format.append(entry.id)
else:
counters.append(c)
not_sorted = counters != sorted(counters)
starts_at_1 = bool(counters) and min(counters) == 1
gaps: list[int] = []
dupes: list[int] = []
if counters:
seen = set(counters)
for i in range(1, max(counters) + 1):
if i not in seen:
gaps.append(i)
cc = Counter(counters)
dupes = sorted(v for v, n in cc.items() if n > 1)
reports.append(
MonotonicityReport(
path=rel,
total=len(parsed.entries),
not_sorted=not_sorted,
starts_at_1=starts_at_1 if parsed.entries else True,
gaps=gaps,
dupes=dupes,
bad_format=bad_format,
)
)
return reports
def _print_monotonicity(reports: list[MonotonicityReport]) -> int:
issues = sum(1 for r in reports if not r.ok)
if issues == 0:
print(
f" all {len(reports)} daily-log md files have strictly ascending"
" ids from 1"
)
return 0
print(f"{issues}/{len(reports)} md files have id-counter issues:")
for r in reports:
if r.ok:
continue
problems = []
if r.not_sorted:
problems.append("not-sorted")
if not r.starts_at_1 and r.total:
problems.append("not-from-1")
if r.gaps:
preview = r.gaps[:5]
problems.append(f"gaps={preview}{'...' if len(r.gaps) > 5 else ''}")
if r.dupes:
problems.append(f"dupes={r.dupes}")
if r.bad_format:
problems.append(f"bad-format×{len(r.bad_format)}")
print(f" {r.path}: total={r.total} {' '.join(problems)}")
return issues
# ── mode: lifespan ──────────────────────────────────────────────────────
async def run_lifespan_mode(corpus: Path) -> int:
"""Full strict check via app lifespan; covers every kind in KIND_REGISTRY."""
os.environ["EVEROS_MEMORY__ROOT"] = str(corpus)
from everos.config import load_settings
load_settings.cache_clear()
from everos.entrypoints.api.app import create_app
from tests._consistency_assertions import assert_md_lance_strict_consistent
app = create_app()
rc = 0
async with app.router.lifespan_context(app):
# 1+2. id set + sha
print("─── md ↔ LanceDB strict consistency ───")
try:
stats = await assert_md_lance_strict_consistent(corpus)
print(" PASS")
except AssertionError as e:
print(f" DRIFT:\n{e}")
rc = 1
stats = None
if stats is not None:
print()
print(
f" {'kind':<15s} {'md_files':>10s}"
f" {'md_entries':>12s} {'lance_rows':>12s}"
)
print(" " + "" * 53)
for kind, s in stats.items():
print(
f" {kind:<15s} {s.md_file_count:>10d}"
f" {s.md_entry_count:>12d} {s.lance_row_count:>12d}"
)
# 3. id monotonicity
print()
print("─── id monotonicity ───")
reports = await _scan_monotonicity(corpus)
if _print_monotonicity(reports) > 0:
rc = max(rc, 2)
return rc
# ── mode: readonly ──────────────────────────────────────────────────────
async def run_readonly_mode(corpus: Path, owners_filter: list[str] | None) -> int:
"""Direct LanceDB read + md read; no lifespan / cascade / ome started.
Covers the three daily-log kinds; agent_case + user_profile + agent_skill
are NOT checked in this mode (use --mode lifespan on an idle corpus
snapshot for full coverage).
"""
import lancedb
from everos.core.persistence import MarkdownReader
from everos.memory.cascade.handlers.atomic_fact import AtomicFactHandler
from everos.memory.cascade.handlers.episode import EpisodeHandler
from everos.memory.cascade.handlers.foresight import ForesightHandler
from tests._consistency_assertions import _daily_log_sha_for_entry
db = lancedb.connect(str(corpus / ".index" / "lancedb"))
kinds = [
("episode", "episodes", "episode-", EpisodeHandler),
("atomic_fact", ".atomic_facts", "atomic_fact-", AtomicFactHandler),
("foresight", ".foresights", "foresight-", ForesightHandler),
]
# Pick owners
if owners_filter:
owners = owners_filter
else:
owners = (
sorted(p.name for p in (corpus / "users").iterdir() if p.is_dir())
if (corpus / "users").exists()
else []
)
print("─── md ↔ LanceDB consistency (readonly) ───")
rc = 0
for table_name, dir_name, prefix, handler_cls in kinds:
try:
table = db.open_table(table_name)
except FileNotFoundError:
print(f" {table_name}: table not in lancedb (skip)")
continue
for owner in owners:
md_dir = corpus / "users" / owner / dir_name
if not md_dir.exists():
continue
md_files = sorted(md_dir.glob(f"{prefix}*.md"))
md_sha_total: dict[str, str] = {}
for md in md_files:
parsed = await MarkdownReader.read(md)
for entry in parsed.entries:
md_sha_total[entry.id] = _daily_log_sha_for_entry(
handler_cls, entry.as_structured()
)
arr = (
table.search().where(f"owner_id = '{owner}'").limit(100_000).to_arrow()
)
lance_sha = dict(
zip(
arr["entry_id"].to_pylist(),
arr["content_sha256"].to_pylist(),
strict=True,
)
)
only_md = sorted(set(md_sha_total) - set(lance_sha))
only_lance = sorted(set(lance_sha) - set(md_sha_total))
sha_mismatch = sorted(
k
for k in set(md_sha_total) & set(lance_sha)
if md_sha_total[k] != lance_sha[k]
)
ok = not (only_md or only_lance or sha_mismatch)
status = "OK" if ok else "DRIFT"
if not ok:
rc = 1
print(
f" {table_name:<12s} owner={owner:<12s}"
f" md={len(md_sha_total):5d} lance={len(lance_sha):5d}"
f" {status}"
)
if only_md:
print(f" only_in_md (first 5): {only_md[:5]}")
if only_lance:
print(f" only_in_lance (first 5): {only_lance[:5]}")
if sha_mismatch:
print(f" sha_mismatch (first 5): {sha_mismatch[:5]}")
# id monotonicity (md-only, owner-filtered if provided)
print()
print("─── id monotonicity ───")
reports = await _scan_monotonicity(corpus)
if owners_filter:
owner_paths = tuple(f"users/{o}/" for o in owners_filter)
reports = [r for r in reports if any(r.path.startswith(p) for p in owner_paths)]
if _print_monotonicity(reports) > 0:
rc = max(rc, 2)
return rc
# ── main ────────────────────────────────────────────────────────────────
def _parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
p.add_argument("corpus", help="memory root (e.g. ~/.everos-locomo-all-kv-fast)")
p.add_argument(
"--mode",
choices=("lifespan", "readonly"),
default="lifespan",
help="lifespan = full strict check (idle corpus only); "
"readonly = direct lance read (safe on active corpus)",
)
p.add_argument(
"--owners",
help="comma-separated owner filter (readonly mode only)",
)
return p.parse_args()
async def main() -> int:
args = _parse_args()
corpus = Path(args.corpus).expanduser().resolve()
if not corpus.exists():
print(f"ERROR: corpus does not exist: {corpus}")
return 1
owners = (
[o.strip() for o in args.owners.split(",") if o.strip()]
if args.owners
else None
)
print(f"corpus: {corpus}")
print(f"mode: {args.mode}")
if owners:
print(f"owners: {owners}")
print()
if args.mode == "lifespan":
return await run_lifespan_mode(corpus)
return await run_readonly_mode(corpus, owners)
if __name__ == "__main__":
sys.exit(asyncio.run(main()))