chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
93
scripts/check_cjk.py
Normal file
93
scripts/check_cjk.py
Normal file
@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Scan tracked text files for CJK characters outside the language-policy allowlist.
|
||||
|
||||
Replaces the grep-based reference command that used to live in
|
||||
``.claude/rules/language-policy.md``. That command silently produced false
|
||||
negatives on this repo: the ``grep -zZv`` + ``xargs -0`` NUL pipeline
|
||||
mis-parsed the path list and reported "clean" even when violations existed.
|
||||
|
||||
Exit code 0 = clean, 1 = violations found (paths + line numbers printed).
|
||||
|
||||
Usage:
|
||||
python scripts/check_cjk.py # scan all tracked files
|
||||
python scripts/check_cjk.py a.py b.md # scan specific files (pre-commit)
|
||||
python scripts/check_cjk.py --quiet # per-file counts only
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
# CJK / fullwidth code points: CJK symbols & ideographs, Hangul syllables,
# and halfwidth/fullwidth forms. Kept as escapes so this file stays ASCII.
# The first range is contiguous (U+3000 through U+9FFF), so it also covers
# every block in between, e.g. CJK punctuation and Japanese kana.
_CJK = re.compile("[\\u3000-\\u9fff\\uac00-\\ud7af\\uff00-\\uffef]")
|
||||
|
||||
|
||||
def _is_allowlisted(path: str) -> bool:
|
||||
"""Return True if CJK is permitted in this path (see language-policy.md)."""
|
||||
name = os.path.basename(path)
|
||||
# 1. Tests: fixtures, sample inputs, and CJK-behavior assertions.
|
||||
if path.startswith("tests/"):
|
||||
return True
|
||||
# 2. Tokenizer NLP resources (stopword lists, segmentation examples).
|
||||
if path.startswith("src/everos/component/tokenizer/"):
|
||||
return True
|
||||
# 3. Locale-suffixed sample data, e.g. data/solo_chat_zh.json.
|
||||
if re.match(r"data/.*_(zh|ja|ko)\.", path):
|
||||
return True
|
||||
# 4. Translated doc mirrors, e.g. README.zh.md.
|
||||
if re.search(r"\.(zh|ja|ko)\.md$", path):
|
||||
return True
|
||||
# 5. Filenames explicitly marked with a CJK/locale token.
|
||||
return bool(re.search(r"(^|[._-])(cjk|zh|ja|ko)([._-]|$)", name))
|
||||
|
||||
|
||||
def _tracked_files() -> list[str]:
    """Return every path tracked by git, relative to the current directory.

    ``git ls-files`` is run with ``-z`` so paths are NUL-terminated and never
    C-quoted. Without it git (with the default ``core.quotePath``) prints
    non-ASCII names as quoted octal escapes, which cannot be opened and would
    be skipped silently by the scanner - exactly the files most likely to
    matter for a CJK check. NUL termination also keeps names that contain a
    newline in one piece.

    Returns:
        The tracked paths, in the order git reports them.

    Raises:
        subprocess.CalledProcessError: if git exits non-zero (for example
            when not run inside a work tree).
    """
    out = subprocess.check_output(
        ["git", "ls-files", "-z"],
        # Git emits raw path bytes; decode as UTF-8 regardless of the locale.
        # Undecodable names degrade to a path that simply fails to open.
        encoding="utf-8",
        errors="replace",
    )
    return [entry for entry in out.split("\0") if entry]
|
||||
|
||||
|
||||
def main() -> int:
    """Scan the requested files and report CJK characters outside the allowlist.

    Files come from the command line, or from git when none are given.
    Allowlisted paths are skipped, as are files that cannot be read as UTF-8
    text, are missing, or are directories.

    Returns:
        0 when no violation was found, 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="CJK language-policy scanner.")
    cli.add_argument("files", nargs="*", help="files to scan (default: all tracked)")
    cli.add_argument("--quiet", action="store_true", help="per-file counts only")
    options = cli.parse_args()

    targets = options.files or _tracked_files()
    violations: list[tuple[str, int, str]] = []
    for target in targets:
        if _is_allowlisted(target):
            continue
        try:
            with open(target, encoding="utf-8") as handle:
                content = handle.readlines()
        except (UnicodeDecodeError, FileNotFoundError, IsADirectoryError):
            # binary / missing / directory: nothing to scan
            continue
        violations.extend(
            (target, number, raw.strip())
            for number, raw in enumerate(content, start=1)
            if _CJK.search(raw)
        )

    if not violations:
        print("CJK language-policy: clean")
        return 0

    # Hits per file, keyed in first-seen order.
    per_file: dict[str, int] = {}
    for hit_path, _number, _snippet in violations:
        per_file[hit_path] = per_file.get(hit_path, 0) + 1

    print(f"CJK language-policy: {len(violations)} hit(s) in {len(per_file)} file(s)\n")
    if not options.quiet:
        for hit_path, number, snippet in violations:
            print(f" {hit_path}:{number}: {snippet[:100]}")
    else:
        # Largest offenders first; the stable sort keeps first-seen order on ties.
        ranked = sorted(per_file.items(), key=lambda item: item[1], reverse=True)
        for hit_path, hits in ranked:
            print(f" {hits:4d} {hit_path}")
    print("\nAllowed CJK locations are defined in .claude/rules/language-policy.md")
    return 1
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value
# (0 = clean, 1 = violations found).
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
350
scripts/check_consistency.py
Executable file
350
scripts/check_consistency.py
Executable file
@ -0,0 +1,350 @@
|
||||
#!/usr/bin/env python
|
||||
"""Check md ↔ LanceDB consistency for an everos corpus.
|
||||
|
||||
Three checks per kind:
|
||||
1. id set equality — md entry ids == LanceDB row entry_ids
|
||||
2. content_sha256 equality — every shared id matches on both sides
|
||||
3. id monotonicity (md-only) — within each daily-log md, the numeric
|
||||
counter at the end of entry.id ascends
|
||||
from 1 with no gap and no dupe
|
||||
|
||||
Two modes:
|
||||
--mode lifespan (default) Full strict check through the everos app
|
||||
lifespan stack (sqlite + lance + cascade +
|
||||
ome). Safe ONLY on an idle corpus (no live
|
||||
server writing). Covers every kind in
|
||||
KIND_REGISTRY.
|
||||
--mode readonly Bypass the lifespan stack, open LanceDB with
|
||||
a fresh read connection, read md directly.
|
||||
Safe even on an active corpus, but only
|
||||
covers the three daily-log kinds (episode /
|
||||
atomic_fact / foresight).
|
||||
|
||||
Examples:
|
||||
scripts/check_consistency.py ~/.everos-locomo-all-kv-fast
|
||||
scripts/check_consistency.py ~/.everos-locomo-all-kv-fast --mode readonly
|
||||
scripts/check_consistency.py ~/.everos-locomo-all-kv-fast --owners joanna,nate
|
||||
"""
|
||||
# This script must mutate sys.path before importing everos/tests, and
|
||||
# uses synchronous pathlib because it's a one-shot CLI, not server code.
|
||||
# ruff: noqa: E402, ASYNC240
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: this file lives in <root>/scripts/.
ROOT = Path(__file__).resolve().parent.parent
# Make both `tests` (under ROOT) and `everos` (under ROOT/src) importable.
# This must happen before the deferred imports inside the functions below.
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(ROOT / "src"))

from dotenv import load_dotenv

# Load the repository's .env file at import time.
load_dotenv(ROOT / ".env")
|
||||
|
||||
|
||||
# ── shared: id counter parsing ──────────────────────────────────────────
|
||||
|
||||
_ID_NUM_RE = re.compile(r"_(\d+)$")
|
||||
|
||||
|
||||
def _entry_counter(entry_id: str) -> int | None:
|
||||
m = _ID_NUM_RE.search(entry_id)
|
||||
return int(m.group(1)) if m else None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class MonotonicityReport:
    """Id-counter findings for a single daily-log markdown file."""

    # Path of the md file, relative to the corpus root (POSIX form).
    path: str
    # Number of entries parsed from the file.
    total: int
    # True when the counters do not appear in ascending order.
    not_sorted: bool
    # True when the smallest counter is 1.
    starts_at_1: bool
    # Counter values missing between 1 and the largest counter seen.
    gaps: list[int]
    # Counter values that occur more than once.
    dupes: list[int]
    # Entry ids from which no trailing counter could be parsed.
    bad_format: list[str]

    @property
    def ok(self) -> bool:
        """Whether the file is clean: empty, or free of every kind of issue."""
        if self.total == 0:
            return True
        if self.not_sorted or not self.starts_at_1:
            return False
        return not (self.gaps or self.dupes or self.bad_format)
|
||||
|
||||
|
||||
async def _scan_monotonicity(corpus: Path) -> list[MonotonicityReport]:
    """Check id-counter monotonicity for every daily-log md file in a corpus.

    Only ``*.md`` files under ``users/`` or ``agents/`` whose path contains
    one of the daily-log directories are inspected.

    Args:
        corpus: Root directory of the corpus to walk.

    Returns:
        One report per inspected file, in sorted path order.
    """
    from everos.core.persistence import MarkdownReader

    owner_roots = ("users/", "agents/")
    daily_dirs = ("/episodes/", "/.atomic_facts/", "/.foresights/", "/.agent_cases/")
    collected: list[MonotonicityReport] = []
    for md_path in sorted(corpus.rglob("*.md")):
        rel = md_path.relative_to(corpus).as_posix()
        if not rel.startswith(owner_roots):
            continue
        # Prefix a slash so a daily-log dir at the very start would match too.
        anchored = "/" + rel
        if all(marker not in anchored for marker in daily_dirs):
            continue

        parsed = await MarkdownReader.read(md_path)
        numbered = [(entry.id, _entry_counter(entry.id)) for entry in parsed.entries]
        counters: list[int] = [num for _eid, num in numbered if num is not None]
        malformed: list[str] = [eid for eid, num in numbered if num is None]

        if counters:
            present = set(counters)
            gaps = [n for n in range(1, max(counters) + 1) if n not in present]
            dupes = sorted(
                value for value, times in Counter(counters).items() if times > 1
            )
            first_is_one = min(counters) == 1
        else:
            gaps, dupes = [], []
            first_is_one = False

        collected.append(
            MonotonicityReport(
                path=rel,
                total=len(parsed.entries),
                not_sorted=sorted(counters) != counters,
                # A file with no entries at all is not flagged as "not from 1".
                starts_at_1=first_is_one if parsed.entries else True,
                gaps=gaps,
                dupes=dupes,
                bad_format=malformed,
            )
        )
    return collected
|
||||
|
||||
|
||||
def _print_monotonicity(reports: list[MonotonicityReport]) -> int:
|
||||
issues = sum(1 for r in reports if not r.ok)
|
||||
if issues == 0:
|
||||
print(
|
||||
f" all {len(reports)} daily-log md files have strictly ascending"
|
||||
" ids from 1"
|
||||
)
|
||||
return 0
|
||||
print(f" ⚠ {issues}/{len(reports)} md files have id-counter issues:")
|
||||
for r in reports:
|
||||
if r.ok:
|
||||
continue
|
||||
problems = []
|
||||
if r.not_sorted:
|
||||
problems.append("not-sorted")
|
||||
if not r.starts_at_1 and r.total:
|
||||
problems.append("not-from-1")
|
||||
if r.gaps:
|
||||
preview = r.gaps[:5]
|
||||
problems.append(f"gaps={preview}{'...' if len(r.gaps) > 5 else ''}")
|
||||
if r.dupes:
|
||||
problems.append(f"dupes={r.dupes}")
|
||||
if r.bad_format:
|
||||
problems.append(f"bad-format×{len(r.bad_format)}")
|
||||
print(f" {r.path}: total={r.total} {' '.join(problems)}")
|
||||
return issues
|
||||
|
||||
|
||||
# ── mode: lifespan ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def run_lifespan_mode(corpus: Path) -> int:
    """Full strict check via app lifespan; covers every kind in KIND_REGISTRY.

    Args:
        corpus: Memory root to check; exported as ``EVEROS_MEMORY__ROOT``.

    Returns:
        0 when everything is clean, 1 when the strict md/LanceDB assertion
        failed, 2 when id-counter issues were found (2 wins if both occur).
    """
    # Point the app at the requested corpus, then drop any cached settings so
    # the override is picked up. Order matters: env var first, then the cache
    # clear, then the app import/creation.
    os.environ["EVEROS_MEMORY__ROOT"] = str(corpus)
    from everos.config import load_settings

    load_settings.cache_clear()

    from everos.entrypoints.api.app import create_app
    from tests._consistency_assertions import assert_md_lance_strict_consistent

    app = create_app()
    rc = 0
    async with app.router.lifespan_context(app):
        # 1+2. id set + sha
        print("─── md ↔ LanceDB strict consistency ───")
        try:
            stats = await assert_md_lance_strict_consistent(corpus)
            print(" PASS")
        except AssertionError as e:
            # Drift is reported, not raised, so the monotonicity check still runs.
            print(f" DRIFT:\n{e}")
            rc = 1
            stats = None

        if stats is not None:
            # Per-kind table of md file / md entry / LanceDB row counts.
            print()
            print(
                f" {'kind':<15s} {'md_files':>10s}"
                f" {'md_entries':>12s} {'lance_rows':>12s}"
            )
            print(" " + "─" * 53)
            for kind, s in stats.items():
                print(
                    f" {kind:<15s} {s.md_file_count:>10d}"
                    f" {s.md_entry_count:>12d} {s.lance_row_count:>12d}"
                )

        # 3. id monotonicity
        print()
        print("─── id monotonicity ───")
        reports = await _scan_monotonicity(corpus)
        if _print_monotonicity(reports) > 0:
            rc = max(rc, 2)
    return rc
|
||||
|
||||
|
||||
# ── mode: readonly ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def run_readonly_mode(corpus: Path, owners_filter: list[str] | None) -> int:
    """Direct LanceDB read + md read; no lifespan / cascade / ome started.

    Covers the three daily-log kinds; agent_case + user_profile + agent_skill
    are NOT checked in this mode (use --mode lifespan on an idle corpus
    snapshot for full coverage).

    Args:
        corpus: Memory root to check.
        owners_filter: Owner ids to restrict the check to. When empty or
            None, every directory under ``<corpus>/users`` is checked.

    Returns:
        0 when everything is clean, 1 when md and LanceDB disagree, 2 when
        id-counter issues were found (2 wins if both occur).
    """
    import lancedb

    from everos.core.persistence import MarkdownReader
    from everos.memory.cascade.handlers.atomic_fact import AtomicFactHandler
    from everos.memory.cascade.handlers.episode import EpisodeHandler
    from everos.memory.cascade.handlers.foresight import ForesightHandler
    from tests._consistency_assertions import _daily_log_sha_for_entry

    db = lancedb.connect(str(corpus / ".index" / "lancedb"))

    # (LanceDB table, md directory under the owner, md filename prefix, handler)
    kinds = [
        ("episode", "episodes", "episode-", EpisodeHandler),
        ("atomic_fact", ".atomic_facts", "atomic_fact-", AtomicFactHandler),
        ("foresight", ".foresights", "foresight-", ForesightHandler),
    ]

    # Pick owners
    if owners_filter:
        owners = owners_filter
    else:
        owners = (
            sorted(p.name for p in (corpus / "users").iterdir() if p.is_dir())
            if (corpus / "users").exists()
            else []
        )

    print("─── md ↔ LanceDB consistency (readonly) ───")
    rc = 0
    for table_name, dir_name, prefix, handler_cls in kinds:
        try:
            table = db.open_table(table_name)
        except FileNotFoundError:
            print(f" {table_name}: table not in lancedb (skip)")
            continue
        for owner in owners:
            md_dir = corpus / "users" / owner / dir_name
            if not md_dir.exists():
                continue
            md_files = sorted(md_dir.glob(f"{prefix}*.md"))
            md_sha_total: dict[str, str] = {}
            for md in md_files:
                parsed = await MarkdownReader.read(md)
                for entry in parsed.entries:
                    md_sha_total[entry.id] = _daily_log_sha_for_entry(
                        handler_cls, entry.as_structured()
                    )
            # The owner id comes from the CLI or from a directory name, so
            # escape single quotes before embedding it in the SQL filter;
            # otherwise an id containing "'" breaks (or rewrites) the predicate.
            owner_literal = owner.replace("'", "''")
            # NOTE(review): rows are capped at 100_000 per owner; an owner
            # with more rows would show up as false "only_in_md" drift -
            # confirm corpus sizes stay below this.
            arr = (
                table.search()
                .where(f"owner_id = '{owner_literal}'")
                .limit(100_000)
                .to_arrow()
            )
            lance_sha = dict(
                zip(
                    arr["entry_id"].to_pylist(),
                    arr["content_sha256"].to_pylist(),
                    strict=True,
                )
            )
            only_md = sorted(set(md_sha_total) - set(lance_sha))
            only_lance = sorted(set(lance_sha) - set(md_sha_total))
            sha_mismatch = sorted(
                k
                for k in set(md_sha_total) & set(lance_sha)
                if md_sha_total[k] != lance_sha[k]
            )
            ok = not (only_md or only_lance or sha_mismatch)
            status = "OK" if ok else "DRIFT"
            if not ok:
                rc = 1
            print(
                f" {table_name:<12s} owner={owner:<12s}"
                f" md={len(md_sha_total):5d} lance={len(lance_sha):5d}"
                f" {status}"
            )
            if only_md:
                print(f" only_in_md (first 5): {only_md[:5]}")
            if only_lance:
                print(f" only_in_lance (first 5): {only_lance[:5]}")
            if sha_mismatch:
                print(f" sha_mismatch (first 5): {sha_mismatch[:5]}")

    # id monotonicity (md-only, owner-filtered if provided)
    print()
    print("─── id monotonicity ───")
    reports = await _scan_monotonicity(corpus)
    if owners_filter:
        owner_paths = tuple(f"users/{o}/" for o in owners_filter)
        reports = [r for r in reports if any(r.path.startswith(p) for p in owner_paths)]
    if _print_monotonicity(reports) > 0:
        rc = max(rc, 2)
    return rc
|
||||
|
||||
|
||||
# ── main ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _parse_args() -> argparse.Namespace:
|
||||
p = argparse.ArgumentParser(
|
||||
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
)
|
||||
p.add_argument("corpus", help="memory root (e.g. ~/.everos-locomo-all-kv-fast)")
|
||||
p.add_argument(
|
||||
"--mode",
|
||||
choices=("lifespan", "readonly"),
|
||||
default="lifespan",
|
||||
help="lifespan = full strict check (idle corpus only); "
|
||||
"readonly = direct lance read (safe on active corpus)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--owners",
|
||||
help="comma-separated owner filter (readonly mode only)",
|
||||
)
|
||||
return p.parse_args()
|
||||
|
||||
|
||||
async def main() -> int:
    """Parse arguments, validate the corpus path and run the selected mode.

    Returns:
        1 when the corpus path is missing or is not a directory; otherwise
        the status code returned by the selected mode.
    """
    args = _parse_args()
    corpus = Path(args.corpus).expanduser().resolve()
    if not corpus.exists():
        print(f"ERROR: corpus does not exist: {corpus}")
        return 1
    if not corpus.is_dir():
        # A regular file passes exists() but fails later, when the checks
        # try to open paths underneath it.
        print(f"ERROR: corpus is not a directory: {corpus}")
        return 1
    owners = (
        [o.strip() for o in args.owners.split(",") if o.strip()]
        if args.owners
        else None
    )
    print(f"corpus: {corpus}")
    print(f"mode: {args.mode}")
    if owners:
        print(f"owners: {owners}")
        if args.mode == "lifespan":
            # The filter is only passed to readonly mode; say so rather than
            # letting the banner above imply the check is restricted.
            print(
                "warning: --owners is ignored in lifespan mode "
                "(readonly mode only)",
                file=sys.stderr,
            )
    print()
    if args.mode == "lifespan":
        return await run_lifespan_mode(corpus)
    return await run_readonly_mode(corpus, owners)
|
||||
|
||||
|
||||
# Script entry point: run the async main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
|
||||
138
scripts/check_datetime_discipline.py
Normal file
138
scripts/check_datetime_discipline.py
Normal file
@ -0,0 +1,138 @@
|
||||
"""CI gate: enforce the two-zone discipline at the source-code level.
|
||||
|
||||
Scans `src/` for code patterns that bypass
|
||||
:mod:`everos.component.utils.datetime` and would silently introduce
|
||||
naive or local-tz datetimes. Exits non-zero on any hit.
|
||||
|
||||
Forbidden patterns (with a small allowlist):
|
||||
|
||||
1. ``datetime.now()`` / ``datetime.utcnow()`` / ``datetime.today()`` —
|
||||
naive constructors / deprecated. Use ``get_utc_now()`` (storage) or
|
||||
``get_now_with_timezone()`` (display).
|
||||
2. ``time.time()`` / ``time.time_ns()`` — bypasses the helper module.
|
||||
Use ``to_timestamp_ms(get_utc_now())`` if you really need ms epoch.
|
||||
3. Direct ``datetime(YYYY, ...)`` constructor without ``tzinfo=`` —
|
||||
produces naive datetimes; use ``ensure_utc(datetime(...))`` instead.
|
||||
4. ``.astimezone(`` / ``.replace(tzinfo=`` outside the helper module —
|
||||
should go through ``to_display_tz`` / ``ensure_utc``.
|
||||
|
||||
Allowlist (legitimate uses):
|
||||
|
||||
* ``src/everos/component/utils/datetime.py`` — the helper module itself.
|
||||
* ``src/everos/core/persistence/sqlite/base.py`` — the SQLAlchemy ``load``
|
||||
event listener that re-attaches UTC on hydrate.
|
||||
|
||||
Run::
|
||||
|
||||
python scripts/check_datetime_discipline.py
|
||||
|
||||
Wired into ``make ci``; any violation fails the build.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
_ROOT = Path(__file__).resolve().parent.parent
|
||||
_SRC = _ROOT / "src"
|
||||
|
||||
_ALLOWLIST: set[Path] = {
|
||||
_ROOT / "src/everos/component/utils/datetime.py",
|
||||
_ROOT / "src/everos/core/persistence/sqlite/base.py",
|
||||
}
|
||||
|
||||
# (regex, message) pairs. Each regex must match on a single line.
|
||||
_HELPER_HINT = "use get_utc_now() / get_now_with_timezone()"
|
||||
_PATTERNS: list[tuple[re.Pattern[str], str]] = [
|
||||
(
|
||||
re.compile(r"\bdatetime\.now\s*\(\s*\)"),
|
||||
f"datetime.now() returns naive — {_HELPER_HINT}",
|
||||
),
|
||||
(
|
||||
re.compile(r"\bdatetime\.utcnow\s*\("),
|
||||
"datetime.utcnow() is deprecated and naive — use get_utc_now()",
|
||||
),
|
||||
(
|
||||
re.compile(r"\bdatetime\.today\s*\("),
|
||||
"datetime.today() returns naive — use today_with_timezone()",
|
||||
),
|
||||
(
|
||||
re.compile(r"\bdt\.datetime\.now\s*\(\s*\)"),
|
||||
f"dt.datetime.now() returns naive — {_HELPER_HINT}",
|
||||
),
|
||||
(
|
||||
re.compile(r"\bdt\.datetime\.utcnow\s*\("),
|
||||
"dt.datetime.utcnow() is deprecated and naive — use get_utc_now()",
|
||||
),
|
||||
(
|
||||
re.compile(r"\b_dt\.datetime\.now\s*\(\s*\)"),
|
||||
f"_dt.datetime.now() returns naive — {_HELPER_HINT}",
|
||||
),
|
||||
(
|
||||
re.compile(r"\btime\.time(?:_ns)?\s*\("),
|
||||
"time.time() bypasses the helper — use to_timestamp_ms(get_utc_now())",
|
||||
),
|
||||
(
|
||||
re.compile(r"\.astimezone\s*\("),
|
||||
".astimezone(...) outside helper — use to_display_tz() / ensure_utc()",
|
||||
),
|
||||
(
|
||||
re.compile(r"\.replace\s*\(\s*tzinfo\s*="),
|
||||
".replace(tzinfo=...) outside helper — use ensure_utc() / to_display_tz()",
|
||||
),
|
||||
]
|
||||
|
||||
# Skip lines that match these (comments, docstrings, `# tz-noqa`).
|
||||
_COMMENT_RE = re.compile(r"^\s*#")
|
||||
_DOCSTRING_TRIPLE = '"""'
|
||||
|
||||
|
||||
def _scan_file(path: Path) -> list[tuple[int, str, str]]:
|
||||
"""Return list of (line_no, line, message) violations in *path*."""
|
||||
if path in _ALLOWLIST:
|
||||
return []
|
||||
hits: list[tuple[int, str, str]] = []
|
||||
try:
|
||||
text = path.read_text(encoding="utf-8")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
return []
|
||||
|
||||
# Strip out triple-quoted blocks (docstrings + multi-line literals).
|
||||
text_no_docstrings = re.sub(r'""".*?"""', "", text, flags=re.DOTALL)
|
||||
text_no_docstrings = re.sub(r"'''.*?'''", "", text_no_docstrings, flags=re.DOTALL)
|
||||
|
||||
for lineno, line in enumerate(text_no_docstrings.splitlines(), start=1):
|
||||
if _COMMENT_RE.match(line):
|
||||
continue
|
||||
if "# tz-noqa" in line:
|
||||
continue
|
||||
# Strip inline trailing comment to avoid false positives in
|
||||
# comment text like ``# replace(tzinfo=...) — explanation``.
|
||||
code_part = line.split("#", 1)[0]
|
||||
for pat, msg in _PATTERNS:
|
||||
if pat.search(code_part):
|
||||
hits.append((lineno, line.rstrip(), msg))
|
||||
break
|
||||
return hits
|
||||
|
||||
|
||||
def main() -> int:
    """Scan every ``*.py`` file under ``src/`` and print any violations.

    Returns:
        0 when nothing was found, 1 when at least one violation was printed.
    """
    dirty = False
    for source in sorted(_SRC.rglob("*.py")):
        found = _scan_file(source)
        if not found:
            continue
        shown = source.relative_to(_ROOT)
        for number, text, reason in found:
            print(f"{shown}:{number}: {reason}")
            print(f" {text}")
        dirty = True
    if dirty:
        return 1
    print("OK — datetime discipline clean.")
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit non-zero when any violation was reported.
if __name__ == "__main__":
    sys.exit(main())
|
||||
128
scripts/dump_openapi.py
Normal file
128
scripts/dump_openapi.py
Normal file
@ -0,0 +1,128 @@
|
||||
"""Dump the FastAPI OpenAPI schema to ``docs/openapi.json``.
|
||||
|
||||
Static export — does **not** start the server. Calls ``app.openapi()``
|
||||
directly on the FastAPI instance returned by ``create_app()``, which
|
||||
the runtime ``GET /openapi.json`` handler returns verbatim. No lifespan
|
||||
is run, so this is fast and side-effect-free.
|
||||
|
||||
Modes:
|
||||
|
||||
* default — write ``docs/openapi.json``.
|
||||
* ``--check`` — render the schema in memory and compare it against the
on-disk copy, printing a unified diff on drift. Exits non-zero on drift, so it can be wired into ``make lint``
|
||||
to fail PRs that touch the API surface without regenerating the
|
||||
committed schema. Same shape as ``check_datetime_discipline.py``.
|
||||
|
||||
Run::
|
||||
|
||||
python scripts/dump_openapi.py # write docs/openapi.json
|
||||
python scripts/dump_openapi.py --check # CI gate
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root (this file lives in <root>/scripts/).
_ROOT = Path(__file__).resolve().parent.parent
# Committed schema file that is written by default and compared by --check.
_TARGET = _ROOT / "docs" / "openapi.json"
|
||||
|
||||
|
||||
def _build_schema() -> dict:
    """Return the FastAPI app's full OpenAPI schema.

    Force ``ENV=DEV`` so the ``openapi_url`` route is enabled — without
    it the dev-mode endpoint check (see ``app.py``) shadows the route.
    The schema content itself is identical in dev vs prod; the flag only
    controls whether the runtime ``GET /openapi.json`` is exposed. We
    flip it here so the static export matches the dev-mode endpoint
    output the e2e test compares against.

    Note that the ``ENV`` override is written to ``os.environ`` and is not
    restored afterwards.

    Returns:
        The value returned by ``app.openapi()``.
    """
    import os

    os.environ["ENV"] = "DEV"
    # Local import so an import-time evaluation of ``ENV`` (read inside
    # ``create_app``) sees the override above.
    from everos.entrypoints.api.app import create_app

    # Pass an empty lifespan list so we don't pull up SQLite / LanceDB /
    # OME — the schema is computed from static route declarations alone.
    app = create_app(lifespan_providers=[])
    return app.openapi()
|
||||
|
||||
|
||||
def _render(schema: dict) -> str:
|
||||
"""Pretty-print the schema as JSON with stable key order + trailing newline."""
|
||||
return json.dumps(schema, indent=2, ensure_ascii=False, sort_keys=False) + "\n"
|
||||
|
||||
|
||||
def _write_target(content: str) -> None:
    """Write *content* to the target file as UTF-8, creating its directory."""
    out_dir = _TARGET.parent
    out_dir.mkdir(exist_ok=True, parents=True)
    _TARGET.write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
def _check_against_target(content: str) -> int:
    """Compare freshly rendered *content* with the committed schema file.

    Args:
        content: The rendered schema text to compare against the target file.

    Returns:
        0 when the file exists and matches exactly; 1 when it is missing or
        differs. On a mismatch, a unified diff capped at 200 lines is printed
        to stderr.
    """
    shown = _TARGET.relative_to(_ROOT)
    if not _TARGET.is_file():
        print(
            f"error: {shown} does not exist; "
            f"run `make openapi` to generate it.",
            file=sys.stderr,
        )
        return 1

    committed = _TARGET.read_text(encoding="utf-8")
    if committed == content:
        print(f"OK — {shown} matches app.openapi() output.")
        return 0

    # Drift: print a unified diff to stderr so CI / reviewer can see what changed.
    import difflib

    diff = "".join(
        difflib.unified_diff(
            committed.splitlines(keepends=True),
            content.splitlines(keepends=True),
            fromfile=f"{shown} (committed)",
            tofile="app.openapi() (current)",
        )
    )
    diff_lines = diff.splitlines(keepends=True)
    # Limit to first ~200 lines so a giant schema rewrite stays scannable.
    head = "".join(diff_lines[:200])
    print(
        f"error: {shown} is out of date.\n"
        "Run `make openapi` and commit the result.\n\n" + head,
        file=sys.stderr,
    )
    if len(diff_lines) > 200:
        print(
            f"... (truncated; full diff is {len(diff_lines)} lines)",
            file=sys.stderr,
        )
    return 1
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Build the schema, then either write it or check it for drift.

    Args:
        argv: Command-line arguments; ``None`` makes argparse read
            ``sys.argv``.

    Returns:
        0 on a successful write or a clean check, 1 when ``--check`` finds
        the committed file missing or out of date.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument(
        "--check",
        action="store_true",
        help="Compare against docs/openapi.json without writing; exit 1 on drift.",
    )
    args = parser.parse_args(argv)

    schema = _build_schema()
    content = _render(schema)

    if args.check:
        return _check_against_target(content)

    _write_target(content)
    # The file is UTF-8 with non-ASCII characters left unescaped, so the
    # character count can be smaller than the byte count; report bytes.
    size = len(content.encode("utf-8"))
    print(f"wrote {_TARGET.relative_to(_ROOT)} ({size} bytes)")
    return 0
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    # Silence the unused-import warning on tempfile (kept for future use).
    # Nothing in this script currently uses tempfile.
    _ = tempfile
    sys.exit(main())
|
||||
123
scripts/e2e_memorize/README.md
Normal file
123
scripts/e2e_memorize/README.md
Normal file
@ -0,0 +1,123 @@
|
||||
# End-to-end memorize test
|
||||
|
||||
In-process driver that pushes a realistic fixture through `service.memorize`,
sending 6 messages per `/add` call and finishing with a `/flush`.
|
||||
|
||||
## What's here
|
||||
|
||||
| File | Purpose |
|
||||
|---|---|
|
||||
| `fixtures/chat_session.json` | 22 messages · 3 topic shifts · multi-user (Alice → Bob) — chat-mode fixture |
|
||||
| `fixtures/agent_session.json` | 21 items · 2 task threads · interleaved `tool_calls` / `tool` results — agent-mode fixture |
|
||||
| `run.py` | In-process runner (no HTTP) |
|
||||
|
||||
## Prereqs
|
||||
|
||||
1. **LLM client configured** in `.env`:
|
||||
- `EVEROS_LLM__API_KEY=...`
|
||||
- `EVEROS_LLM__BASE_URL=...` (OpenAI-compatible)
|
||||
- `EVEROS_LLM__MODEL=...` (defaults to `gpt-4o-mini`)
|
||||
- Without these, the boundary stage logs `memorize_no_llm_client` and skips the run.
|
||||
2. **Memory root**: defaults to `~/.everos`; override with `EVEROS_MEMORY__ROOT=...`.
|
||||
3. **Mode** is read from `settings.memorize.mode` (toml/env) before the first `memorize()` call.
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
# Chat mode — boundary uses everalgo.boundary.detect_boundaries
|
||||
EVEROS_MEMORIZE__MODE=chat uv run python scripts/e2e_memorize/run.py \
|
||||
--fixture scripts/e2e_memorize/fixtures/chat_session.json \
|
||||
--expected-mode chat
|
||||
|
||||
# Agent mode — boundary uses everalgo.agent_memory.AgentBoundaryDetector
|
||||
# (filter→detect→remap; tool items preserved in cells)
|
||||
EVEROS_MEMORIZE__MODE=agent uv run python scripts/e2e_memorize/run.py \
|
||||
--fixture scripts/e2e_memorize/fixtures/agent_session.json \
|
||||
--expected-mode agent
|
||||
|
||||
# Dry run (print batch plan, no LLM calls)
|
||||
uv run python scripts/e2e_memorize/run.py \
|
||||
--fixture scripts/e2e_memorize/fixtures/chat_session.json --dry-run
|
||||
```
|
||||
|
||||
## What to verify after a run
|
||||
|
||||
### 1. Console output
|
||||
|
||||
Each batch prints `status=` (`accumulated` while buffering, `extracted` when
|
||||
cells got cut). Final `flush` should be `extracted` if any cell remained
|
||||
in the tail. The trailing file walker lists md / sqlite files modified
|
||||
in the last 10 minutes.
|
||||
|
||||
### 2. Episode md (sync — 4A)
|
||||
|
||||
```
|
||||
~/.everos/users/<owner_id>/episodes/episode-YYYY-MM-DD.md
|
||||
```
|
||||
|
||||
- Chat fixture: 2 owners (`u_alice`, `u_bob`) — expect Episodes split into
|
||||
~3-4 cells aligned with topic shifts (Python bug → weekend ramen → Q3
|
||||
review → SRE handoff/ramen wrap).
|
||||
- Agent fixture: 1 user (`u_alice`) — expect ~2 Episodes aligned with the
|
||||
two task threads (latency rollback → DB index fix).
|
||||
|
||||
### 3. SQLite memcell rows
|
||||
|
||||
```bash
|
||||
sqlite3 ~/.everos/.index/sqlite/system.db \
|
||||
"select memcell_id, track, owner_id, owner_type, json_array_length(sender_ids_json) as senders
|
||||
from memcell order by timestamp"
|
||||
```
|
||||
|
||||
- Chat run: rows with `track=user_memory`, `owner_type=user`.
|
||||
- Agent run: parallel rows for both tracks (`user_memory` **and**
|
||||
`agent_memory`) since agent mode dispatches both pipelines.
|
||||
|
||||
### 4. Unprocessed buffer
|
||||
|
||||
```bash
|
||||
sqlite3 ~/.everos/.index/sqlite/system.db \
|
||||
"select session_id, count(*) from unprocessed_buffer
|
||||
where track='memorize' group by session_id"
|
||||
```
|
||||
|
||||
After `flush` the buffer should be empty for the test session.
|
||||
|
||||
### 5. OME async output (only if subscribers exist)
|
||||
|
||||
- `users/<owner>/.atomic_facts/atomic_fact-YYYY-MM-DD.md` (always; `extract_atomic_facts` is registered)
- `users/<owner>/.foresights/foresight-YYYY-MM-DD.md` (always; `extract_foresight` is registered)
- `agents/<agent>/.agent_cases/agent_case-YYYY-MM-DD.md` (**only after `extract_agent_cases` strategy is written + registered** — currently absent, the emit is a no-op)
|
||||
|
||||
### 6. Reset between runs
|
||||
|
||||
The fixture's session_id is randomised per invocation, so previous runs
|
||||
don't pollute the new one. To wipe everything:
|
||||
|
||||
```bash
|
||||
rm -rf ~/.everos/users ~/.everos/agents ~/.everos/.index/sqlite/system.db
|
||||
```
|
||||
|
||||
## Boundary expectations cheat sheet
|
||||
|
||||
### Chat fixture topic shifts (timestamps shown as epoch seconds; the fixtures store epoch ms)
|
||||
|
||||
| Range | Topic |
|
||||
|---|---|
|
||||
| msgs 1-6 (`1747396800–1747397010`) | Python KeyError debugging |
|
||||
| msgs 7-12 (`1747400400–1747400610`) | Weekend ramen plans |
|
||||
| msgs 13-16 (`1747407600–1747407720`) | Q3 revenue review meeting prep |
|
||||
| msgs 17-22 (`1747411200–1747411410`) | Bob joins, SRE handoff + ramen + Q3 deck deadline |
|
||||
|
||||
Boundary detector should cut on topic gaps; 3 cuts → 4 cells is the most likely outcome.
|
||||
|
||||
### Agent fixture task threads
|
||||
|
||||
| Range | Task |
|
||||
|---|---|
|
||||
| items 1-13 (`1747396800–1747397140`) | API latency spike → identify keepalive pool regression → rollback |
|
||||
| items 14-21 (`1747400400–1747400720`) | DB connection pool exhaustion → find unindexed query → CREATE INDEX CONCURRENTLY |
|
||||
|
||||
Boundary detector should cut between item 13 and item 14 (timestamp jump
|
||||
~55 minutes, topic flip). Tool items inside each cell stay attached to
|
||||
their initiating chat turn.
|
||||
33
scripts/e2e_memorize/fixtures/agent_session.json
Normal file
33
scripts/e2e_memorize/fixtures/agent_session.json
Normal file
@ -0,0 +1,33 @@
|
||||
{
|
||||
"_doc": "Agent-mode end-to-end fixture. 26 items across 2 task threads: (1) production latency diagnosis with rollback, (2) DB connection-pool exhaustion investigation. Includes ToolCallRequest + ToolCallResult interleaved with user/assistant text turns, parallel tool calls, one assistant turn with empty content (only tool_calls), one extended chain-of-thought reasoning, and one tool failure + retry in task 2. Run with EVEROS_MEMORIZE__MODE=agent. Expected boundary detector cuts ~2 cells aligned with the two task threads; tool items fold into the cell containing the preceding chat turn.",
|
||||
"session_id_hint": "e2e_agent",
|
||||
"messages": [
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747396800000, "content": "Production API p99 latency spiked from 280ms to 2.5s starting around 21:00 UTC. Customer reports rolling in. Can you investigate?"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747396820000, "content": "I'll pull the recent deploys and check for correlation with the spike timestamp.", "tool_calls": [{"id": "call_001", "type": "function", "function": {"name": "list_recent_deploys", "arguments": "{\"hours_back\": 4, \"service\": \"api-gateway\"}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747396825000, "tool_call_id": "call_001", "content": "[{\"deploy_id\": \"d-9f3a\", \"service\": \"api-gateway\", \"deployed_at\": \"2026-05-16T20:47:12Z\", \"author\": \"bob\", \"commit_sha\": \"a1b2c3d\", \"diff_summary\": \"refactor: switch upstream client to keepalive pool\"}, {\"deploy_id\": \"d-9f29\", \"service\": \"api-gateway\", \"deployed_at\": \"2026-05-16T15:12:03Z\", \"author\": \"carol\", \"commit_sha\": \"e4f5g6h\", \"diff_summary\": \"chore: bump otel-sdk to 1.31\"}]"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747396870000, "content": "", "tool_calls": [{"id": "call_002", "type": "function", "function": {"name": "get_deploy_diff", "arguments": "{\"deploy_id\": \"d-9f3a\", \"files\": [\"upstream/client.go\", \"config/pool.yaml\"]}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747396880000, "tool_call_id": "call_002", "content": "config/pool.yaml: max_idle_conns_per_host changed from 32 → 4. upstream/client.go: new transport with IdleConnTimeout=10s (was 90s). The default Go transport now reuses far fewer connections."},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747396930000, "content": "Two changes in d-9f3a stand out, and the interaction between them likely explains the cliff. First, max_idle_conns_per_host went from 32 down to 4 — that alone caps how many warm sockets the upstream client can pin per backend. Second, IdleConnTimeout dropped from 90s to 10s, so even the few warm sockets we do keep get torn down between requests during normal traffic gaps. Combined effect: under sustained load p99 of ~250rps × ~6 backends, the pool is permanently saturated and every spillover request pays a TCP+TLS handshake (~800ms-1.5s on the affected region) instead of reusing a connection. That maps directly to the 280ms → 2.5s latency jump. Before I file a rollback recommendation I want to confirm the prediction against live pool metrics — if reuse_ratio collapsed and new-connection rate is up >10x, that's enough signal.", "tool_calls": [{"id": "call_003", "type": "function", "function": {"name": "query_pool_metrics", "arguments": "{\"service\": \"api-gateway\", \"window_min\": 15}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747396938000, "tool_call_id": "call_003", "content": "{\"idle_conns_avg\": 1.2, \"new_conn_rate_per_s\": 184, \"new_conn_rate_baseline\": 6, \"reuse_ratio\": 0.04, \"reuse_ratio_baseline\": 0.91}"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747396980000, "content": "Confirmed: reuse_ratio collapsed from 0.91 to 0.04, new-connection rate is 30x baseline. This is the root cause. Recommend rollback of d-9f3a — the keepalive refactor needs a config review before re-deploying. Want me to initiate rollback?"},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747397040000, "content": "Yes, roll it back. Page Bob async so he sees the rollback when he comes online tomorrow — no need to wake him up."},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747397070000, "content": "Initiating rollback to d-9f29 and queuing a non-paging Slack DM for Bob.", "tool_calls": [{"id": "call_004", "type": "function", "function": {"name": "rollback_deploy", "arguments": "{\"service\": \"api-gateway\", \"to_deploy_id\": \"d-9f29\", \"reason\": \"p99 latency regression from keepalive pool refactor (max_idle_conns 32->4)\"}"}}, {"id": "call_005", "type": "function", "function": {"name": "slack_dm", "arguments": "{\"user\": \"bob\", \"channel\": \"async\", \"message\": \"Rolled back api-gateway d-9f3a at 21:04 UTC — keepalive pool change caused p99 spike. Logs in #incident-2026-05-16.\"}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747397115000, "tool_call_id": "call_004", "content": "{\"status\": \"ok\", \"rolled_back_to\": \"d-9f29\", \"completed_at\": \"2026-05-16T21:05:01Z\", \"verification\": {\"p99_5min_after\": \"312ms\", \"reuse_ratio_after\": 0.88}}"},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747397116000, "tool_call_id": "call_005", "content": "{\"status\": \"queued\", \"message_id\": \"slack_msg_xyz\"}"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747397140000, "content": "Rollback complete. p99 dropped back to 312ms within 5 minutes, reuse ratio recovered to 0.88. Slack DM queued for Bob. Incident-2026-05-16 is the channel I logged it to."},
|
||||
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747400400000, "content": "Different issue — we're seeing connection pool exhaustion warnings on the auth-service Postgres pool the last hour. Can you check whether it's a leak or just a sustained traffic spike?"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747400420000, "content": "Pulling pool stats and recent traffic shape.", "tool_calls": [{"id": "call_006", "type": "function", "function": {"name": "query_db_pool", "arguments": "{\"service\": \"auth-service\", \"window_min\": 60}"}}, {"id": "call_007", "type": "function", "function": {"name": "query_request_rate", "arguments": "{\"service\": \"auth-service\", \"window_min\": 60}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747400430000, "tool_call_id": "call_006", "content": "{\"max_conns\": 50, \"in_use_now\": 49, \"in_use_p99_1h\": 50, \"idle_p99_1h\": 1, \"wait_queue_depth_p99\": 18, \"long_running_queries_above_30s\": 12}"},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747400431000, "tool_call_id": "call_007", "content": "{\"rps_now\": 142, \"rps_baseline\": 138, \"rps_p99_1h\": 151}"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747400470000, "content": "Traffic is flat (rps 142 vs baseline 138), but 12 long-running queries above 30s and wait queue depth p99 of 18. This is a leak / unbounded query, not a traffic surge. Let me find the offending queries.", "tool_calls": [{"id": "call_008", "type": "function", "function": {"name": "list_long_running_queries", "arguments": "{\"service\": \"auth-service\", \"min_duration_s\": 30, \"window_min\": 60}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747400478000, "tool_call_id": "call_008", "content": "{\"error\": \"timeout\", \"detail\": \"query against pg_stat_activity exceeded 15s while pool was saturated; retry with a narrower window\"}"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747400490000, "content": "Tool itself is starved by the same pool exhaustion — fair enough. Retrying against the last 10 minutes only so pg_stat_activity has a tighter slice to scan.", "tool_calls": [{"id": "call_008b", "type": "function", "function": {"name": "list_long_running_queries", "arguments": "{\"service\": \"auth-service\", \"min_duration_s\": 30, \"window_min\": 10}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747400499000, "tool_call_id": "call_008b", "content": "[{\"query_sha\": \"q-71f\", \"sql_truncated\": \"SELECT u.* FROM users u JOIN sessions s ON s.user_id = u.id WHERE s.expired_at > now() - interval '30 day'\", \"avg_duration_s\": 84, \"calls_last_hour\": 11, \"missing_index\": \"sessions.expired_at\"}]"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747400510000, "content": "Single offender — query q-71f is doing a full sessions-table scan because sessions.expired_at is unindexed. Each call takes ~84s and holds a connection. 11 calls in the last hour saturate the pool. Recommend: (1) add the index now (concurrent), (2) add a query timeout cap on this endpoint long-term. Want me to add the index?"},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747400560000, "content": "Yes please add the index. Use CREATE INDEX CONCURRENTLY so we don't block writes. Then post in #incident-2026-05-16 with the runbook entry."},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747400590000, "content": "On it. Creating the concurrent index and posting the runbook update.", "tool_calls": [{"id": "call_009", "type": "function", "function": {"name": "execute_sql", "arguments": "{\"db\": \"auth-service\", \"sql\": \"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_sessions_expired_at ON sessions(expired_at)\", \"timeout_s\": 600}"}}]},
|
||||
{"sender_id": "tool_runner", "sender_name": null, "role": "tool", "timestamp": 1747400650000, "tool_call_id": "call_009", "content": "{\"status\": \"ok\", \"duration_s\": 47, \"rows_indexed\": 1841229}"},
|
||||
{"sender_id": "ops_agent", "sender_name": "OpsAgent", "role": "assistant", "timestamp": 1747400720000, "content": "Index created in 47s over 1.84M rows. Query q-71f should drop from ~84s to sub-second. Pool will drain in the next minute as long-running queries finish. Posted runbook to #incident-2026-05-16."}
|
||||
]
|
||||
}
|
||||
31
scripts/e2e_memorize/fixtures/chat_session.json
Normal file
31
scripts/e2e_memorize/fixtures/chat_session.json
Normal file
@ -0,0 +1,31 @@
|
||||
{
|
||||
"_doc": "Chat-mode end-to-end fixture. 22 messages across 3 topics + a multi-user handoff. Run with EVEROS_MEMORIZE__MODE=chat. Expected boundary detector cuts ~3-4 cells (Python bug → weekend plans → cross-team meeting).",
|
||||
"session_id_hint": "e2e_chat",
|
||||
"messages": [
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747396800000, "content": "I'm getting a weird KeyError on a dict comprehension when the source list has None entries. The traceback points at the dict literal, not the comprehension itself."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747396830000, "content": "Can you share a minimal repro? Something like the comprehension expression plus 2-3 sample input items. I want to see whether the None is on the key side or the value side."},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747396890000, "content": "Sure: `{r['id']: r['name'] for r in rows}` where rows comes from a left-join and sometimes the right side returns None. So r is None for those rows."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747396920000, "content": "Got it. The None is the row itself, not a field. `None['id']` raises TypeError actually, but if you preprocess the rows to dicts upstream, the missing key on a partial dict would be KeyError. Either way, filter before the comprehension: `{r['id']: r['name'] for r in rows if r is not None and 'id' in r}`."},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747396980000, "content": "That worked, thanks. Also occasionally the SQL itself wraps the row in a tuple instead of dict — pyodbc behaviour. Got bitten by it last quarter too."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747397010000, "content": "pyodbc returns Row objects by default; you'd need `cursor.execute(...).fetchall()` then iterate. Or set `cursor.row_factory = lambda c, r: dict(zip([col[0] for col in c.description], r))` to normalise upstream."},
|
||||
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747400400000, "content": "Switching topics — any weekend plans? I'm thinking of trying the new ramen place on Carmine Street, supposedly the tonkotsu is decent."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747400430000, "content": "Tonkotsu sounds good. If you go Saturday before 1pm you'll skip the queue. There's also a coffee place two doors down that does decent cold brew if you want a pre-meal stop."},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747400490000, "content": "Cold brew yes. Bob mentioned wanting to come too, do you think the ramen place takes reservations for three?"},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747400520000, "content": "Their site says walk-in only on weekends. For three people the wait is usually 25-40 minutes around noon — manageable but worth knowing."},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747400580000, "content": "OK we'll show up at 11:30 then. Will send Bob the address."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747400610000, "content": "Sensible. Reminder it's cash-only for the first 6 months apparently — saw that on a Reddit thread last week."},
|
||||
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747407600000, "content": "One more thing — the Q3 revenue review is next Thursday at 2pm, who from engineering needs to attend? I want to make sure platform team is represented since we're presenting the infra cost reductions."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747407630000, "content": "Standard attendees are eng director + relevant TL. For platform cost reductions, your eng lead Carol should be there since she owns the runbook. Add Dan as a backup in case Carol's on PTO."},
|
||||
{"sender_id": "u_alice", "sender_name": "Alice", "role": "user", "timestamp": 1747407690000, "content": "Good call. Carol mentioned PTO that week actually. Let me ping Dan to confirm he can do the slides handoff."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747407720000, "content": "If Dan has bandwidth, also ask him to pre-share the deck Monday so finance has time to review the cost numbers before the meeting. Saves a lot of meeting time clarifying figures."},
|
||||
|
||||
{"sender_id": "u_bob", "sender_name": "Bob", "role": "user", "timestamp": 1747411200000, "content": "Hey, Alice forwarded me about the ramen plan and the Thursday meeting. I can do both but the Thursday slot conflicts with the SRE oncall handoff at 1:45 — anyone we can swap with?"},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747411230000, "content": "Erin usually does the Wednesday → Thursday handoff for SRE. She owes you one from last cycle. Ask her to take the 1:45 slot and you can ack the comms async at 1:30."},
|
||||
{"sender_id": "u_bob", "sender_name": "Bob", "role": "user", "timestamp": 1747411290000, "content": "Erin's good. I'll ping her on Slack today. About the ramen — Alice said 11:30 Saturday, that works for me, walking distance from my place."},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747411320000, "content": "Great. Just remind Alice it's cash-only for now — she'll forget otherwise based on past experience."},
|
||||
{"sender_id": "u_bob", "sender_name": "Bob", "role": "user", "timestamp": 1747411380000, "content": "Ha, true. I'll grab some cash on the way. Also can you remind me about the Q3 revenue deck — when does it need to be done by, EOD Tuesday?"},
|
||||
{"sender_id": "assistant", "sender_name": "Bot", "role": "assistant", "timestamp": 1747411410000, "content": "Yes — Dan agreed to pre-share Monday, so practical deadline is EOD Tuesday for finance to review Wednesday. After Wednesday review they may ask for last-minute number tweaks; build slack into Thursday morning for that."}
|
||||
]
|
||||
}
|
||||
186
scripts/e2e_memorize/run.py
Normal file
186
scripts/e2e_memorize/run.py
Normal file
@ -0,0 +1,186 @@
|
||||
"""End-to-end memorize runner — in-process call into ``service.memorize``.
|
||||
|
||||
Calls ``service.memorize.memorize()`` directly (not via HTTP) so this works
|
||||
without ``everos server start``. Drives a fixture through ``/add`` in
|
||||
N-sized batches, then triggers ``/flush`` to drain the tail.
|
||||
|
||||
Reads ``settings.memorize.mode`` from current env / toml — set the mode via
|
||||
``EVEROS_MEMORIZE__MODE=chat|agent`` *before* invoking this script (the
|
||||
config is cached after the first ``load_settings()`` call).
|
||||
|
||||
Usage:
|
||||
EVEROS_MEMORIZE__MODE=chat uv run python scripts/e2e_memorize/run.py \\
|
||||
--fixture scripts/e2e_memorize/fixtures/chat_session.json
|
||||
|
||||
EVEROS_MEMORIZE__MODE=agent uv run python scripts/e2e_memorize/run.py \\
|
||||
--fixture scripts/e2e_memorize/fixtures/agent_session.json --batch-size 5
|
||||
|
||||
After it finishes, check:
|
||||
~/.everos/users/<owner>/episodes/<date>.md (written sync by 4A)
|
||||
~/.everos/.index/sqlite/system.db memcell rows (written by boundary)
|
||||
~/.everos/agents/<agent>/agent_cases/<date>.md (written async by OME
|
||||
- only if a consumer of AgentMemCellWritten is registered)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.component.llm import get_llm_client
|
||||
from everos.config import load_settings
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.infra.persistence.sqlite import dispose_engine, get_engine
|
||||
from everos.service.memorize import _get_engine as _get_ome_engine
|
||||
from everos.service.memorize import memorize
|
||||
|
||||
|
||||
def _chunks(items: list[dict], n: int) -> list[list[dict]]:
|
||||
return [items[i : i + n] for i in range(0, len(items), n)]
|
||||
|
||||
|
||||
def _print_header(mode: str, fixture_path: Path, session_id: str) -> None:
    """Print the run banner: mode, fixture name, session id, memory root, LLM state.

    Args:
        mode: Memorize mode taken from settings, echoed in the title line.
        fixture_path: Fixture file; only its base name is shown.
        session_id: Session id generated for this run.
    """
    rule = "=" * 72
    print(rule)
    print(f" everos e2e memorize · mode={mode}")
    print(f" fixture : {fixture_path.name}")
    print(f" session_id : {session_id}")
    memory_root = MemoryRoot.default().root
    print(f" memory root : {memory_root}")
    # A falsy client is reported as "None"; the banner warns that the
    # pipeline will skip in that case.
    if get_llm_client():
        llm_state = "<configured>"
    else:
        llm_state = "<None — pipeline will skip>"
    print(f" llm_client : {llm_state}")
    print(rule)
|
||||
|
||||
|
||||
def _list_written_files(session_id: str, mode: str) -> None:
    """Print files under the memory root modified within the last 10 minutes.

    Only the ``users``, ``agents``, ``knowledge`` and ``.index`` subtrees are
    walked; subtrees that do not exist are skipped. Each hit is printed as its
    path relative to the memory root plus its size in bytes, followed by a tip
    on locating this run's entries via *session_id*.

    Args:
        session_id: Session id of this run, echoed in the closing tip.
        mode: Unused; kept so existing callers keep working.
    """
    root = Path(MemoryRoot.default().root)
    cutoff = time.time() - 600  # files modified in the last 10 min
    print()
    print("─── files modified within the last 10 minutes under memory root ───")
    for sub in ("users", "agents", "knowledge", ".index"):
        base = root / sub
        if not base.is_dir():
            continue
        for p in sorted(base.rglob("*")):
            if not p.is_file():
                continue
            try:
                # One stat() per file so mtime and size describe the same
                # snapshot of the file.
                info = p.stat()
            except FileNotFoundError:
                # The tree is walked while the engines are still running, so
                # a transient file may vanish between the listing and the
                # stat; skip it rather than abort the summary.
                continue
            if info.st_mtime >= cutoff:
                print(f" {p.relative_to(root)} ({info.st_size}b)")
    print()
    print(f"Tip: grep '{session_id}' in any episode md to find this run's entries.")
|
||||
|
||||
|
||||
async def _setup() -> None:
    """Prepare the pieces the HTTP lifespan would normally set up.

    Creates every table registered on ``SQLModel.metadata`` through the
    SQLite engine, then starts the OME engine. LanceDB is not needed for the
    memorize sync path (only cascade reads it), so it is skipped here.
    """
    db_engine = get_engine()
    async with db_engine.begin() as connection:
        # create_all is a sync callable, hence run_sync on the async connection.
        await connection.run_sync(SQLModel.metadata.create_all)
    await _get_ome_engine().start()
|
||||
|
||||
|
||||
async def _teardown() -> None:
    """Stop the OME engine, then dispose the SQLite engine.

    The dispose step sits in a ``finally`` so the database engine is released
    even when stopping the OME engine raises; the original exception still
    propagates to the caller.
    """
    try:
        ome = _get_ome_engine()
        await ome.stop()
    finally:
        await dispose_engine()
|
||||
|
||||
|
||||
async def _run(args: argparse.Namespace) -> None:
    """Drive one fixture through ``memorize`` in batches, then flush.

    Steps:
      1. Load settings; when ``--expected-mode`` was given and differs from
         ``settings.memorize.mode``, print a hint and exit with status 2.
      2. Read the fixture JSON (always decoded as UTF-8) and build a per-run
         session id from its ``session_id_hint`` plus a random 8-hex suffix.
      3. With ``--dry-run``, print the batch plan and return without calling
         ``memorize`` or running setup.
      4. Otherwise run setup, send every batch with ``is_final=False``, send
         an empty batch with ``is_final=True``, wait for background OME work,
         list recently modified files, and always run teardown.

    Args:
        args: Parsed CLI namespace providing ``fixture``, ``batch_size``,
            ``expected_mode`` and ``dry_run``.
    """
    settings = load_settings()
    mode = settings.memorize.mode
    if args.expected_mode and args.expected_mode != mode:
        print(
            f"!! expected mode={args.expected_mode!r} but "
            f"settings.memorize.mode={mode!r}. "
            "Set EVEROS_MEMORIZE__MODE before launching."
        )
        sys.exit(2)

    fixture_path = Path(args.fixture).resolve()  # noqa: ASYNC240
    # The fixtures contain non-ASCII text (arrows, dashes), so pin the codec
    # instead of relying on the platform's locale encoding.
    fixture = json.loads(fixture_path.read_text(encoding="utf-8"))  # noqa: ASYNC230
    messages: list[dict] = fixture["messages"]
    session_id = f"{fixture.get('session_id_hint', 'e2e')}_{uuid.uuid4().hex[:8]}"

    _print_header(mode, fixture_path, session_id)

    if args.dry_run:
        for i, batch in enumerate(_chunks(messages, args.batch_size), start=1):
            print(
                f"[dry] batch {i}: {len(batch)} msgs "
                f"(first content: {batch[0]['content'][:60]!r})"
            )
        print("[dry] would flush at the end")
        return

    await _setup()
    try:
        batches = _chunks(messages, args.batch_size)
        for i, batch in enumerate(batches, start=1):
            result = await memorize(
                {"session_id": session_id, "messages": batch}, is_final=False
            )
            print(
                f"add batch {i}/{len(batches)} ({len(batch)} msgs) → "
                f"status={result.status:<11s} message_count={result.message_count}"
            )

        print()
        print("flushing residual tail...")
        flush_result = await memorize(
            {"session_id": session_id, "messages": []}, is_final=True
        )
        print(
            f"flush → status={flush_result.status:<11s} "
            f"message_count={flush_result.message_count}"
        )

        # OME strategies are fire-and-forget; each cell fires 2 strategies
        # (atomic_facts + foresight), each ~5-10s on a real LLM. Sleep long
        # enough for ~8-10 invocations to finish before engine.stop() drains
        # the scheduler — otherwise APS cancels in-flight LLM calls.
        await asyncio.sleep(30)

        _list_written_files(session_id, mode)
    finally:
        await _teardown()
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI arguments and run the end-to-end memorize flow.

    Exits with status 2 (argparse's usage-error code) when ``--batch-size``
    is smaller than 1: zero cannot be used as a slicing step and a negative
    value would silently send no batches at all.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's multi-line usage examples laid out as
        # written instead of letting argparse re-wrap them.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--fixture",
        required=True,
        help="path to fixture JSON (e.g. fixtures/chat_session.json)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=6,
        help=(
            "how many messages per /add call "
            "(default 6 — the 22-msg chat fixture yields 4 batches)"
        ),
    )
    parser.add_argument(
        "--expected-mode",
        choices=["chat", "agent"],
        help="sanity check: fail fast if settings.memorize.mode mismatches",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="print the batch plan without calling memorize",
    )
    args = parser.parse_args()
    if args.batch_size < 1:
        parser.error(f"--batch-size must be >= 1 (got {args.batch_size})")
    asyncio.run(_run(args))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user