chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,45 @@
"""Tests for ingest content coercion + text derivation (tagged rendering)."""
from __future__ import annotations
from everos.memory.extract.ingest.multimodal import (
coerce_items,
derive_text,
normalise_content,
)
def test_coerce_str_to_text_item() -> None:
    """A bare string is coerced into a one-element list holding a text item."""
    expected = [{"type": "text", "text": "hi"}]
    assert coerce_items("hi") == expected
def test_derive_text_renders_parsed_nontext_as_tag() -> None:
    """A parsed image is rendered inline as an ``[IMAGE: name]`` tag block."""
    parsed_image = {"type": "image", "name": "p.png", "parsed_content": "OCR TEXT"}
    items = [
        {"type": "text", "text": "before"},
        parsed_image,
        {"type": "text", "text": "after"},
    ]
    rendered, unparsed_count = derive_text(items)
    # The surrounding text items keep their position around the tagged block.
    assert rendered.startswith("before")
    assert rendered.endswith("after")
    assert "[IMAGE: p.png]\nOCR TEXT" in rendered
    # An item that already carries parsed_content is not counted.
    assert unparsed_count == 0
def test_derive_text_counts_unparsed_nontext() -> None:
    """An image without ``parsed_content`` yields no text and is counted once."""
    unparsed_image = {"type": "image", "uri": "x"}
    rendered, unparsed_count = derive_text([unparsed_image])
    assert rendered == ""
    assert unparsed_count == 1
def test_derive_text_tag_without_name() -> None:
    """Without a ``name`` the tag shows only the upper-cased type (``[PDF]``)."""
    nameless_pdf = {"type": "pdf", "parsed_content": "DOC"}
    rendered, _unparsed_count = derive_text([nameless_pdf])
    assert rendered == "[PDF]\nDOC"
def test_normalise_content_text_only_unchanged() -> None:
    """Plain-string content normalises to one text item with identical text."""
    normalised = normalise_content("hello")
    items, rendered, unparsed_count = normalised
    assert items == [{"type": "text", "text": "hello"}]
    assert rendered == "hello"
    assert unparsed_count == 0

View File

@ -0,0 +1,38 @@
"""Tests for the multimodal capability guard."""
from __future__ import annotations
import pytest
from everos.core.errors import MultimodalNotEnabledError
from everos.memory.extract.parser import availability
def test_has_unparsed_multimodal_true_for_unparsed_nontext() -> None:
    """A non-text item lacking ``parsed_content`` makes the guard report True."""
    text_item = {"type": "text", "text": "hi"}
    unparsed_image = {"type": "image", "uri": "x"}
    verdict = availability.has_unparsed_multimodal([text_item, unparsed_image])
    assert verdict is True
def test_has_unparsed_multimodal_false_when_all_text() -> None:
    """Text-only content is not reported as unparsed multimodal input."""
    only_text = [{"type": "text", "text": "hi"}]
    verdict = availability.has_unparsed_multimodal(only_text)
    assert verdict is False
def test_has_unparsed_multimodal_false_when_already_parsed() -> None:
    """A non-text item that already has ``parsed_content`` is not flagged."""
    parsed_image = {"type": "image", "uri": "x", "parsed_content": "ocr"}
    verdict = availability.has_unparsed_multimodal([parsed_image])
    assert verdict is False
def test_require_multimodal_raises_when_unavailable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``require_multimodal`` raises when the availability probe says False."""

    def _unavailable() -> bool:
        return False

    monkeypatch.setattr(availability, "multimodal_available", _unavailable)
    with pytest.raises(MultimodalNotEnabledError):
        availability.require_multimodal()
def test_require_multimodal_ok_when_available(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``require_multimodal`` passes silently when the probe reports True."""

    def _available() -> bool:
        return True

    monkeypatch.setattr(availability, "multimodal_available", _available)
    availability.require_multimodal()  # must not raise

View File

@ -0,0 +1,183 @@
"""Tests for enrich_content_items (everalgo.parser.aparse is monkeypatched)."""
from __future__ import annotations
import base64
from typing import Any
import pytest
# ``everalgo.parser`` ships under the ``[multimodal]`` extra (see
# pyproject.toml). CI doesn't install that extra by default, and these
# tests monkeypatch ``everalgo.parser.aparse`` — which requires the
# module to actually be importable, otherwise ``monkeypatch.setattr``
# fails at resolve-time. Skip the whole module when the optional
# dependency isn't present; we still run when ``multimodal`` is installed.
pytest.importorskip("everalgo.parser")
from everalgo.llm import LLMError # noqa: E402
from everalgo.types import ParsedContent # noqa: E402
from everos.core.errors import UnsupportedModalityError # noqa: E402
from everos.memory.extract.parser import enrich_content_items # noqa: E402
def _img_item() -> dict[str, Any]:
    """Build an image content item whose base64 payload is the bytes ``\\x89PNG``."""
    encoded = base64.b64encode(b"\x89PNG").decode()
    return {"type": "image", "base64": encoded, "ext": "png"}
def _html_b64_item() -> dict[str, Any]:
    """Build an HTML content item carrying a small base64-encoded document."""
    document = b"<html><body>v9.9.9</body></html>"
    encoded = base64.b64encode(document).decode()
    return {"type": "html", "base64": encoded, "ext": "html"}
def _html_uri_item() -> dict[str, Any]:
    """Build an HTML content item that points at a remote https page."""
    item: dict[str, Any] = {"type": "html"}
    item["uri"] = "https://example.com/page.html"
    return item
async def test_enrich_backfills_parsed_content(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Enrichment writes the parser's text back onto the non-text item only."""

    async def _stub_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        return ParsedContent(text="OCR RESULT")

    monkeypatch.setattr("everalgo.parser.aparse", _stub_aparse)
    text_item: dict[str, Any] = {"type": "text", "text": "hi"}
    items: list[dict[str, Any]] = [text_item, _img_item()]
    await enrich_content_items(items, llm=object(), max_concurrency=2)
    enriched = items[1]
    assert enriched["parsed_content"] == "OCR RESULT"
    assert enriched["parse_status"] == "success"
    assert "parsed_content" not in items[0]  # text item untouched
async def test_enrich_unsupported_modality_raises(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A parser ``NotImplementedError`` surfaces as ``UnsupportedModalityError``."""

    async def _deferred_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        raise NotImplementedError("video deferred")

    monkeypatch.setattr("everalgo.parser.aparse", _deferred_aparse)
    items = [_img_item()]
    with pytest.raises(UnsupportedModalityError):
        await enrich_content_items(items, llm=object())
async def test_enrich_transient_llm_error_degrades(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An ``LLMError`` from the parser marks the item failed instead of raising."""

    async def _failing_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        raise LLMError("provider down")

    monkeypatch.setattr("everalgo.parser.aparse", _failing_aparse)
    items = [_img_item()]
    await enrich_content_items(items, llm=object())  # must not raise
    degraded = items[0]
    assert degraded["parse_status"] == "failed"
    assert "parsed_content" not in degraded
async def test_enrich_html_base64_routes_as_html_bytes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A base64 ``type=html`` item reaches the parser as html-extension bytes.

    Pins the ordinary "HTML file" call: base64 + ``ext=html`` becomes a
    RawFile whose ``extension`` is ``html`` and whose ``content`` holds the
    decoded bytes. The text-only html item is the negative path (the
    original note points to test_ingest for it).
    """
    captured: dict[str, Any] = {}

    async def _recording_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Remember what the parser was handed so it can be asserted on below.
        captured["extension"] = raw_file.extension
        captured["content"] = raw_file.content
        return ParsedContent(text="HTML PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", _recording_aparse)
    items = [_html_b64_item()]
    await enrich_content_items(items, llm=object())
    enriched = items[0]
    assert enriched["parsed_content"] == "HTML PARSED"
    assert enriched["parse_status"] == "success"
    assert captured["extension"] == "html"
    assert b"v9.9.9" in captured["content"]
async def test_enrich_http_uri_routes_as_uri(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An http(s) uri item reaches the parser as a uri RawFile with no bytes.

    Shows that uri-backed items are forwarded to the parser untouched: the
    stub sees the original uri and empty ``content``. Per the original note,
    this drives everalgo's URL-fetch path (http/https only).
    """
    captured: dict[str, Any] = {}

    async def _recording_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        captured["uri"] = raw_file.uri
        captured["content"] = raw_file.content
        return ParsedContent(text="URL PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", _recording_aparse)
    items = [_html_uri_item()]
    await enrich_content_items(items, llm=object())
    enriched = items[0]
    assert enriched["parsed_content"] == "URL PARSED"
    assert enriched["parse_status"] == "success"
    assert captured["uri"] == "https://example.com/page.html"
    assert captured["content"] == b""
async def test_enrich_html_text_only_raises_unsupported(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``type=html`` carrying only ``text`` (no uri/base64) cannot be dispatched.

    A non-text item needs a fetchable or decodable payload; a bare ``text``
    has neither, so enrichment raises ``UnsupportedModalityError`` (the
    original note says the route maps this to HTTP 415). To inline HTML *as
    text*, callers must use ``type="text"`` instead.
    """

    async def _unreachable_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        return ParsedContent(text="should-not-be-reached")

    monkeypatch.setattr("everalgo.parser.aparse", _unreachable_aparse)
    text_only_html = {"type": "html", "text": "<p>hi</p>"}
    with pytest.raises(UnsupportedModalityError):
        await enrich_content_items([text_only_html], llm=object())
async def test_enrich_file_uri_hydrates_and_parses(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Any,
) -> None:
    """A ``file://`` item is read locally and reaches the parser as bytes.

    The parser stub must see the file's ``content`` and an empty ``uri``,
    i.e. the local path itself is never forwarded.
    """
    captured: dict[str, Any] = {}

    async def _recording_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        captured["content"] = raw_file.content
        captured["uri"] = raw_file.uri
        return ParsedContent(text="FILE PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", _recording_aparse)
    html_file = tmp_path / "doc.html"
    html_file.write_bytes(b"<html>hello</html>")
    items = [{"type": "html", "uri": f"file://{html_file}"}]
    await enrich_content_items(items, llm=object())
    enriched = items[0]
    assert enriched["parsed_content"] == "FILE PARSED"
    assert enriched["parse_status"] == "success"
    assert captured["content"] == b"<html>hello</html>"  # hydrated, not a pointer
    assert captured["uri"] == ""

View File

@ -0,0 +1,105 @@
"""Tests for ContentItem -> everalgo RawFile mapping + file:// hydration."""
from __future__ import annotations
import base64
from pathlib import Path
import pytest
from everos.config import load_settings
from everos.memory.extract.parser.mapping import build_raw_file, to_raw_file
@pytest.fixture(autouse=True)
def _clear_settings_cache():
    """Clear the ``load_settings`` cache before and after every test.

    file:// guardrails read settings; clearing on both sides keeps the
    lru_cache from leaking env overrides across tests.
    """
    load_settings.cache_clear()
    yield
    load_settings.cache_clear()
def test_uri_item_maps_to_rawfile_uri() -> None:
    """A uri-backed item maps to a RawFile carrying the uri and empty content."""
    source_uri = "https://x/y.png"
    raw = to_raw_file({"type": "image", "uri": source_uri})
    assert raw.uri == source_uri
    assert raw.content == b""
def test_base64_item_decodes_and_lowercases_extension() -> None:
    """The base64 payload is decoded and ``.PNG`` becomes extension ``png``."""
    payload = b"\x89PNG\r\n"
    item = {
        "type": "image",
        "base64": base64.b64encode(payload).decode(),
        "ext": ".PNG",
    }
    raw = to_raw_file(item)
    assert raw.content == payload
    assert raw.extension == "png"
def test_item_without_uri_or_base64_raises() -> None:
    """An item with neither ``uri`` nor ``base64`` is rejected with ValueError."""
    payloadless_item = {"type": "image"}
    with pytest.raises(ValueError):
        to_raw_file(payloadless_item)
# ── build_raw_file: file:// hydration + guardrails ──────────────────────
async def test_build_raw_file_delegates_http_uri() -> None:
    """An https uri stays in uri form: RawFile keeps the uri, content is empty."""
    remote = "https://example.com"
    raw = await build_raw_file({"type": "html", "uri": remote})
    assert raw.uri == remote
    assert raw.content == b""
async def test_build_raw_file_hydrates_file_uri(tmp_path: Path) -> None:
    """A ``file://`` uri is read into content + extension, leaving uri empty."""
    body = b"<html><body>v9.9.9</body></html>"
    html_file = tmp_path / "notes.html"
    html_file.write_bytes(body)
    raw = await build_raw_file({"type": "html", "uri": f"file://{html_file}"})
    assert raw.content == body
    assert raw.extension == "html"
    assert raw.uri == ""  # hydrated, not a pointer
async def test_build_raw_file_file_uri_ext_hint_wins(tmp_path: Path) -> None:
    """An explicit ``ext`` supplies the extension for a suffix-less file."""
    suffixless = tmp_path / "blob"  # no suffix
    suffixless.write_bytes(b"%PDF-1.4 ...")
    item = {"type": "pdf", "uri": f"file://{suffixless}", "ext": "pdf"}
    raw = await build_raw_file(item)
    assert raw.extension == "pdf"
async def test_build_raw_file_missing_file_raises(tmp_path: Path) -> None:
    """A ``file://`` uri naming a nonexistent file raises ValueError."""
    missing_uri = f"file://{tmp_path}/nope.pdf"
    with pytest.raises(ValueError):
        await build_raw_file({"type": "pdf", "uri": missing_uri})
async def test_build_raw_file_oversize_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A 100-byte file is rejected as "too large" under a 10-byte cap."""
    big_file = tmp_path / "big.html"
    big_file.write_bytes(b"x" * 100)
    # Set the cap, then drop the cached settings so the override is picked up.
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES", "10")
    load_settings.cache_clear()
    item = {"type": "html", "uri": f"file://{big_file}"}
    with pytest.raises(ValueError, match="too large"):
        await build_raw_file(item)
async def test_build_raw_file_outside_allowlist_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file outside the configured allow-dirs is rejected with ValueError."""
    outsider = tmp_path / "secret.html"
    outsider.write_bytes(b"<html></html>")
    # Allow only an unrelated root, then drop the cached settings.
    allowed_roots = '["/some/other/root"]'
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS", allowed_roots)
    load_settings.cache_clear()
    item = {"type": "html", "uri": f"file://{outsider}"}
    with pytest.raises(ValueError, match="outside the allowed roots"):
        await build_raw_file(item)
async def test_build_raw_file_inside_allowlist_ok(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file under an allow-listed root is hydrated normally.

    The allow-dirs env value is written as a JSON array. It is produced with
    ``json.dumps`` rather than string interpolation so that a temp path
    containing a quote or backslash still yields a well-formed value.
    """
    import json

    f = tmp_path / "ok.html"
    f.write_bytes(b"<html>ok</html>")
    allowed_roots = json.dumps([str(tmp_path)])
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS", allowed_roots)
    # Drop the cached settings so the override above is read.
    load_settings.cache_clear()
    rf = await build_raw_file({"type": "html", "uri": f"file://{f}"})
    assert rf.content == b"<html>ok</html>"

View File

@ -0,0 +1,61 @@
"""``AgentMemoryPipeline.run`` — empty short-circuit + per-cell event emit."""
from __future__ import annotations
from everalgo.types import ChatMessage, MemCell
from everos.memory import IngestResult
from everos.memory.events import AgentPipelineStarted
from everos.memory.extract.pipeline.agent_memory import AgentMemoryPipeline
class _FakeEngine:
    """Engine stand-in that records every emitted event.

    ``emit`` is a coroutine so it can be awaited like the real engine's
    (the original note says it mirrors ``OfflineEngine.emit``).
    """

    def __init__(self) -> None:
        self.events: list[AgentPipelineStarted] = []

    async def emit(self, event: AgentPipelineStarted) -> None:
        """Store *event* at the end of ``events``."""
        self.events += [event]
def _make_cell(n_items: int, ts: int = 1_700_000_000_000) -> MemCell:
    """Build a MemCell of *n_items* user messages that all carry timestamp *ts*.

    Message ids are ``m0``, ``m1``, ...; every other field is the same.
    """
    messages = []
    for index in range(n_items):
        message = ChatMessage(
            id=f"m{index}",
            role="user",
            sender_id="u1",
            sender_name="u",
            content="hi",
            timestamp=ts,
        )
        messages.append(message)
    return MemCell(items=messages, timestamp=ts)
async def test_empty_cells_short_circuit() -> None:
    """With no cells the pipeline reports ``accumulated`` and emits nothing."""
    engine = _FakeEngine()
    pipeline = AgentMemoryPipeline(engine)  # type: ignore[arg-type]
    no_messages = IngestResult(session_id="s1", messages=[])
    outcome = await pipeline.run(no_messages, cells=[], memcell_ids=[])
    assert engine.events == []
    assert outcome.track == "agent_memory"
    assert outcome.status == "accumulated"
    assert outcome.message_count == 0
async def test_emits_one_event_per_cell() -> None:
    """Each cell yields one AgentPipelineStarted tagged with its memcell id."""
    engine = _FakeEngine()
    pipeline = AgentMemoryPipeline(engine)  # type: ignore[arg-type]
    ingested = IngestResult(session_id="s1", messages=[])
    cells = [_make_cell(n_items=size) for size in (2, 3)]
    memcell_ids = ["mc_a", "mc_b"]
    outcome = await pipeline.run(ingested, cells=cells, memcell_ids=memcell_ids)
    assert outcome.track == "agent_memory"
    assert outcome.status == "extracted"
    assert outcome.message_count == 5  # 2 + 3
    emitted = engine.events
    assert [event.memcell_id for event in emitted] == ["mc_a", "mc_b"]
    for event in emitted:
        assert event.session_id == "s1"
        assert isinstance(event, AgentPipelineStarted)

View File

@ -0,0 +1,123 @@
from __future__ import annotations
import datetime as _dt
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from everalgo.types import ChatMessage, MemCell
from everalgo.types import Episode as AlgoEpisode
from everos.core.persistence import EntryId
from everos.memory import IngestResult
from everos.memory.events import EpisodeExtracted, UserPipelineStarted
from everos.memory.extract.pipeline.user_memory import UserMemoryPipeline
from everos.memory.models import CanonicalMessage
def _sample_memcell() -> MemCell:
    """Return a MemCell holding one user message ("hello" from sender ``u1``)."""
    ts = 1_700_000_000_000
    hello = ChatMessage(
        id="m1",
        role="user",
        content="hello",
        timestamp=ts,
        sender_id="u1",
    )
    return MemCell(items=[hello], timestamp=ts)
class _CapturingEngine:
    """Engine stand-in that keeps every emitted event in ``emitted``."""

    def __init__(self) -> None:
        self.emitted: list[object] = []

    async def emit(self, event: object) -> None:
        """Store *event* at the end of ``emitted``."""
        self.emitted += [event]
@pytest.mark.asyncio
async def test_emit_pipeline_started_routes_through_engine() -> None:
    """``_emit_pipeline_started`` emits one UserPipelineStarted via the engine.

    The event must carry the ids passed in and the very same ``MemCell``
    object (identity, not a copy). The explicit asyncio marker matches the
    other coroutine test in this module.
    """
    engine = _CapturingEngine()
    pipeline = UserMemoryPipeline(
        episode_writer=MagicMock(),
        prompt_loader=MagicMock(),
        llm_client=MagicMock(),
        engine=engine,
    )
    cell = _sample_memcell()
    await pipeline._emit_pipeline_started(  # noqa: SLF001 — test introspection
        memcell_id="mc_a",
        session_id="s1",
        app_id="claude_code",
        project_id="oss",
        cell=cell,
    )
    started = [e for e in engine.emitted if isinstance(e, UserPipelineStarted)]
    assert len(started) == 1
    event = started[0]
    assert event.memcell_id == "mc_a"
    assert event.session_id == "s1"
    assert event.app_id == "claude_code"
    assert event.project_id == "oss"
    assert event.memcell is cell
@pytest.mark.asyncio
async def test_emit_episode_extracted_after_md_write() -> None:
    """A per-sender Episode write emits EpisodeExtracted carrying the md entry id."""
    engine = _CapturingEngine()

    # Writer double returning a fixed EntryId and a fixed markdown path.
    entry_id = EntryId(prefix="ep", date=_dt.date(2026, 5, 17), seq=1)
    episode_writer = MagicMock()
    episode_writer.append_entry = AsyncMock(return_value=entry_id)
    episode_writer.path_for = MagicMock(
        return_value="users/u1/episodes/episode-2026-05-17.md"
    )

    prompt_loader = MagicMock()
    prompt_loader.load = MagicMock(return_value="<prompt>")

    pipeline = UserMemoryPipeline(
        episode_writer=episode_writer,
        prompt_loader=prompt_loader,
        llm_client=MagicMock(),
        engine=engine,
    )

    cell = _sample_memcell()
    message = CanonicalMessage(
        message_id="m1",
        session_id="s1",
        sender_id="u1",
        role="user",
        timestamp=_dt.datetime.fromtimestamp(1_700_000_000, tz=_dt.UTC),
        text="hello",
    )
    ingested = IngestResult(session_id="s1", messages=[message])
    algo_ep = AlgoEpisode(
        owner_id="u1", episode="they said hello", timestamp=1_700_000_000_000
    )

    # Replace the episode extractor's coroutine so no real extraction runs.
    stub_extract = AsyncMock(return_value=algo_ep)
    with patch.object(pipeline._ep_ext, "aextract", new=stub_extract):  # noqa: SLF001
        outcome = await pipeline.run(
            ingested=ingested,
            cells=[cell],
            memcell_ids=["mc_a"],
            per_cell_all_senders=[["u1"]],
        )

    assert outcome.status == "extracted"
    extracted = [e for e in engine.emitted if isinstance(e, EpisodeExtracted)]
    assert len(extracted) == 1
    event = extracted[0]
    assert event.memcell_id == "mc_a"
    assert event.episode_entry_id == "ep_20260517_00000001"
    assert event.episode_text == "they said hello"
    assert event.episode_timestamp_ms == 1_700_000_000_000
    assert event.owner_id == "u1"