chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,38 @@
"""Tests for the multimodal capability guard."""
from __future__ import annotations
import pytest
from everos.core.errors import MultimodalNotEnabledError
from everos.memory.extract.parser import availability
def test_has_unparsed_multimodal_true_for_unparsed_nontext() -> None:
    """A non-text item without ``parsed_content`` is reported as unparsed."""
    payload = [
        {"type": "text", "text": "hi"},
        {"type": "image", "uri": "x"},
    ]
    outcome = availability.has_unparsed_multimodal(payload)
    assert outcome is True
def test_has_unparsed_multimodal_false_when_all_text() -> None:
    """A payload consisting only of text items has nothing left to parse."""
    text_only = [{"type": "text", "text": "hi"}]
    outcome = availability.has_unparsed_multimodal(text_only)
    assert outcome is False
def test_has_unparsed_multimodal_false_when_already_parsed() -> None:
    """A non-text item that already carries ``parsed_content`` is not flagged."""
    already_parsed = [{"type": "image", "uri": "x", "parsed_content": "ocr"}]
    outcome = availability.has_unparsed_multimodal(already_parsed)
    assert outcome is False
def test_require_multimodal_raises_when_unavailable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``require_multimodal`` raises when the availability probe says no."""

    def _probe_unavailable() -> bool:
        return False

    monkeypatch.setattr(availability, "multimodal_available", _probe_unavailable)
    with pytest.raises(MultimodalNotEnabledError):
        availability.require_multimodal()
def test_require_multimodal_ok_when_available(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``require_multimodal`` completes silently when the probe says yes."""

    def _probe_available() -> bool:
        return True

    monkeypatch.setattr(availability, "multimodal_available", _probe_available)
    # The call itself is the assertion: any exception fails the test.
    availability.require_multimodal()

View File

@ -0,0 +1,183 @@
"""Tests for enrich_content_items (everalgo.parser.aparse is monkeypatched)."""
from __future__ import annotations
import base64
from typing import Any
import pytest
# ``everalgo.parser`` ships under the ``[multimodal]`` extra (see
# pyproject.toml). CI doesn't install that extra by default, and these
# tests monkeypatch ``everalgo.parser.aparse`` — which requires the
# module to actually be importable, otherwise ``monkeypatch.setattr``
# fails at resolve-time. Skip the whole module when the optional
# dependency isn't present; we still run when ``multimodal`` is installed.
pytest.importorskip("everalgo.parser")
from everalgo.llm import LLMError # noqa: E402
from everalgo.types import ParsedContent # noqa: E402
from everos.core.errors import UnsupportedModalityError # noqa: E402
from everos.memory.extract.parser import enrich_content_items # noqa: E402
def _img_item() -> dict[str, Any]:
    """Build a fresh image content item with a base64-encoded payload.

    Returns:
        A new dict on every call (tests mutate it) with ``type="image"``,
        ``ext="png"`` and ``base64`` holding the encoded bytes ``b"\\x89PNG"``.
    """
    return {
        "type": "image",
        "base64": base64.b64encode(b"\x89PNG").decode(),
        "ext": "png",
    }
def _html_b64_item() -> dict[str, Any]:
    """Build a fresh html content item whose document is base64-encoded.

    Returns:
        A new dict on every call with ``type="html"``, ``ext="html"`` and
        ``base64`` holding an encoded one-line HTML document that contains
        the marker text ``v9.9.9``.
    """
    return {
        "type": "html",
        "base64": base64.b64encode(b"<html><body>v9.9.9</body></html>").decode(),
        "ext": "html",
    }
def _html_uri_item() -> dict[str, Any]:
    """Build a fresh html content item that points at an https URL.

    Returns:
        A new dict on every call with only ``type`` and ``uri`` set (no
        inline payload).
    """
    return {"type": "html", "uri": "https://example.com/page.html"}
async def test_enrich_backfills_parsed_content(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Parser output is written back onto the image item; text is left alone."""

    async def _stub_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        return ParsedContent(text="OCR RESULT")

    monkeypatch.setattr("everalgo.parser.aparse", _stub_aparse)

    payload: list[dict[str, Any]] = [{"type": "text", "text": "hi"}, _img_item()]
    await enrich_content_items(payload, llm=object(), max_concurrency=2)

    text_item, image_item = payload[0], payload[1]
    assert image_item["parsed_content"] == "OCR RESULT"
    assert image_item["parse_status"] == "success"
    assert "parsed_content" not in text_item  # text item untouched
async def test_enrich_unsupported_modality_raises(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A parser ``NotImplementedError`` surfaces as ``UnsupportedModalityError``."""

    async def _stub_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        raise NotImplementedError("video deferred")

    monkeypatch.setattr("everalgo.parser.aparse", _stub_aparse)

    payload = [_img_item()]
    with pytest.raises(UnsupportedModalityError):
        await enrich_content_items(payload, llm=object())
async def test_enrich_transient_llm_error_degrades(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An ``LLMError`` from the parser marks the item failed instead of raising."""

    async def _stub_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        raise LLMError("provider down")

    monkeypatch.setattr("everalgo.parser.aparse", _stub_aparse)

    payload = [_img_item()]
    # Must not raise: the failure is recorded on the item itself.
    await enrich_content_items(payload, llm=object())

    degraded = payload[0]
    assert degraded["parse_status"] == "failed"
    assert "parsed_content" not in degraded
async def test_enrich_html_base64_routes_as_html_bytes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A type=html base64 item is handed to the parser as html-extension bytes.

    Pins the "normal HTML file call" contract: base64 + ext=html becomes a
    RawFile whose ``extension`` is ``html`` and whose ``content`` holds the
    decoded bytes. (Contrast with the 415 that a text-only html item
    produces; test_ingest covers that negative path.)
    """
    captured: dict[str, Any] = {}

    async def _recording_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Record what the parser was given so the test can inspect it later.
        captured["extension"] = raw_file.extension
        captured["content"] = raw_file.content
        return ParsedContent(text="HTML PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", _recording_aparse)

    payload = [_html_b64_item()]
    await enrich_content_items(payload, llm=object())

    html_item = payload[0]
    assert html_item["parsed_content"] == "HTML PARSED"
    assert html_item["parse_status"] == "success"
    assert captured["extension"] == "html"
    assert b"v9.9.9" in captured["content"]
async def test_enrich_http_uri_routes_as_uri(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An http(s) uri item is handed to the parser as a uri RawFile, no bytes.

    Shows that everos forwards uri-backed items to the parser untouched,
    which is what drives everalgo's URL-fetch dispatch path (http/https
    only; file:// is rejected downstream).
    """
    captured: dict[str, Any] = {}

    async def _recording_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Record what the parser was given so the test can inspect it later.
        captured["uri"] = raw_file.uri
        captured["content"] = raw_file.content
        return ParsedContent(text="URL PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", _recording_aparse)

    payload = [_html_uri_item()]
    await enrich_content_items(payload, llm=object())

    uri_item = payload[0]
    assert uri_item["parsed_content"] == "URL PARSED"
    assert uri_item["parse_status"] == "success"
    assert captured["uri"] == "https://example.com/page.html"
    assert captured["content"] == b""
async def test_enrich_html_text_only_raises_unsupported(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """type=html carrying only ``text`` (no uri/base64) cannot be dispatched.

    Every non-text item goes to the parser, which needs a fetchable or
    decodable payload. A bare ``text`` offers neither, so the call raises
    ``UnsupportedModalityError`` (which the route maps to HTTP 415).
    Callers that want to inline HTML *as text* must send ``type="text"``.
    """

    async def _stub_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        return ParsedContent(text="should-not-be-reached")

    monkeypatch.setattr("everalgo.parser.aparse", _stub_aparse)

    text_only_html = [{"type": "html", "text": "<p>hi</p>"}]
    with pytest.raises(UnsupportedModalityError):
        await enrich_content_items(text_only_html, llm=object())
async def test_enrich_file_uri_hydrates_and_parses(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Any,
) -> None:
    """A ``file://`` item is read locally and reaches the parser as bytes.

    Shows that EverOS does the hydration itself (everalgo never sees the
    path or the filesystem): the parser stub observes ``content`` bytes and
    an empty ``uri``.
    """
    captured: dict[str, Any] = {}

    async def _recording_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Record what the parser was given so the test can inspect it later.
        captured["content"] = raw_file.content
        captured["uri"] = raw_file.uri
        return ParsedContent(text="FILE PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", _recording_aparse)

    doc = tmp_path / "doc.html"
    doc.write_bytes(b"<html>hello</html>")

    payload = [{"type": "html", "uri": f"file://{doc}"}]
    await enrich_content_items(payload, llm=object())

    file_item = payload[0]
    assert file_item["parsed_content"] == "FILE PARSED"
    assert file_item["parse_status"] == "success"
    assert captured["content"] == b"<html>hello</html>"  # hydrated, not a pointer
    assert captured["uri"] == ""

View File

@ -0,0 +1,105 @@
"""Tests for ContentItem -> everalgo RawFile mapping + file:// hydration."""
from __future__ import annotations
import base64
from pathlib import Path
import pytest
from everos.config import load_settings
from everos.memory.extract.parser.mapping import build_raw_file, to_raw_file
@pytest.fixture(autouse=True)
def _clear_settings_cache():
    """Reset the ``load_settings`` cache before and after every test.

    The file:// guardrails read settings, so a cached value would otherwise
    leak env overrides from one test into the next.
    """
    reset_cache = load_settings.cache_clear
    reset_cache()
    yield
    reset_cache()
def test_uri_item_maps_to_rawfile_uri() -> None:
    """A uri-backed item keeps its uri and carries no inline bytes."""
    item = {"type": "image", "uri": "https://x/y.png"}
    raw = to_raw_file(item)
    assert raw.uri == "https://x/y.png"
    assert raw.content == b""
def test_base64_item_decodes_and_lowercases_extension() -> None:
    """base64 payloads are decoded and an ``.PNG`` hint is normalised to ``png``."""
    png_bytes = b"\x89PNG\r\n"
    encoded = base64.b64encode(png_bytes).decode()
    raw = to_raw_file({"type": "image", "base64": encoded, "ext": ".PNG"})
    assert raw.content == png_bytes
    assert raw.extension == "png"
def test_item_without_uri_or_base64_raises() -> None:
    """An item with neither ``uri`` nor ``base64`` is rejected with ValueError."""
    payload_free_item = {"type": "image"}
    with pytest.raises(ValueError):
        to_raw_file(payload_free_item)
# ── build_raw_file: file:// hydration + guardrails ──────────────────────
async def test_build_raw_file_delegates_http_uri() -> None:
    """http(s) uris are passed through in uri form rather than hydrated.

    Fetching is left to everalgo, so the RawFile carries no content bytes.
    """
    item = {"type": "html", "uri": "https://example.com"}
    raw = await build_raw_file(item)
    assert raw.uri == "https://example.com"
    assert raw.content == b""
async def test_build_raw_file_hydrates_file_uri(tmp_path: Path) -> None:
    """A file:// uri is read locally into a hydrated RawFile (content + ext)."""
    html_bytes = b"<html><body>v9.9.9</body></html>"
    source = tmp_path / "notes.html"
    source.write_bytes(html_bytes)

    raw = await build_raw_file({"type": "html", "uri": f"file://{source}"})

    assert raw.content == b"<html><body>v9.9.9</body></html>"
    assert raw.extension == "html"
    assert raw.uri == ""  # hydrated, not a pointer
async def test_build_raw_file_file_uri_ext_hint_wins(tmp_path: Path) -> None:
    """An explicit ``ext`` hint supplies the extension for a suffix-less file."""
    suffixless = tmp_path / "blob"  # no suffix
    suffixless.write_bytes(b"%PDF-1.4 ...")

    item = {"type": "pdf", "uri": f"file://{suffixless}", "ext": "pdf"}
    raw = await build_raw_file(item)

    assert raw.extension == "pdf"
async def test_build_raw_file_missing_file_raises(tmp_path: Path) -> None:
    """A file:// uri that points at a non-existent path raises ValueError."""
    missing_uri = f"file://{tmp_path}/nope.pdf"
    with pytest.raises(ValueError):
        await build_raw_file({"type": "pdf", "uri": missing_uri})
async def test_build_raw_file_oversize_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file larger than the configured byte cap is rejected as too large."""
    oversized = tmp_path / "big.html"
    oversized.write_bytes(b"x" * 100)

    # Cap at 10 bytes, then drop the cached settings so the override is read.
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES", "10")
    load_settings.cache_clear()

    item = {"type": "html", "uri": f"file://{oversized}"}
    with pytest.raises(ValueError, match="too large"):
        await build_raw_file(item)
async def test_build_raw_file_outside_allowlist_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file that lives outside every allow-listed root is rejected."""
    outsider = tmp_path / "secret.html"
    outsider.write_bytes(b"<html></html>")

    # Allow only an unrelated root, then drop the cached settings so the
    # override is read.
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS", '["/some/other/root"]')
    load_settings.cache_clear()

    item = {"type": "html", "uri": f"file://{outsider}"}
    with pytest.raises(ValueError, match="outside the allowed roots"):
        await build_raw_file(item)
async def test_build_raw_file_inside_allowlist_ok(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file under an allow-listed root is hydrated normally.

    The allowlist is supplied through the environment as a JSON list, so the
    value is built with ``json.dumps`` rather than string interpolation: a
    temp path containing a backslash or a double quote would otherwise
    produce invalid JSON.
    """
    import json  # local import: only this test serialises a path to JSON

    f = tmp_path / "ok.html"
    f.write_bytes(b"<html>ok</html>")

    # For ordinary POSIX paths this yields the same '["<path>"]' string as
    # plain interpolation would.
    allow_dirs = json.dumps([str(tmp_path)])
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS", allow_dirs)
    # Drop the cached settings so the override above is actually read.
    load_settings.cache_clear()

    rf = await build_raw_file({"type": "html", "uri": f"file://{f}"})
    assert rf.content == b"<html>ok</html>"