chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
@ -0,0 +1,38 @@
|
||||
"""Tests for the multimodal capability guard."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.errors import MultimodalNotEnabledError
|
||||
from everos.memory.extract.parser import availability
|
||||
|
||||
|
||||
def test_has_unparsed_multimodal_true_for_unparsed_nontext() -> None:
    """A non-text item without ``parsed_content`` is reported as unparsed."""
    text_item = {"type": "text", "text": "hi"}
    image_item = {"type": "image", "uri": "x"}
    # Mixed payload: one plain text item plus one image that was never parsed.
    verdict = availability.has_unparsed_multimodal([text_item, image_item])
    assert verdict is True
|
||||
|
||||
|
||||
def test_has_unparsed_multimodal_false_when_all_text() -> None:
    """A payload holding only text items is not reported as unparsed."""
    text_only = [{"type": "text", "text": "hi"}]
    verdict = availability.has_unparsed_multimodal(text_only)
    assert verdict is False
|
||||
|
||||
|
||||
def test_has_unparsed_multimodal_false_when_already_parsed() -> None:
    """An image item that already carries ``parsed_content`` is not flagged."""
    parsed_image = {"type": "image", "uri": "x", "parsed_content": "ocr"}
    verdict = availability.has_unparsed_multimodal([parsed_image])
    assert verdict is False
|
||||
|
||||
|
||||
def test_require_multimodal_raises_when_unavailable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``require_multimodal`` raises when the availability probe says False."""

    def _probe_unavailable() -> bool:
        return False

    # Replace the probe on the module so the guard sees "not installed".
    monkeypatch.setattr(availability, "multimodal_available", _probe_unavailable)
    with pytest.raises(MultimodalNotEnabledError):
        availability.require_multimodal()
|
||||
|
||||
|
||||
def test_require_multimodal_ok_when_available(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``require_multimodal`` completes silently when the probe says True."""

    def _probe_available() -> bool:
        return True

    monkeypatch.setattr(availability, "multimodal_available", _probe_available)
    # No assertion needed: the test passes as long as this call does not raise.
    availability.require_multimodal()
|
||||
183
tests/unit/test_memory/test_extract/test_parser/test_enrich.py
Normal file
183
tests/unit/test_memory/test_extract/test_parser/test_enrich.py
Normal file
@ -0,0 +1,183 @@
|
||||
"""Tests for enrich_content_items (everalgo.parser.aparse is monkeypatched)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
# ``everalgo.parser`` ships under the ``[multimodal]`` extra (see
|
||||
# pyproject.toml). CI doesn't install that extra by default, and these
|
||||
# tests monkeypatch ``everalgo.parser.aparse`` — which requires the
|
||||
# module to actually be importable, otherwise ``monkeypatch.setattr``
|
||||
# fails at resolve-time. Skip the whole module when the optional
|
||||
# dependency isn't present; we still run when ``multimodal`` is installed.
|
||||
pytest.importorskip("everalgo.parser")
|
||||
|
||||
from everalgo.llm import LLMError # noqa: E402
|
||||
from everalgo.types import ParsedContent # noqa: E402
|
||||
|
||||
from everos.core.errors import UnsupportedModalityError # noqa: E402
|
||||
from everos.memory.extract.parser import enrich_content_items # noqa: E402
|
||||
|
||||
|
||||
def _img_item() -> dict[str, Any]:
    """Return an image content item carrying a base64-encoded PNG magic prefix."""
    encoded_png = base64.b64encode(b"\x89PNG").decode()
    item: dict[str, Any] = {"type": "image", "base64": encoded_png, "ext": "png"}
    return item
|
||||
|
||||
|
||||
def _html_b64_item() -> dict[str, Any]:
    """Return an html content item whose document is supplied as base64 bytes."""
    document = b"<html><body>v9.9.9</body></html>"
    encoded_html = base64.b64encode(document).decode()
    item: dict[str, Any] = {"type": "html", "base64": encoded_html, "ext": "html"}
    return item
|
||||
|
||||
|
||||
def _html_uri_item() -> dict[str, Any]:
    """Return an html content item that points at an https URL (no inline bytes)."""
    item: dict[str, Any] = {
        "type": "html",
        "uri": "https://example.com/page.html",
    }
    return item
|
||||
|
||||
|
||||
async def test_enrich_backfills_parsed_content(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Parsed text and a success status are written back onto the image item.

    The text item in the same payload must not gain a ``parsed_content`` key.
    """

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        return ParsedContent(text="OCR RESULT")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    items: list[dict[str, Any]] = []
    items.append({"type": "text", "text": "hi"})
    items.append(_img_item())
    await enrich_content_items(items, llm=object(), max_concurrency=2)

    text_entry = items[0]
    image_entry = items[1]
    assert image_entry["parsed_content"] == "OCR RESULT"
    assert image_entry["parse_status"] == "success"
    assert "parsed_content" not in text_entry  # text item untouched
|
||||
|
||||
|
||||
async def test_enrich_unsupported_modality_raises(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A parser ``NotImplementedError`` surfaces as ``UnsupportedModalityError``."""

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        raise NotImplementedError("video deferred")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    payload = [_img_item()]
    with pytest.raises(UnsupportedModalityError):
        await enrich_content_items(payload, llm=object())
|
||||
|
||||
|
||||
async def test_enrich_transient_llm_error_degrades(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An ``LLMError`` from the parser marks the item failed instead of raising."""

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        raise LLMError("provider down")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    items = [_img_item()]
    # Must not raise: the failure is recorded on the item itself.
    await enrich_content_items(items, llm=object())

    degraded = items[0]
    assert degraded["parse_status"] == "failed"
    assert "parsed_content" not in degraded
|
||||
|
||||
|
||||
async def test_enrich_html_base64_routes_as_html_bytes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A base64 ``type=html`` item arrives at the parser as html-extension bytes.

    This pins the "normal HTML file call" contract: base64 + ext=html becomes a
    RawFile the parser dispatches as HTML. The contrasting case, a text-only
    html item that yields a 415, is covered in test_ingest.
    """
    captured: dict[str, Any] = {}

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Record what the parser was handed so it can be asserted on below.
        captured.update(extension=raw_file.extension, content=raw_file.content)
        return ParsedContent(text="HTML PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    items = [_html_b64_item()]
    await enrich_content_items(items, llm=object())

    enriched = items[0]
    assert enriched["parsed_content"] == "HTML PARSED"
    assert enriched["parse_status"] == "success"
    assert captured["extension"] == "html"
    assert b"v9.9.9" in captured["content"]
|
||||
|
||||
|
||||
async def test_enrich_http_uri_routes_as_uri(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An http(s) uri item is handed to the parser as a uri RawFile with no bytes.

    Shows that everos forwards uri-backed items untouched, which is what feeds
    everalgo's URL-fetch dispatch path (http/https only; file:// is rejected
    downstream).
    """
    captured: dict[str, Any] = {}

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Record what the parser was handed so it can be asserted on below.
        captured.update(uri=raw_file.uri, content=raw_file.content)
        return ParsedContent(text="URL PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    items = [_html_uri_item()]
    await enrich_content_items(items, llm=object())

    enriched = items[0]
    assert enriched["parsed_content"] == "URL PARSED"
    assert enriched["parse_status"] == "success"
    assert captured["uri"] == "https://example.com/page.html"
    assert captured["content"] == b""
|
||||
|
||||
|
||||
async def test_enrich_html_text_only_raises_unsupported(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``type=html`` with only a ``text`` field (no uri/base64) cannot be dispatched.

    Every non-text item goes to the parser, which needs something it can fetch
    or decode. A bare ``text`` offers neither, so the call is expected to raise
    ``UnsupportedModalityError`` (presumably one of the multimodal errors the
    route maps to HTTP 415). Callers who want to inline HTML *as text* must
    send ``type="text"`` instead.
    """

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        return ParsedContent(text="should-not-be-reached")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    text_only_html = {"type": "html", "text": "<p>hi</p>"}
    with pytest.raises(UnsupportedModalityError):
        await enrich_content_items([text_only_html], llm=object())
|
||||
|
||||
|
||||
async def test_enrich_file_uri_hydrates_and_parses(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Any,
) -> None:
    """A ``file://`` item is read locally and reaches the parser as raw bytes.

    EverOS does the hydration itself (everalgo never sees the path or the
    filesystem): the parser is given ``content`` bytes and an empty uri.
    """
    captured: dict[str, Any] = {}

    async def fake_aparse(raw_file: Any, *, llm: Any) -> ParsedContent:
        # Record what the parser was handed so it can be asserted on below.
        captured.update(content=raw_file.content, uri=raw_file.uri)
        return ParsedContent(text="FILE PARSED")

    monkeypatch.setattr("everalgo.parser.aparse", fake_aparse)

    document = tmp_path / "doc.html"
    document.write_bytes(b"<html>hello</html>")
    items = [{"type": "html", "uri": f"file://{document}"}]
    await enrich_content_items(items, llm=object())

    enriched = items[0]
    assert enriched["parsed_content"] == "FILE PARSED"
    assert enriched["parse_status"] == "success"
    assert captured["content"] == b"<html>hello</html>"  # hydrated, not a pointer
    assert captured["uri"] == ""
|
||||
105
tests/unit/test_memory/test_extract/test_parser/test_mapping.py
Normal file
105
tests/unit/test_memory/test_extract/test_parser/test_mapping.py
Normal file
@ -0,0 +1,105 @@
|
||||
"""Tests for ContentItem -> everalgo RawFile mapping + file:// hydration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.memory.extract.parser.mapping import build_raw_file, to_raw_file
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_settings_cache():
    """Clear the ``load_settings`` cache before and after every test.

    The file:// guardrails read settings, and ``load_settings`` is cached, so
    an environment override set by one test would otherwise leak into the
    next. Being ``autouse``, this wraps every test in the module.
    """
    # Setup: start each test from an empty settings cache.
    load_settings.cache_clear()
    yield
    # Teardown: drop whatever settings the test caused to be cached.
    load_settings.cache_clear()
|
||||
|
||||
|
||||
def test_uri_item_maps_to_rawfile_uri() -> None:
    """A uri-backed item maps to a RawFile holding the uri and empty content."""
    item = {"type": "image", "uri": "https://x/y.png"}
    raw_file = to_raw_file(item)
    assert raw_file.uri == "https://x/y.png"
    assert raw_file.content == b""
|
||||
|
||||
|
||||
def test_base64_item_decodes_and_lowercases_extension() -> None:
    """Base64 payloads are decoded and an ``.PNG`` hint comes back as ``png``."""
    png_header = b"\x89PNG\r\n"
    item = {
        "type": "image",
        "base64": base64.b64encode(png_header).decode(),
        "ext": ".PNG",
    }
    raw_file = to_raw_file(item)
    assert raw_file.content == png_header
    assert raw_file.extension == "png"
|
||||
|
||||
|
||||
def test_item_without_uri_or_base64_raises() -> None:
    """An item with neither ``uri`` nor ``base64`` is rejected with ValueError."""
    payloadless_item = {"type": "image"}
    with pytest.raises(ValueError):
        to_raw_file(payloadless_item)
|
||||
|
||||
|
||||
# ── build_raw_file: file:// hydration + guardrails ──────────────────────
|
||||
|
||||
|
||||
async def test_build_raw_file_delegates_http_uri() -> None:
    """http(s) uris are kept in uri form (everalgo fetches them), not hydrated."""
    item = {"type": "html", "uri": "https://example.com"}
    raw_file = await build_raw_file(item)
    assert raw_file.uri == "https://example.com"
    assert raw_file.content == b""
|
||||
|
||||
|
||||
async def test_build_raw_file_hydrates_file_uri(tmp_path: Path) -> None:
    """A file:// uri is read from disk into a hydrated RawFile (content + ext)."""
    body = b"<html><body>v9.9.9</body></html>"
    source = tmp_path / "notes.html"
    source.write_bytes(body)

    raw_file = await build_raw_file({"type": "html", "uri": f"file://{source}"})

    assert raw_file.content == body
    assert raw_file.extension == "html"
    assert raw_file.uri == ""  # hydrated, not a pointer
|
||||
|
||||
|
||||
async def test_build_raw_file_file_uri_ext_hint_wins(tmp_path: Path) -> None:
    """For a suffix-less file, the item's explicit ``ext`` sets the extension."""
    source = tmp_path / "blob"  # no suffix
    source.write_bytes(b"%PDF-1.4 ...")

    item = {"type": "pdf", "uri": f"file://{source}", "ext": "pdf"}
    raw_file = await build_raw_file(item)

    assert raw_file.extension == "pdf"
|
||||
|
||||
|
||||
async def test_build_raw_file_missing_file_raises(tmp_path: Path) -> None:
    """A file:// uri pointing at a path that does not exist raises ValueError."""
    missing_uri = f"file://{tmp_path}/nope.pdf"
    with pytest.raises(ValueError):
        await build_raw_file({"type": "pdf", "uri": missing_uri})
|
||||
|
||||
|
||||
async def test_build_raw_file_oversize_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file larger than the configured byte cap is rejected as "too large"."""
    oversized = tmp_path / "big.html"
    oversized.write_bytes(b"x" * 100)

    # Cap at 10 bytes, then clear the cache so the override is actually read.
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES", "10")
    load_settings.cache_clear()

    item = {"type": "html", "uri": f"file://{oversized}"}
    with pytest.raises(ValueError, match="too large"):
        await build_raw_file(item)
|
||||
|
||||
|
||||
async def test_build_raw_file_outside_allowlist_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file that is not under any allow-listed root is rejected."""
    outsider = tmp_path / "secret.html"
    outsider.write_bytes(b"<html></html>")

    # Allow only an unrelated root, then clear the cache so it takes effect.
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS", '["/some/other/root"]')
    load_settings.cache_clear()

    item = {"type": "html", "uri": f"file://{outsider}"}
    with pytest.raises(ValueError, match="outside the allowed roots"):
        await build_raw_file(item)
|
||||
|
||||
|
||||
async def test_build_raw_file_inside_allowlist_ok(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file under an allow-listed root is hydrated normally.

    The allowlist override is a JSON-style list literal. It is serialised with
    ``json.dumps`` rather than spliced into an f-string, so a temp path that
    contains backslashes or quotes (e.g. on Windows) still yields a valid
    list. For ordinary POSIX paths the resulting string is unchanged.
    """
    import json  # local import keeps this block self-contained

    f = tmp_path / "ok.html"
    f.write_bytes(b"<html>ok</html>")

    allow_dirs = json.dumps([str(tmp_path)])
    monkeypatch.setenv("EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS", allow_dirs)
    # Settings are cached; clear so the override above is actually read.
    load_settings.cache_clear()

    rf = await build_raw_file({"type": "html", "uri": f"file://{f}"})
    assert rf.content == b"<html>ok</html>"
|
||||
Reference in New Issue
Block a user