chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and checks for OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/tests/integration/test_memorize_agent_mode.py
+++ b/tests/integration/test_memorize_agent_mode.py
@ -0,0 +1,268 @@
+"""Agent-mode memorize integration tests.
+
+Covers the agent branches that ``test_memorize_integration.py`` skips:
+
+- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
+  pipelines)
+- :mod:`service._boundary` agent-mode detection via
+  :class:`everalgo.agent_memory.AgentBoundaryDetector`
+- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end
+
+Self-contained: the chat-baseline file keeps its fixture local, so we
+copy the minimum scaffolding rather than refactor it into a shared
+conftest.
+"""
+
+from __future__ import annotations
+
+import importlib
+import json
+import sqlite3
+from collections.abc import AsyncIterator, Callable
+from pathlib import Path
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+import pytest_asyncio
+from everalgo.llm.types import ChatMessage as LLMChatMessage
+from everalgo.llm.types import ChatResponse
+from everalgo.testing.fake_llm import FakeLLMClient
+from sqlmodel import SQLModel
+
+from everos.core.persistence import MemoryRoot
+from everos.service.memorize import MemorizeResult, memorize
+
+
+def _boundary_response(boundaries: list[int]) -> str:
+    return json.dumps(
+        {"reasoning": "test", "boundaries": boundaries, "should_wait": False}
+    )
+
+
+def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
+    queue: list[list[int]] = list(boundary_responses or [])
+
+    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
+        prompt = messages[0].content
+        if "boundaries" in prompt.lower() or "memcell" in prompt.lower():
+            cuts = queue.pop(0) if queue else []
+            return ChatResponse(content=_boundary_response(cuts), model="fake")
+        return ChatResponse(
+            content=json.dumps({"title": "T", "content": "B"}), model="fake"
+        )
+
+    return FakeLLMClient(handler=handler)
+
+
+def _msg(
+    role: str,
+    content: str,
+    *,
+    sender_id: str = "u_alice",
+    timestamp: int = 1_700_000_000_000,
+    tool_calls: list[dict] | None = None,
+    tool_call_id: str | None = None,
+) -> dict[str, Any]:
+    out: dict[str, Any] = {
+        "sender_id": sender_id,
+        "role": role,
+        "content": content,
+        "timestamp": timestamp,
+    }
+    if tool_calls is not None:
+        out["tool_calls"] = tool_calls
+    if tool_call_id is not None:
+        out["tool_call_id"] = tool_call_id
+    return out
+
+
+def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
+    return _msg("user", content, sender_id=sender, timestamp=ts)
+
+
+def _assistant(content: str, ts: int) -> dict[str, Any]:
+    return _msg("assistant", content, sender_id="assistant", timestamp=ts)
+
+
+def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
+    db = tmp_path / ".index" / "sqlite" / "system.db"
+    if not db.is_file():
+        return []
+    conn = sqlite3.connect(db)
+    conn.row_factory = sqlite3.Row
+    try:
+        return list(conn.execute("SELECT * FROM memcell ORDER BY timestamp"))
+    finally:
+        conn.close()
+
+
+@pytest_asyncio.fixture
+async def memorize_env(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> AsyncIterator[Callable[..., Any]]:
+    """Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``."""
+    monkeypatch.setattr(
+        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
+    )
+    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)
+
+    svc = importlib.import_module("everos.service.memorize")
+    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
+    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
+    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
+    client_mod = importlib.import_module("everos.component.llm.client")
+
+    for attr in (
+        "_episode_writer",
+        "_prompt_loader",
+        "_user_pipeline",
+        "_agent_pipeline",
+        "_ome_engine",
+    ):
+        monkeypatch.setattr(svc, attr, None, raising=False)
+    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
+    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
+    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)
+
+    started: dict[str, Any] = {"engine": None}
+
+    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
+        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
+        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
+        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
+
+        from everos.config import load_settings
+
+        load_settings.cache_clear()
+
+        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)
+
+        from everos.infra.persistence.sqlite import dispose_engine, get_engine
+
+        db_engine = get_engine()
+        async with db_engine.begin() as conn:
+            await conn.run_sync(SQLModel.metadata.create_all)
+        started["dispose"] = dispose_engine
+
+        # Silence OME strategies so agent_case / atomic / foresight don't
+        # try real extraction logic during these tests.
+        noop = AsyncMock(return_value=[])
+        for mod in (af_mod, fs_mod, ac_mod):
+            extractor_attr = next(
+                (n for n in dir(mod) if n.endswith("Extractor")), None
+            )
+            if extractor_attr:
+                monkeypatch.setattr(
+                    mod,
+                    extractor_attr,
+                    lambda *a, **k: type("M", (), {"aextract": noop})(),
+                )
+
+        engine = svc._get_engine()
+        await engine.start()
+        started["engine"] = engine
+
+    yield _setup
+
+    if started.get("engine") is not None:
+        await started["engine"].stop()
+    if started.get("dispose") is not None:
+        await started["dispose"]()
+
+
+# ── Tests ────────────────────────────────────────────────────────────
+
+
+async def test_agent_mode_two_user_assistant_msgs(
+    tmp_path: Path, memorize_env: Callable[..., Any]
+) -> None:
+    """Agent mode happy path: one cell, both user + agent pipelines fire."""
+    fake = _make_fake_llm(boundary_responses=[[]])
+    await memorize_env(mode="agent", fake_llm=fake)
+
+    result = await memorize(
+        {
+            "session_id": "test_agent_basic",
+            "messages": [
+                _user("hello", 1_700_000_000_000),
+                _assistant("hi there", 1_700_000_001_000),
+            ],
+        },
+        is_final=True,
+    )
+    assert isinstance(result, MemorizeResult)
+    assert result.status == "extracted"
+
+    rows = _memcell_rows(tmp_path)
+    assert len(rows) == 1
+    assert rows[0]["raw_type"] == "AgentTrajectory"
+
+
+async def test_agent_mode_preserves_tool_items(
+    tmp_path: Path, memorize_env: Callable[..., Any]
+) -> None:
+    """Agent mode keeps ``role=tool`` rows inside the cell (chat mode drops them)."""
+    fake = _make_fake_llm(boundary_responses=[[]])
+    await memorize_env(mode="agent", fake_llm=fake)
+
+    payload = {
+        "session_id": "test_agent_tools",
+        "messages": [
+            _user("debug this", 1_700_000_000_000),
+            _msg(
+                "assistant",
+                "calling tool",
+                timestamp=1_700_000_001_000,
+                tool_calls=[
+                    {
+                        "id": "c1",
+                        "type": "function",
+                        "function": {"name": "x", "arguments": "{}"},
+                    }
+                ],
+            ),
+            _msg(
+                "tool",
+                "result",
+                sender_id="tool",
+                timestamp=1_700_000_002_000,
+                tool_call_id="c1",
+            ),
+            _assistant("here's the answer", 1_700_000_003_000),
+        ],
+    }
+    result = await memorize(payload, is_final=True)
+    assert result.status == "extracted"
+
+    rows = _memcell_rows(tmp_path)
+    assert len(rows) == 1
+    ids = json.loads(rows[0]["message_ids_json"])
+    # All four preserved in agent mode (chat mode would have 2).
+    assert len(ids) == 4
+
+
+async def test_agent_mode_dispatch_no_double_insert(
+    tmp_path: Path, memorize_env: Callable[..., Any]
+) -> None:
+    """Dual pipeline dispatch must not double-insert the memcell row."""
+    fake = _make_fake_llm(boundary_responses=[[]])
+    await memorize_env(mode="agent", fake_llm=fake)
+
+    await memorize(
+        {
+            "session_id": "test_agent_dispatch",
+            "messages": [
+                _user("u1", 1_700_000_000_000),
+                _assistant("a1", 1_700_000_001_000),
+                _user("u2", 1_700_000_002_000),
+                _assistant("a2", 1_700_000_003_000),
+            ],
+        },
+        is_final=True,
+    )
+
+    rows = _memcell_rows(tmp_path)
+    assert len(rows) == 1  # boundary stage owns the ledger
+    payload = json.loads(rows[0]["payload_json"])
+    assert len(payload["items"]) == 4