Files
EverOS/tests/integration/test_memorize_agent_mode.py
Elliot Chen 518b8eca85 chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-06 07:33:17 +08:00

269 lines
8.8 KiB
Python

"""Agent-mode memorize integration tests.
Covers the agent branches that ``test_memorize_integration.py`` skips:
- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
pipelines)
- :mod:`service._boundary` agent-mode detection via
:class:`everalgo.agent_memory.AgentBoundaryDetector`
- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end
Self-contained: the chat-baseline file keeps its fixture local, so we
copy the minimum scaffolding rather than refactor it into a shared
conftest.
"""
from __future__ import annotations
import importlib
import json
import sqlite3
from collections.abc import AsyncIterator, Callable
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock
import pytest
import pytest_asyncio
from everalgo.llm.types import ChatMessage as LLMChatMessage
from everalgo.llm.types import ChatResponse
from everalgo.testing.fake_llm import FakeLLMClient
from sqlmodel import SQLModel
from everos.core.persistence import MemoryRoot
from everos.service.memorize import MemorizeResult, memorize
def _boundary_response(boundaries: list[int]) -> str:
    """Serialize a canned boundary-detector reply as a JSON string.

    Args:
        boundaries: Cut indices to embed under the ``boundaries`` key.

    Returns:
        JSON text with a fixed ``reasoning`` of ``"test"``, the given
        ``boundaries`` and ``should_wait`` set to ``false``.
    """
    reply = {
        "reasoning": "test",
        "boundaries": boundaries,
        "should_wait": False,
    }
    return json.dumps(reply)
def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` whose handler answers with scripted JSON.

    Args:
        boundary_responses: Cut lists handed out one at a time, in order,
            whenever the first message mentions ``boundaries`` or
            ``memcell`` (case-insensitive). Once the script is exhausted,
            or when nothing was supplied, an empty cut list is used.

    Returns:
        A ``FakeLLMClient`` wrapping the scripted handler. Prompts that do
        not look like boundary prompts get a fixed title/content JSON body.
    """
    # Copy so the caller's list is never consumed by the handler.
    pending: list[list[int]] = list(boundary_responses or [])

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        # Only the first message is inspected to classify the request.
        lowered = messages[0].content.lower()
        is_boundary_prompt = "boundaries" in lowered or "memcell" in lowered
        if not is_boundary_prompt:
            body = json.dumps({"title": "T", "content": "B"})
            return ChatResponse(content=body, model="fake")
        cuts = pending.pop(0) if pending else []
        return ChatResponse(content=_boundary_response(cuts), model="fake")

    return FakeLLMClient(handler=handler)
def _msg(
    role: str,
    content: str,
    *,
    sender_id: str = "u_alice",
    timestamp: int = 1_700_000_000_000,
    tool_calls: list[dict] | None = None,
    tool_call_id: str | None = None,
) -> dict[str, Any]:
    """Assemble one raw message dict for a memorize payload.

    Args:
        role: Value stored under ``role``.
        content: Value stored under ``content``.
        sender_id: Value stored under ``sender_id``.
        timestamp: Value stored under ``timestamp``.
        tool_calls: Stored under ``tool_calls`` only when not ``None``.
        tool_call_id: Stored under ``tool_call_id`` only when not ``None``.

    Returns:
        A new dict with the four mandatory keys, followed by whichever
        optional keys were supplied.
    """
    message: dict[str, Any] = dict(
        sender_id=sender_id,
        role=role,
        content=content,
        timestamp=timestamp,
    )
    # Optional fields are omitted entirely (not set to None) when absent.
    optional_fields = (("tool_calls", tool_calls), ("tool_call_id", tool_call_id))
    for key, value in optional_fields:
        if value is not None:
            message[key] = value
    return message
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Shorthand for a ``role="user"`` message from ``sender`` stamped ``ts``."""
    return _msg(role="user", content=content, timestamp=ts, sender_id=sender)
def _assistant(content: str, ts: int) -> dict[str, Any]:
    """Shorthand for a ``role="assistant"`` message stamped ``ts``.

    The ``sender_id`` is always the literal ``"assistant"``.
    """
    return _msg(role="assistant", content=content, timestamp=ts, sender_id="assistant")
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
    """Read every ``memcell`` row from the SQLite file under ``tmp_path``.

    Args:
        tmp_path: Directory expected to contain
            ``.index/sqlite/system.db``.

    Returns:
        All rows of the ``memcell`` table ordered by ``timestamp``, each
        addressable by column name; an empty list when the database file
        does not exist.
    """
    db_file = tmp_path.joinpath(".index", "sqlite", "system.db")
    if db_file.is_file():
        connection = sqlite3.connect(db_file)
        connection.row_factory = sqlite3.Row
        try:
            cursor = connection.execute("SELECT * FROM memcell ORDER BY timestamp")
            return cursor.fetchall()
        finally:
            # Always release the file handle, even if the query fails.
            connection.close()
    return []
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[Callable[..., Any]]:
    """Yield an async ``_setup`` callable that boots memorize under ``tmp_path``.

    Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``.

    Before yielding, ``MemoryRoot.default`` is redirected to ``tmp_path``,
    the SQLite index directory is created, and a set of module-level
    attributes (service, LLM client, strategy writers) is reset to ``None``.

    The yielded ``_setup(*, mode="agent", fake_llm)`` sets the memorize mode
    and fake LLM credentials through environment variables, clears the
    settings cache, installs ``fake_llm`` as the LLM client, creates the
    SQLModel schema, stubs the strategy extractors and starts the service
    engine.

    Teardown stops the engine (if it was started) and then disposes the
    SQLite engine (if one was obtained). Disposal runs even when stopping
    the engine raises.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Reset module-level state so each test starts from a clean slate.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    # Handles recorded by ``_setup`` so teardown knows what to release.
    started: dict[str, Any] = {"engine": None, "dispose": None}

    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")

        from everos.config import load_settings

        # Env vars were just changed; drop any cached settings.
        load_settings.cache_clear()
        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        # Register disposal as soon as the engine exists, so a failure in
        # schema creation or any later step still releases it at teardown.
        started["dispose"] = dispose_engine
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategies so agent_case / atomic / foresight don't
        # try real extraction logic during these tests.
        noop = AsyncMock(return_value=[])
        for mod in (af_mod, fs_mod, ac_mod):
            # NOTE(review): only the first name (in ``dir()`` order) ending
            # in "Extractor" is replaced per module — confirm each module
            # exposes a single such name.
            extractor_attr = next(
                (n for n in dir(mod) if n.endswith("Extractor")), None
            )
            if extractor_attr:
                monkeypatch.setattr(
                    mod,
                    extractor_attr,
                    lambda *a, **k: type("M", (), {"aextract": noop})(),
                )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    # Teardown: stop the service engine first, then always dispose the
    # SQLite engine, even if stopping raised.
    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        if started["dispose"] is not None:
            await started["dispose"]()
# ── Tests ────────────────────────────────────────────────────────────
async def test_agent_mode_two_user_assistant_msgs(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent-mode happy path: one user/assistant exchange yields one memcell.

    Checks that ``memorize`` returns a ``MemorizeResult`` with status
    ``"extracted"`` and that the single stored row has ``raw_type``
    ``"AgentTrajectory"``.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    conversation = [
        _user("hello", 1_700_000_000_000),
        _assistant("hi there", 1_700_000_001_000),
    ]
    request = {"session_id": "test_agent_basic", "messages": conversation}
    result = await memorize(request, is_final=True)

    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"

    stored = _memcell_rows(tmp_path)
    assert len(stored) == 1
    assert stored[0]["raw_type"] == "AgentTrajectory"
async def test_agent_mode_preserves_tool_items(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode keeps tool-call and ``role=tool`` messages in the memcell.

    Sends user -> assistant (with a tool call) -> tool -> assistant and
    expects a single memcell that references all four message ids.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    tool_call = {
        "id": "c1",
        "type": "function",
        "function": {"name": "x", "arguments": "{}"},
    }
    # NOTE(review): this assistant message relies on ``_msg``'s default
    # ``sender_id`` ("u_alice") rather than "assistant" — confirm intended.
    calling_assistant = _msg(
        "assistant",
        "calling tool",
        timestamp=1_700_000_001_000,
        tool_calls=[tool_call],
    )
    tool_result = _msg(
        "tool",
        "result",
        sender_id="tool",
        timestamp=1_700_000_002_000,
        tool_call_id="c1",
    )
    payload = {
        "session_id": "test_agent_tools",
        "messages": [
            _user("debug this", 1_700_000_000_000),
            calling_assistant,
            tool_result,
            _assistant("here's the answer", 1_700_000_003_000),
        ],
    }

    result = await memorize(payload, is_final=True)
    assert result.status == "extracted"

    stored = _memcell_rows(tmp_path)
    assert len(stored) == 1

    # All four preserved in agent mode (chat mode would have 2).
    message_ids = json.loads(stored[0]["message_ids_json"])
    assert len(message_ids) == 4
async def test_agent_mode_dispatch_no_double_insert(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Dual pipeline dispatch must leave exactly one memcell row.

    Four alternating user/assistant messages are memorized in one call;
    the single resulting row must carry all four items in its payload.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    turns = [
        _user("u1", 1_700_000_000_000),
        _assistant("a1", 1_700_000_001_000),
        _user("u2", 1_700_000_002_000),
        _assistant("a2", 1_700_000_003_000),
    ]
    request = {"session_id": "test_agent_dispatch", "messages": turns}
    await memorize(request, is_final=True)

    stored = _memcell_rows(tmp_path)
    assert len(stored) == 1  # boundary stage owns the ledger

    cell_payload = json.loads(stored[0]["payload_json"])
    assert len(cell_payload["items"]) == 4