A Markdown-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting), enforced by import-linter.

Engineering surface:
- Coding conventions live in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and checks for OpenAPI drift.
269 lines
8.8 KiB
Python
269 lines
8.8 KiB
Python
"""Agent-mode memorize integration tests.
|
|
|
|
Covers the agent branches that ``test_memorize_integration.py`` skips:

- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
  pipelines)
- :mod:`service._boundary` agent-mode detection via
  :class:`everalgo.agent_memory.AgentBoundaryDetector`
- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end

Self-contained: the chat-baseline file keeps its fixture local, so we
copy the minimum scaffolding rather than refactor it into a shared
conftest.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
import json
|
|
import sqlite3
|
|
from collections.abc import AsyncIterator, Callable
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
from everalgo.llm.types import ChatMessage as LLMChatMessage
|
|
from everalgo.llm.types import ChatResponse
|
|
from everalgo.testing.fake_llm import FakeLLMClient
|
|
from sqlmodel import SQLModel
|
|
|
|
from everos.core.persistence import MemoryRoot
|
|
from everos.service.memorize import MemorizeResult, memorize
|
|
|
|
|
|
def _boundary_response(boundaries: list[int]) -> str:
    """Serialise a canned boundary-detection reply as a JSON string.

    The object carries a fixed ``reasoning`` of ``"test"``, the given
    *boundaries* list, and ``should_wait`` set to ``False``.
    """
    reply = {
        "reasoning": "test",
        "boundaries": boundaries,
        "should_wait": False,
    }
    return json.dumps(reply)
|
|
|
|
|
|
def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` whose replies are scripted for these tests.

    Args:
        boundary_responses: Cut lists handed out in order, one per
            boundary-style prompt. Once the list is exhausted (or when it is
            omitted) such prompts receive an empty cut list.

    Returns:
        A fake client driven by a handler that inspects only the first
        message of each request.
    """
    pending: list[list[int]] = (
        [] if not boundary_responses else list(boundary_responses)
    )

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        text = messages[0].content.lower()
        is_boundary_prompt = "boundaries" in text or "memcell" in text
        if not is_boundary_prompt:
            # Any other prompt gets a fixed title/content JSON object.
            body = json.dumps({"title": "T", "content": "B"})
            return ChatResponse(content=body, model="fake")
        # Consume scripted cuts front-to-back; fall back to "no cuts".
        cuts = pending.pop(0) if pending else []
        return ChatResponse(content=_boundary_response(cuts), model="fake")

    return FakeLLMClient(handler=handler)
|
|
|
|
|
|
def _msg(
    role: str,
    content: str,
    *,
    sender_id: str = "u_alice",
    timestamp: int = 1_700_000_000_000,
    tool_calls: list[dict] | None = None,
    tool_call_id: str | None = None,
) -> dict[str, Any]:
    """Assemble one message dict for a memorize payload.

    ``sender_id``, ``role``, ``content`` and ``timestamp`` are always present.
    ``tool_calls`` and ``tool_call_id`` are added only when the corresponding
    argument is not ``None`` (so an empty list or empty string is kept).
    """
    message: dict[str, Any] = dict(
        sender_id=sender_id,
        role=role,
        content=content,
        timestamp=timestamp,
    )
    optional_fields = (
        ("tool_calls", tool_calls),
        ("tool_call_id", tool_call_id),
    )
    for key, value in optional_fields:
        if value is not None:
            message[key] = value
    return message
|
|
|
|
|
|
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Shorthand for a ``role="user"`` message from *sender* at time *ts*."""
    return _msg(role="user", content=content, timestamp=ts, sender_id=sender)
|
|
|
|
|
|
def _assistant(content: str, ts: int) -> dict[str, Any]:
    """Shorthand for a ``role="assistant"`` message sent by ``"assistant"``."""
    return _msg(
        role="assistant",
        content=content,
        timestamp=ts,
        sender_id="assistant",
    )
|
|
|
|
|
|
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
    """Read every ``memcell`` row from the SQLite file under *tmp_path*.

    The database is looked up at ``.index/sqlite/system.db`` relative to
    *tmp_path*. Returns an empty list when that file does not exist;
    otherwise the rows ordered by ``timestamp``, each supporting access by
    column name.
    """
    db_file = tmp_path.joinpath(".index", "sqlite", "system.db")
    if db_file.is_file():
        connection = sqlite3.connect(db_file)
        try:
            connection.row_factory = sqlite3.Row
            cursor = connection.execute("SELECT * FROM memcell ORDER BY timestamp")
            return cursor.fetchall()
        finally:
            # Always release the handle, even if the query fails.
            connection.close()
    return []
|
|
|
|
|
|
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[Callable[..., Any]]:
    """Yield an async ``_setup`` callable that boots memorize against *tmp_path*.

    Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``.

    Before yielding, the fixture points ``MemoryRoot.default`` at *tmp_path*,
    creates ``.index/sqlite`` and resets the module-level singletons listed
    below to ``None``. Calling ``await _setup(mode=..., fake_llm=...)`` then
    sets the env vars, installs the fake LLM client, creates the SQLModel
    tables, stubs the extractor classes and starts the engine returned by
    ``svc._get_engine()``.

    Teardown stops that engine, disposes the SQLite engine and clears the
    settings cache. Each step runs even if an earlier one raises, so a failing
    ``stop()`` cannot leak the database engine into later tests.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Reset cached singletons so each test builds its own instances.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)
    # NOTE(review): ac_mod has no matching ``_writer`` reset here — confirm
    # that module holds no cached writer.

    # Teardown handles recorded by ``_setup``; all stay ``None`` when the
    # test never calls it.
    started: dict[str, Any] = {
        "engine": None,
        "dispose": None,
        "clear_settings": None,
    }

    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
        """Configure env + fake LLM, create tables and start the engine."""
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")

        from everos.config import load_settings

        # Drop any cached settings so the env vars set above are picked up.
        load_settings.cache_clear()
        started["clear_settings"] = load_settings.cache_clear

        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        # Register disposal as soon as the engine exists so a failure while
        # creating tables is still cleaned up at teardown.
        started["dispose"] = dispose_engine
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategies so agent_case / atomic / foresight don't
        # try real extraction logic during these tests.
        noop = AsyncMock(return_value=[])
        for mod in (af_mod, fs_mod, ac_mod):
            extractor_attr = next(
                (n for n in dir(mod) if n.endswith("Extractor")), None
            )
            if extractor_attr:
                # Replace the class with a factory returning a stub whose
                # ``aextract`` is the shared no-op mock.
                monkeypatch.setattr(
                    mod,
                    extractor_attr,
                    lambda *a, **k: type("M", (), {"aextract": noop})(),
                )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        try:
            if started["dispose"] is not None:
                await started["dispose"]()
        finally:
            # Settings were loaded under the patched env vars; clear the cache
            # last so later tests do not inherit them.
            if started["clear_settings"] is not None:
                started["clear_settings"]()
|
|
|
|
|
|
# ── Tests ────────────────────────────────────────────────────────────
|
|
|
|
|
|
async def test_agent_mode_two_user_assistant_msgs(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode happy path: one cell, both user + agent pipelines fire.

    A single user/assistant exchange must come back as ``extracted`` and
    leave exactly one ``AgentTrajectory`` memcell row.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    request = {
        "session_id": "test_agent_basic",
        "messages": [
            _user("hello", 1_700_000_000_000),
            _assistant("hi there", 1_700_000_001_000),
        ],
    }
    result = await memorize(request, is_final=True)

    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    assert cells[0]["raw_type"] == "AgentTrajectory"
|
|
|
|
|
|
async def test_agent_mode_preserves_tool_items(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode keeps ``role=tool`` rows inside the cell (chat mode drops them)."""
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    # One assistant turn that requests a tool call, plus the tool's reply.
    tool_request = _msg(
        "assistant",
        "calling tool",
        timestamp=1_700_000_001_000,
        tool_calls=[
            {
                "id": "c1",
                "type": "function",
                "function": {"name": "x", "arguments": "{}"},
            }
        ],
    )
    tool_reply = _msg(
        "tool",
        "result",
        sender_id="tool",
        timestamp=1_700_000_002_000,
        tool_call_id="c1",
    )
    messages = [
        _user("debug this", 1_700_000_000_000),
        tool_request,
        tool_reply,
        _assistant("here's the answer", 1_700_000_003_000),
    ]

    result = await memorize(
        {"session_id": "test_agent_tools", "messages": messages},
        is_final=True,
    )
    assert result.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    message_ids = json.loads(cells[0]["message_ids_json"])
    # All four preserved in agent mode (chat mode would have 2).
    assert len(message_ids) == 4
|
|
|
|
|
|
async def test_agent_mode_dispatch_no_double_insert(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Dual pipeline dispatch must not double-insert the memcell row."""
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    # Two user/assistant rounds, one second apart.
    conversation = [
        _user("u1", 1_700_000_000_000),
        _assistant("a1", 1_700_000_001_000),
        _user("u2", 1_700_000_002_000),
        _assistant("a2", 1_700_000_003_000),
    ]
    await memorize(
        {"session_id": "test_agent_dispatch", "messages": conversation},
        is_final=True,
    )

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1  # boundary stage owns the ledger
    stored = json.loads(cells[0]["payload_json"])
    assert len(stored["items"]) == 4
|