chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
268
tests/integration/test_memorize_agent_mode.py
Normal file
268
tests/integration/test_memorize_agent_mode.py
Normal file
@ -0,0 +1,268 @@
|
||||
"""Agent-mode memorize integration tests.
|
||||
|
||||
Covers the agent branches that ``test_memorize_integration.py`` skips:
|
||||
|
||||
- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
|
||||
pipelines)
|
||||
- :mod:`service._boundary` agent-mode detection via
|
||||
:class:`everalgo.agent_memory.AgentBoundaryDetector`
|
||||
- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end
|
||||
|
||||
Self-contained: the chat-baseline file keeps its fixture local, so we
|
||||
copy the minimum scaffolding rather than refactor it into a shared
|
||||
conftest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import sqlite3
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from everalgo.llm.types import ChatMessage as LLMChatMessage
|
||||
from everalgo.llm.types import ChatResponse
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.service.memorize import MemorizeResult, memorize
|
||||
|
||||
|
||||
def _boundary_response(boundaries: list[int]) -> str:
|
||||
return json.dumps(
|
||||
{"reasoning": "test", "boundaries": boundaries, "should_wait": False}
|
||||
)
|
||||
|
||||
|
||||
def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` whose replies follow a scripted sequence.

    Args:
        boundary_responses: Boundary lists handed out one per
            boundary-style prompt, in order. Once they are used up (or
            when none are supplied) each such prompt gets an empty list.

    Returns:
        A ``FakeLLMClient`` constructed around the scripted handler.
    """
    # Snapshot the script so the caller's list is never consumed.
    pending = iter(list(boundary_responses or []))

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        # Only the first message is inspected to classify the request.
        lowered = messages[0].content.lower()
        is_boundary_prompt = "boundaries" in lowered or "memcell" in lowered
        if not is_boundary_prompt:
            # Every other prompt receives a fixed title/content JSON stub.
            stub = json.dumps({"title": "T", "content": "B"})
            return ChatResponse(content=stub, model="fake")
        cuts = next(pending, [])
        return ChatResponse(content=_boundary_response(cuts), model="fake")

    return FakeLLMClient(handler=handler)
|
||||
|
||||
|
||||
def _msg(
|
||||
role: str,
|
||||
content: str,
|
||||
*,
|
||||
sender_id: str = "u_alice",
|
||||
timestamp: int = 1_700_000_000_000,
|
||||
tool_calls: list[dict] | None = None,
|
||||
tool_call_id: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
out: dict[str, Any] = {
|
||||
"sender_id": sender_id,
|
||||
"role": role,
|
||||
"content": content,
|
||||
"timestamp": timestamp,
|
||||
}
|
||||
if tool_calls is not None:
|
||||
out["tool_calls"] = tool_calls
|
||||
if tool_call_id is not None:
|
||||
out["tool_call_id"] = tool_call_id
|
||||
return out
|
||||
|
||||
|
||||
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Build a ``"user"``-role message dict via ``_msg``.

    Args:
        content: Message text.
        ts: Value passed on as the message ``timestamp``.
        sender: Value passed on as the message ``sender_id``.

    Returns:
        The dict produced by ``_msg`` for these values.
    """
    overrides = {"sender_id": sender, "timestamp": ts}
    return _msg("user", content, **overrides)
|
||||
|
||||
|
||||
def _assistant(content: str, ts: int) -> dict[str, Any]:
    """Build an ``"assistant"``-role message dict via ``_msg``.

    Args:
        content: Message text.
        ts: Value passed on as the message ``timestamp``.

    Returns:
        The dict produced by ``_msg`` with ``sender_id`` fixed to
        ``"assistant"``.
    """
    overrides = {"sender_id": "assistant", "timestamp": ts}
    return _msg("assistant", content, **overrides)
|
||||
|
||||
|
||||
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
|
||||
db = tmp_path / ".index" / "sqlite" / "system.db"
|
||||
if not db.is_file():
|
||||
return []
|
||||
conn = sqlite3.connect(db)
|
||||
conn.row_factory = sqlite3.Row
|
||||
try:
|
||||
return list(conn.execute("SELECT * FROM memcell ORDER BY timestamp"))
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[Callable[..., Any]]:
    """Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``.

    Redirects ``MemoryRoot.default`` to ``tmp_path``, resets the
    module-level attributes listed below to ``None`` and yields an async
    ``_setup(*, mode="agent", fake_llm)`` callable. Each test awaits
    ``_setup`` itself; it sets the environment variables, installs the
    fake LLM client, creates the SQLModel tables and starts the engine
    returned by ``svc._get_engine()``.

    Teardown stops that engine and then disposes the SQLite engine. The
    dispose step runs even when stopping raises, and the dispose hook is
    registered before the schema is created, so a failure in either step
    cannot leave the SQLite engine undisposed.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Clear module-level attributes so nothing set by an earlier test is
    # reused (presumably these are lazily rebuilt caches -- confirm in the
    # service module). ``raising=False`` tolerates attributes that do not
    # exist on the module.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    # Filled in by ``_setup``; read back during teardown.
    started: dict[str, Any] = {"engine": None, "dispose": None}

    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")

        from everos.config import load_settings

        # Drop cached settings so the environment set above is re-read.
        load_settings.cache_clear()

        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        # Register the dispose hook before touching the schema so a failure
        # in ``create_all`` still gets the engine disposed at teardown.
        started["dispose"] = dispose_engine
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategies so agent_case / atomic / foresight don't
        # try real extraction logic during these tests.
        noop = AsyncMock(return_value=[])
        for mod in (af_mod, fs_mod, ac_mod):
            # ``dir()`` is sorted, so this picks the alphabetically first
            # name ending in "Extractor"; only that one name is replaced.
            extractor_attr = next(
                (n for n in dir(mod) if n.endswith("Extractor")), None
            )
            if extractor_attr:
                # The replacement is a factory returning a throwaway object
                # whose ``aextract`` is the shared AsyncMock (resolves to []).
                monkeypatch.setattr(
                    mod,
                    extractor_attr,
                    lambda *a, **k: type("M", (), {"aextract": noop})(),
                )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    # Teardown: stop the engine first, but always dispose the SQLite engine,
    # even if stopping raises, so one failure cannot leak the DB engine.
    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        if started["dispose"] is not None:
            await started["dispose"]()
|
||||
|
||||
|
||||
# ── Tests ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_agent_mode_two_user_assistant_msgs(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode, one user/assistant exchange: a single memcell is stored.

    Expects ``memorize`` to return a ``MemorizeResult`` with status
    ``"extracted"`` and exactly one ``memcell`` row whose ``raw_type`` is
    ``"AgentTrajectory"``.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=scripted_llm)

    request = {
        "session_id": "test_agent_basic",
        "messages": [
            _user("hello", 1_700_000_000_000),
            _assistant("hi there", 1_700_000_001_000),
        ],
    }
    result = await memorize(request, is_final=True)
    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    first_cell = cells[0]
    assert first_cell["raw_type"] == "AgentTrajectory"
|
||||
|
||||
|
||||
async def test_agent_mode_preserves_tool_items(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode keeps ``role=tool`` rows inside the cell (chat mode drops them).

    Sends user -> assistant (with a tool call) -> tool -> assistant and
    expects the single stored cell to list all four message ids.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=scripted_llm)

    # The assistant turn that requests the tool, then the tool's reply.
    tool_request = _msg(
        "assistant",
        "calling tool",
        timestamp=1_700_000_001_000,
        tool_calls=[
            {
                "id": "c1",
                "type": "function",
                "function": {"name": "x", "arguments": "{}"},
            }
        ],
    )
    tool_reply = _msg(
        "tool",
        "result",
        sender_id="tool",
        timestamp=1_700_000_002_000,
        tool_call_id="c1",
    )
    request = {
        "session_id": "test_agent_tools",
        "messages": [
            _user("debug this", 1_700_000_000_000),
            tool_request,
            tool_reply,
            _assistant("here's the answer", 1_700_000_003_000),
        ],
    }

    result = await memorize(request, is_final=True)
    assert result.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    message_ids = json.loads(cells[0]["message_ids_json"])
    # All four preserved in agent mode (chat mode would have 2).
    assert len(message_ids) == 4
|
||||
|
||||
|
||||
async def test_agent_mode_dispatch_no_double_insert(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Dual pipeline dispatch must not double-insert the memcell row.

    Four alternating user/assistant messages are expected to produce one
    ``memcell`` row whose payload holds four items.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=scripted_llm)

    # Messages are spaced one second (1_000 ms) apart.
    t0 = 1_700_000_000_000
    request = {
        "session_id": "test_agent_dispatch",
        "messages": [
            _user("u1", t0),
            _assistant("a1", t0 + 1_000),
            _user("u2", t0 + 2_000),
            _assistant("a2", t0 + 3_000),
        ],
    }
    await memorize(request, is_final=True)

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1  # boundary stage owns the ledger
    cell_payload = json.loads(cells[0]["payload_json"])
    assert len(cell_payload["items"]) == 4
|
||||
Reference in New Issue
Block a user