EverOS/tests/integration/test_memorize_agent_mode.py

"""Agent-mode memorize integration tests.

Covers the agent branches that ``test_memorize_integration.py`` skips:

- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
  pipelines)
- :mod:`service._boundary` agent-mode detection via
  :class:`everalgo.agent_memory.AgentBoundaryDetector`
- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end

Self-contained: the chat-baseline file keeps its fixture local, so we
copy the minimum scaffolding rather than refactor it into a shared
conftest.
"""

from __future__ import annotations

import importlib
import json
import sqlite3
from collections.abc import AsyncIterator, Callable
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock

import pytest
import pytest_asyncio
from everalgo.llm.types import ChatMessage as LLMChatMessage
from everalgo.llm.types import ChatResponse
from everalgo.testing.fake_llm import FakeLLMClient
from sqlmodel import SQLModel

from everos.core.persistence import MemoryRoot
from everos.service.memorize import MemorizeResult, memorize


def _boundary_response(boundaries: list[int]) -> str:
    return json.dumps(
        {"reasoning": "test", "boundaries": boundaries, "should_wait": False}
    )


def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
    queue: list[list[int]] = list(boundary_responses or [])

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        prompt = messages[0].content
        if "boundaries" in prompt.lower() or "memcell" in prompt.lower():
            cuts = queue.pop(0) if queue else []
            return ChatResponse(content=_boundary_response(cuts), model="fake")
        return ChatResponse(
            content=json.dumps({"title": "T", "content": "B"}), model="fake"
        )

    return FakeLLMClient(handler=handler)


def _msg(
    role: str,
    content: str,
    *,
    sender_id: str = "u_alice",
    timestamp: int = 1_700_000_000_000,
    tool_calls: list[dict] | None = None,
    tool_call_id: str | None = None,
) -> dict[str, Any]:
    out: dict[str, Any] = {
        "sender_id": sender_id,
        "role": role,
        "content": content,
        "timestamp": timestamp,
    }
    if tool_calls is not None:
        out["tool_calls"] = tool_calls
    if tool_call_id is not None:
        out["tool_call_id"] = tool_call_id
    return out


def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    return _msg("user", content, sender_id=sender, timestamp=ts)


def _assistant(content: str, ts: int) -> dict[str, Any]:
    return _msg("assistant", content, sender_id="assistant", timestamp=ts)


def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
    db = tmp_path / ".index" / "sqlite" / "system.db"
    if not db.is_file():
        return []
    conn = sqlite3.connect(db)
    conn.row_factory = sqlite3.Row
    try:
        return list(conn.execute("SELECT * FROM memcell ORDER BY timestamp"))
    finally:
        conn.close()


@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[Callable[..., Any]]:
    """Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``."""
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    client_mod = importlib.import_module("everos.component.llm.client")

    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    started: dict[str, Any] = {"engine": None}

    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")

        from everos.config import load_settings

        load_settings.cache_clear()

        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)
        started["dispose"] = dispose_engine

        # Silence OME strategies so agent_case / atomic / foresight don't
        # try real extraction logic during these tests.
        noop = AsyncMock(return_value=[])
        for mod in (af_mod, fs_mod, ac_mod):
            extractor_attr = next(
                (n for n in dir(mod) if n.endswith("Extractor")), None
            )
            if extractor_attr:
                monkeypatch.setattr(
                    mod,
                    extractor_attr,
                    lambda *a, **k: type("M", (), {"aextract": noop})(),
                )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    if started.get("engine") is not None:
        await started["engine"].stop()
    if started.get("dispose") is not None:
        await started["dispose"]()


# ── Tests ────────────────────────────────────────────────────────────


async def test_agent_mode_two_user_assistant_msgs(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode happy path: one cell, both user + agent pipelines fire."""
    fake = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=fake)

    result = await memorize(
        {
            "session_id": "test_agent_basic",
            "messages": [
                _user("hello", 1_700_000_000_000),
                _assistant("hi there", 1_700_000_001_000),
            ],
        },
        is_final=True,
    )
    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"

    rows = _memcell_rows(tmp_path)
    assert len(rows) == 1
    assert rows[0]["raw_type"] == "AgentTrajectory"


async def test_agent_mode_preserves_tool_items(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode keeps ``role=tool`` rows inside the cell (chat mode drops them)."""
    fake = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=fake)

    payload = {
        "session_id": "test_agent_tools",
        "messages": [
            _user("debug this", 1_700_000_000_000),
            _msg(
                "assistant",
                "calling tool",
                timestamp=1_700_000_001_000,
                tool_calls=[
                    {
                        "id": "c1",
                        "type": "function",
                        "function": {"name": "x", "arguments": "{}"},
                    }
                ],
            ),
            _msg(
                "tool",
                "result",
                sender_id="tool",
                timestamp=1_700_000_002_000,
                tool_call_id="c1",
            ),
            _assistant("here's the answer", 1_700_000_003_000),
        ],
    }
    result = await memorize(payload, is_final=True)
    assert result.status == "extracted"

    rows = _memcell_rows(tmp_path)
    assert len(rows) == 1
    ids = json.loads(rows[0]["message_ids_json"])
    # All four preserved in agent mode (chat mode would have 2).
    assert len(ids) == 4


async def test_agent_mode_dispatch_no_double_insert(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Dual pipeline dispatch must not double-insert the memcell row."""
    fake = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=fake)

    await memorize(
        {
            "session_id": "test_agent_dispatch",
            "messages": [
                _user("u1", 1_700_000_000_000),
                _assistant("a1", 1_700_000_001_000),
                _user("u2", 1_700_000_002_000),
                _assistant("a2", 1_700_000_003_000),
            ],
        },
        is_final=True,
    )

    rows = _memcell_rows(tmp_path)
    assert len(rows) == 1  # boundary stage owns the ledger
    payload = json.loads(rows[0]["payload_json"])
    assert len(payload["items"]) == 4