feat: extend memory search and attachment mapping

This commit is contained in:
2026-06-15 17:25:44 +08:00
parent 15462a95cb
commit e5cd87789f
9 changed files with 1194 additions and 54 deletions

View File

@ -21,6 +21,7 @@ class FakeBackendClient:
def __init__(
self,
search_results: list[dict[str, Any]] | None = None,
search_data: dict[str, list[dict[str, Any]]] | None = None,
health_error: Exception | None = None,
add_failures: int = 0,
flush_failures: int = 0,
@ -29,6 +30,7 @@ class FakeBackendClient:
self.flush_calls: list[dict[str, str]] = []
self.search_calls: list[dict[str, Any]] = []
self.search_results = search_results or []
self.search_data = search_data
self.health_error = health_error
self.add_failures = add_failures
self.flush_failures = flush_failures
@ -56,7 +58,8 @@ class FakeBackendClient:
async def search_memory(self, payload: dict[str, Any]) -> dict[str, Any]:
    """Record the search payload and return the canned search response.

    Args:
        payload: The request body the caller would send to the backend.
            It is appended to ``self.search_calls`` unchanged.

    Returns:
        A response dict with a fixed ``request_id`` of ``"search"``. Its
        ``data`` entry is ``self.search_data`` when that is set, otherwise
        ``{"episodes": self.search_results}``.
    """
    self.search_calls.append(payload)
    # A falsy ``search_data`` (None or an empty mapping) falls back to the
    # episodes-only payload built from ``search_results``.
    data = self.search_data or {"episodes": self.search_results}
    return {"request_id": "search", "data": data}
async def health_check(self) -> dict[str, Any]:
if self.health_error is not None:
@ -115,6 +118,75 @@ def create_test_resource(
)
def test_attachment_repository_deduplicates_and_lists_by_user_session(
    repo: MemoryRepository,
) -> None:
    """Check that a repeated attachment insert is deduplicated and user-scoped.

    The same field set is inserted twice; both calls must report the same id,
    the owning user must see exactly that one row for the session, and a
    different user must see nothing for the same session id.
    """
    attachment_fields = dict(
        user_id="u_123",
        app_id="default",
        project_id="default",
        session_id="chat:c_1",
        resource_id=None,
        content_type="image",
        name="picture.png",
        internal_uri="file:///private/picture.png",
        source="memory_add_uri",
        sha256=None,
    )

    original = repo.create_attachment(**attachment_fields)
    duplicate = repo.create_attachment(**attachment_fields)
    assert duplicate["id"] == original["id"]

    owner_listing = repo.list_attachments_for_session("u_123", "chat:c_1")
    assert owner_listing == [original]

    stranger_listing = repo.list_attachments_for_session("other", "chat:c_1")
    assert stranger_listing == []
def test_soft_delete_resource_also_soft_deletes_attachments(
    repo: MemoryRepository,
) -> None:
    """Check that soft-deleting a resource hides its attachment from listing."""
    resource_session = "resource:u_123:r_1"
    create_test_resource(repo, resource_id="r_1", user_id="u_123")

    attachment_fields = {
        "user_id": "u_123",
        "app_id": "default",
        "project_id": "default",
        "session_id": resource_session,
        "resource_id": "r_1",
        "content_type": "text",
        "name": "a.txt",
        "internal_uri": "file:///private/a.txt",
        "source": "resource_upload",
        "sha256": "sha-r_1",
    }
    repo.create_attachment(**attachment_fields)

    repo.soft_delete_resource("r_1", "u_123")

    remaining = repo.list_attachments_for_session("u_123", resource_session)
    assert remaining == []
def test_soft_delete_resource_does_not_delete_other_users_attachments(
    repo: MemoryRepository,
) -> None:
    """Check that a soft delete issued as another user keeps the attachment.

    The resource and attachment belong to ``alice``; the delete is issued with
    ``bob`` as the user id, after which alice's attachment must still be listed.
    """
    alice_session = "resource:alice:r_1"
    create_test_resource(repo, resource_id="r_1", user_id="alice")

    attachment_fields = {
        "user_id": "alice",
        "app_id": "default",
        "project_id": "default",
        "session_id": alice_session,
        "resource_id": "r_1",
        "content_type": "text",
        "name": "a.txt",
        "internal_uri": "file:///private/a.txt",
        "source": "resource_upload",
        "sha256": "sha-r_1",
    }
    repo.create_attachment(**attachment_fields)

    repo.soft_delete_resource("r_1", "bob")

    attachments = repo.list_attachments_for_session("alice", alice_session)
    assert len(attachments) == 1
async def create_user(client: httpx.AsyncClient, user_id: str = "u_123") -> str:
response = await client.post("/users", json={"user_id": user_id})
assert response.status_code == 200, response.text
@ -343,6 +415,31 @@ async def test_upload_binary_resource_sends_base64_content_to_backend(
assert "uri" not in content
@pytest.mark.asyncio
async def test_upload_resource_creates_attachment_mapping(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that ``POST /resources`` records one attachment for the upload.

    The attachment listed under the returned session id must carry the
    returned resource id, the uploaded file name, an ``image`` content type,
    a ``file://`` internal URI and the ``resource_upload`` source.
    """
    backend = FakeBackendClient()
    upload = {"file": ("picture.png", b"png bytes", "image/png")}

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        form_fields = {"user_id": "u_123", "user_key": user_key}
        response = await client.post("/resources", data=form_fields, files=upload)
        assert response.status_code == 200, response.text

        body = response.json()
        attachments = repo.list_attachments_for_session("u_123", body["session_id"])
        assert len(attachments) == 1

        mapping = attachments[0]
        assert mapping["resource_id"] == body["resource_id"]
        assert mapping["content_type"] == "image"
        assert mapping["name"] == "picture.png"
        assert mapping["internal_uri"].startswith("file://")
        assert mapping["source"] == "resource_upload"
@pytest.mark.asyncio
async def test_upload_resource_uses_current_timestamp(
config: GatewayConfig,
@ -607,6 +704,129 @@ async def test_add_memory_forwards_multimodal_payload_to_backend(
]
@pytest.mark.asyncio
async def test_add_memory_creates_uri_attachment_mapping(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that a URI content part is mapped and forwarded unchanged.

    After ``POST /memories/add`` with an image part that carries a ``uri``,
    the session must list one attachment with that name, URI and the
    ``memory_add_uri`` source, and the backend call must still hold the URI.
    """
    backend = FakeBackendClient()
    uri = "file:///home/tom/memory-gateway/tests/simple-multimodal-image.png"
    image_part = {
        "type": "image",
        "uri": uri,
        "name": "simple-multimodal-image.png",
        "ext": "png",
    }
    message = {
        "sender_id": "u_123",
        "role": "user",
        "timestamp": 1234567890123,
        "content": [image_part],
    }

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "session_id": "chat:c_uri",
            "messages": [message],
        }
        response = await client.post("/memories/add", json=request_body)
        assert response.status_code == 200, response.text

        attachments = repo.list_attachments_for_session("u_123", "chat:c_uri")
        summary = [
            (entry["name"], entry["internal_uri"], entry["source"])
            for entry in attachments
        ]
        assert summary == [("simple-multimodal-image.png", uri, "memory_add_uri")]

        forwarded_part = backend.add_calls[0]["messages"][0]["content"][0]
        assert forwarded_part["uri"] == uri
@pytest.mark.asyncio
async def test_add_memory_materializes_base64_attachment(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that a base64 content part is written to disk and mapped.

    The listed attachment must point at a ``file://`` path whose bytes equal
    the decoded payload, and the backend call must still carry the original
    base64 string.
    """
    backend = FakeBackendClient()
    encoded = base64.b64encode(b"wav bytes").decode("ascii")
    audio_part = {
        "type": "audio",
        "base64": encoded,
        "name": "tone.wav",
        "ext": "wav",
    }
    message = {
        "sender_id": "u_123",
        "role": "user",
        "timestamp": 1234567890123,
        "content": [audio_part],
    }

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "session_id": "chat:c_base64",
            "messages": [message],
        }
        response = await client.post("/memories/add", json=request_body)
        assert response.status_code == 200, response.text

        attachments = repo.list_attachments_for_session("u_123", "chat:c_base64")
        assert len(attachments) == 1

        attachment = attachments[0]
        assert attachment["name"] == "tone.wav"
        assert attachment["source"] == "memory_add_base64"

        # Strip the URI scheme to get the filesystem path of the stored bytes.
        stored_file = Path(attachment["internal_uri"].removeprefix("file://"))
        assert stored_file.read_bytes() == b"wav bytes"

        forwarded_part = backend.add_calls[0]["messages"][0]["content"][0]
        assert forwarded_part["base64"] == encoded
@pytest.mark.asyncio
async def test_add_memory_deduplicates_retried_base64_attachment(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that posting the same base64 payload twice maps one attachment."""
    backend = FakeBackendClient()
    encoded = base64.b64encode(b"same bytes").decode("ascii")
    image_part = {
        "type": "image",
        "base64": encoded,
        "name": "same.png",
        "ext": "png",
    }
    message = {
        "sender_id": "u_123",
        "role": "user",
        "timestamp": 1234567890123,
        "content": [image_part],
    }
    payload: dict[str, Any] = {
        "user_id": "u_123",
        "session_id": "chat:c_retry",
        "messages": [message],
    }

    async with app_client(config, backend) as client:
        # The key only exists once the user is created, so it is added late.
        payload["user_key"] = await create_user(client)

        first = await client.post("/memories/add", json=payload)
        second = await client.post("/memories/add", json=payload)
        assert first.status_code == 200, first.text
        assert second.status_code == 200, second.text

        attachments = repo.list_attachments_for_session("u_123", "chat:c_retry")
        assert len(attachments) == 1
@pytest.mark.asyncio
async def test_flush_memory_forwards_request_to_backend(
config: GatewayConfig,
@ -639,6 +859,313 @@ async def test_flush_memory_forwards_request_to_backend(
]
@pytest.mark.asyncio
async def test_search_forwards_default_upstream_options(
    config: GatewayConfig,
) -> None:
    """Check the backend payload produced by a minimal search request.

    Only ``user_id``, ``user_key``, ``query`` and ``scope`` are sent; the
    single recorded backend call must equal the expected dict below.
    """
    backend = FakeBackendClient()
    expected_call = {
        "user_id": "u_123",
        "query": "hello",
        "method": "hybrid",
        "top_k": 8,
        "include_profile": True,
        "enable_llm_rerank": True,
        "app_id": "default",
        "project_id": "default",
    }

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "query": "hello",
            "scope": ["all_user_memory"],
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text
        assert backend.search_calls == [expected_call]
@pytest.mark.asyncio
async def test_search_forwards_all_upstream_options(
    config: GatewayConfig,
) -> None:
    """Check the backend payload when every search option is set explicitly.

    The expected backend call carries ``agent_id`` and the explicit option
    values, and contains no ``user_id`` key.
    """
    backend = FakeBackendClient()
    expected_call = {
        "agent_id": "agent_456",
        "query": "hello",
        "method": "keyword",
        "top_k": -1,
        "radius": 0.4,
        "include_profile": False,
        "enable_llm_rerank": False,
        "app_id": "app_1",
        "project_id": "project_1",
    }

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "agent_id": "agent_456",
            "query": "hello",
            "scope": ["all_user_memory"],
            "method": "keyword",
            "top_k": -1,
            "radius": 0.4,
            "include_profile": False,
            "enable_llm_rerank": False,
            "app_id": "app_1",
            "project_id": "project_1",
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text
        assert backend.search_calls == [expected_call]
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("field", "value"),
    [
        ("method", "invalid"),
        ("radius", 1.1),
        ("top_k", 0),
    ],
)
async def test_search_rejects_invalid_upstream_options(
    config: GatewayConfig,
    field: str,
    value: Any,
) -> None:
    """Check that an invalid search option yields 422 and no backend call.

    Each parametrized case adds one invalid ``field``/``value`` pair to an
    otherwise valid search request.
    """
    backend = FakeBackendClient()

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        payload = {
            "user_id": "u_123",
            "user_key": user_key,
            "query": "hello",
            "scope": ["all_user_memory"],
        }
        # Inject the single invalid option under test.
        payload[field] = value

        response = await client.post("/memories/search", json=payload)
        assert response.status_code == 422, response.text
        assert backend.search_calls == []
@pytest.mark.asyncio
async def test_search_combines_custom_and_scope_filters(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that caller filters are AND-ed with the resource-scope filter.

    With one resource present and ``scope`` set to ``resources``, the filters
    forwarded to the backend must be an ``AND`` of the caller's filters and a
    ``session_id`` ``in`` clause for that resource's session.
    """
    create_test_resource(repo, resource_id="r_1", user_id="u_123")
    backend = FakeBackendClient()
    custom_filters = {"OR": [{"type": "Conversation"}, {"sender_ids": "u_123"}]}
    scope_filter = {"session_id": {"in": ["resource:u_123:r_1"]}}

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "query": "hello",
            "scope": ["resources"],
            "filters": custom_filters,
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text

        forwarded_filters = backend.search_calls[0]["filters"]
        assert forwarded_filters == {"AND": [custom_filters, scope_filter]}
@pytest.mark.asyncio
async def test_search_labels_all_memory_types(
    config: GatewayConfig,
) -> None:
    """Check the ``memory_type`` label on each kind of backend search hit.

    The fake backend returns one item under each of five data keys; the
    response results must carry the matching labels in the order listed.
    """
    backend_data = {
        "episodes": [{"id": "ep_1", "session_id": "chat:c_1", "episode": "e"}],
        "profiles": [{"id": "profile_1", "profile_data": {"name": "Tom"}}],
        "agent_cases": [
            {"id": "case_1", "session_id": "chat:c_1", "task_intent": "case"}
        ],
        "agent_skills": [{"id": "skill_1", "content": "skill"}],
        "unprocessed_messages": [
            {"id": "message_1", "session_id": "chat:c_1", "content": "pending"}
        ],
    }
    backend = FakeBackendClient(search_data=backend_data)
    expected_labels = [
        "episode",
        "profile",
        "agent_case",
        "agent_skill",
        "unprocessed_message",
    ]

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "query": "hello",
            "scope": ["all_user_memory"],
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text

        labels = [hit["memory_type"] for hit in response.json()["results"]]
        assert labels == expected_labels
@pytest.mark.asyncio
async def test_search_returns_attachment_when_raw_contains_filename(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that a hit mentioning an attachment's file name returns it.

    The stored name is ``Picture.PNG`` while the episode text says
    ``picture.png``, so the expected match here differs only in letter case.
    """
    attachment_fields = {
        "user_id": "u_123",
        "app_id": "default",
        "project_id": "default",
        "session_id": "chat:c_1",
        "resource_id": None,
        "content_type": "image",
        "name": "Picture.PNG",
        "internal_uri": "file:///private/Picture.PNG",
        "source": "memory_add_uri",
        "sha256": None,
    }
    repo.create_attachment(**attachment_fields)

    episode = {"id": "ep_1", "session_id": "chat:c_1", "episode": "Saw picture.png"}
    backend = FakeBackendClient(search_results=[episode])
    expected_attachment = {
        "type": "image",
        "name": "Picture.PNG",
        "internal_uri": "file:///private/Picture.PNG",
    }

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "query": "picture",
            "scope": ["all_user_memory"],
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text

        top_hit = response.json()["results"][0]
        assert top_hit["attachments"] == [expected_attachment]
@pytest.mark.asyncio
async def test_search_omits_unmentioned_and_base64_only_attachments(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that a name found only inside a ``base64`` value is not matched.

    The backend hit contains ``tone.wav`` solely as part of a ``base64``
    field, and the result's attachment list is expected to be empty.
    """
    attachment_fields = {
        "user_id": "u_123",
        "app_id": "default",
        "project_id": "default",
        "session_id": "chat:c_1",
        "resource_id": None,
        "content_type": "audio",
        "name": "tone.wav",
        "internal_uri": "file:///private/tone.wav",
        "source": "memory_add_base64",
        "sha256": None,
    }
    repo.create_attachment(**attachment_fields)

    pending_message = {
        "id": "message_1",
        "session_id": "chat:c_1",
        "content": [{"base64": "encoded-prefix-tone.wav"}],
    }
    backend = FakeBackendClient(
        search_data={"unprocessed_messages": [pending_message]}
    )

    async with app_client(config, backend) as client:
        user_key = await create_user(client)
        request_body = {
            "user_id": "u_123",
            "user_key": user_key,
            "query": "audio",
            "scope": ["all_user_memory"],
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text

        top_hit = response.json()["results"][0]
        assert top_hit["attachments"] == []
@pytest.mark.asyncio
async def test_search_attachment_mapping_is_user_isolated(
    config: GatewayConfig,
    repo: MemoryRepository,
) -> None:
    """Check that a search only returns the requesting user's attachments.

    Alice and Bob each own an attachment under the same session id, and the
    episode text names both files; Alice's search must list only her own.
    """
    owned_files = {"alice": "alice.png", "bob": "bob.png"}
    for owner_id, name in owned_files.items():
        repo.create_attachment(
            user_id=owner_id,
            app_id="default",
            project_id="default",
            session_id="chat:shared",
            resource_id=None,
            content_type="image",
            name=name,
            internal_uri=f"file:///private/{name}",
            source="memory_add_uri",
            sha256=None,
        )

    shared_episode = {
        "id": "ep_1",
        "session_id": "chat:shared",
        "episode": "alice.png and bob.png",
    }
    backend = FakeBackendClient(search_results=[shared_episode])
    expected_attachment = {
        "type": "image",
        "name": "alice.png",
        "internal_uri": "file:///private/alice.png",
    }

    async with app_client(config, backend) as client:
        user_key = await create_user(client, "alice")
        request_body = {
            "user_id": "alice",
            "user_key": user_key,
            "query": "images",
            "scope": ["all_user_memory"],
        }
        response = await client.post("/memories/search", json=request_body)
        assert response.status_code == 200, response.text

        top_hit = response.json()["results"][0]
        assert top_hit["attachments"] == [expected_attachment]
@pytest.mark.asyncio
async def test_deleted_resource_is_excluded_from_resource_scope_search(
config: GatewayConfig,