fix(service): enhance message filtering to drop empty chat messages while retaining tool requests
Some checks failed
CI / lint (push) Has been cancelled
CI / unit tests (push) Has been cancelled
CI / integration tests (push) Has been cancelled
CI / package build (push) Has been cancelled
Commit lint / pull request title (push) Has been cancelled
Commit lint / commit messages (push) Has been cancelled
Docs / links (push) Has been cancelled

This commit is contained in:
2026-06-16 16:18:24 +08:00
parent 0910affc78
commit b243018aff
14 changed files with 248 additions and 38 deletions

View File

@ -0,0 +1,32 @@
"""Validation paths for ``POST /api/v1/memory/add`` request DTOs."""
from __future__ import annotations
from everos.entrypoints.api.routes.memorize import ContentItemDTO, MemorizeAddRequest
def test_add_request_accepts_md_content_item() -> None:
    """An ``md`` content item validates and is exposed as a ``ContentItemDTO``."""
    md_item = {
        "type": "md",
        "text": "# Deploy\nUse nginx.",
        "name": "deploy.md",
    }
    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [md_item],
    }
    payload = {"session_id": "s_md", "messages": [message]}

    parsed = MemorizeAddRequest.model_validate(payload)

    # The content must stay a list of typed items whose first entry is the md item.
    parsed_content = parsed.messages[0].content
    assert isinstance(parsed_content, list)
    first_item = parsed_content[0]
    assert isinstance(first_item, ContentItemDTO)
    assert first_item.type == "md"

View File

@ -2,11 +2,18 @@
from __future__ import annotations
import base64
from pathlib import Path
import pytest
from everos.config import load_settings
from everos.memory.extract.ingest.multimodal import (
coerce_items,
derive_text,
normalise_content,
)
from everos.memory.extract.ingest.service import process
def test_coerce_str_to_text_item() -> None:
@ -46,3 +53,85 @@ def test_normalise_content_text_only_unchanged() -> None:
assert items == [{"type": "text", "text": "hello"}]
assert text == "hello"
assert non_text == 0
@pytest.fixture(autouse=True)
def _clear_settings_cache():
    """Clear the ``load_settings`` cache before and after each test that uses this fixture.

    Declared ``autouse=True``, so tests do not need to request it explicitly.
    """
    # Setup: drop anything cached before the test body runs.
    load_settings.cache_clear()
    yield
    # Teardown: drop anything the test itself caused to be cached.
    load_settings.cache_clear()
async def test_process_renders_md_text_without_multimodal_parser(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Inline ``md`` text comes back from ``process`` as the message text.

    ``require_multimodal`` on the ingest service module is patched to raise
    ``AssertionError`` for the duration of the test.
    """
    import everos.memory.extract.ingest.service as ingest_service

    def _parser_must_not_run():
        raise AssertionError("parser should not run")

    monkeypatch.setattr(ingest_service, "require_multimodal", _parser_must_not_run)

    markdown = "# Deploy\nUse nginx."
    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [{"type": "md", "text": "# Deploy\nUse nginx."}],
    }

    result = await process({"session_id": "s_md_text", "messages": [message]})

    processed = result.messages[0]
    first_item = processed.content_items[0]
    assert processed.text == markdown
    assert first_item["type"] == "md"
    assert first_item["text"] == markdown
    assert result.unparsed_non_text_count == 0
async def test_process_reads_md_file_uri_as_utf8_text(tmp_path: Path) -> None:
    """An ``md`` item given as a ``file://`` URI yields the file's UTF-8 text."""
    expected = "# 部署\n配置域名。"
    md_path = tmp_path / "guide.md"
    md_path.write_text(expected, encoding="utf-8")

    md_item = {"type": "md", "uri": f"file://{md_path}", "name": "guide.md"}
    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [md_item],
    }

    result = await process({"session_id": "s_md_uri", "messages": [message]})

    # Both the message text and the item's text must match the file content.
    processed = result.messages[0]
    assert processed.text == expected
    assert processed.content_items[0]["text"] == expected
async def test_process_decodes_md_base64_as_utf8_text() -> None:
    """An ``md`` item given as base64 is decoded back to its UTF-8 text."""
    expected = "## Notes\n记住配置。"
    raw_bytes = expected.encode("utf-8")
    encoded = base64.b64encode(raw_bytes).decode("ascii")

    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [{"type": "md", "base64": encoded, "ext": "md"}],
    }

    result = await process({"session_id": "s_md_base64", "messages": [message]})

    # Both the message text and the item's text must match the original string.
    processed = result.messages[0]
    assert processed.text == expected
    assert processed.content_items[0]["text"] == expected

View File

@ -18,6 +18,11 @@ def test_has_unparsed_multimodal_false_when_all_text() -> None:
assert availability.has_unparsed_multimodal(items) is False
def test_has_unparsed_multimodal_false_for_md() -> None:
    """A lone ``md`` item is not reported as unparsed multimodal content."""
    md_only = [{"type": "md", "text": "# hi"}]
    reported = availability.has_unparsed_multimodal(md_only)
    assert reported is False
def test_has_unparsed_multimodal_false_when_already_parsed() -> None:
    """An image item that carries ``parsed_content`` is not reported as unparsed."""
    items = [{"type": "image", "uri": "x", "parsed_content": "ocr"}]
    reported = availability.has_unparsed_multimodal(items)
    assert reported is False

View File

@ -74,6 +74,26 @@ def test_filter_agent_keeps_everything() -> None:
assert [m.message_id for m in out] == ["m1", "m2"]
def test_filter_drops_empty_plain_chat_messages_but_keeps_tool_requests() -> None:
    """Empty plain messages are dropped; an empty tool-call message survives in agent mode.

    Chat mode keeps only the non-empty plain message ``m4``; agent mode keeps
    the tool-call message ``m3`` as well.
    """
    tool_request = ToolCall(id="tc1", function={"name": "f", "arguments": "{}"})
    messages = [
        _msg("m1", "user", text=""),
        _msg("m2", "assistant", text=" "),
        _msg("m3", "assistant", text="", tool_calls=[tool_request]),
        _msg("m4", "user", text="ok"),
    ]

    # Run both filters first (chat, then agent) before inspecting either result.
    filtered = {mode: _filter_for_mode(messages, mode) for mode in ("chat", "agent")}

    assert [m.message_id for m in filtered["chat"]] == ["m4"]
    assert [m.message_id for m in filtered["agent"]] == ["m3", "m4"]
# ── _to_conversation_item dispatch ────────────────────────────────────────