fix(service): enhance message filtering to drop empty chat messages while retaining tool requests
Some checks failed
CI / lint (push) Has been cancelled
CI / unit tests (push) Has been cancelled
CI / integration tests (push) Has been cancelled
CI / package build (push) Has been cancelled
Commit lint / pull request title (push) Has been cancelled
Commit lint / commit messages (push) Has been cancelled
Docs / links (push) Has been cancelled
Some checks failed
CI / lint (push) Has been cancelled
CI / unit tests (push) Has been cancelled
CI / integration tests (push) Has been cancelled
CI / package build (push) Has been cancelled
Commit lint / pull request title (push) Has been cancelled
Commit lint / commit messages (push) Has been cancelled
Docs / links (push) Has been cancelled
This commit is contained in:
@@ -2,11 +2,18 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.memory.extract.ingest.multimodal import (
|
||||
coerce_items,
|
||||
derive_text,
|
||||
normalise_content,
|
||||
)
|
||||
from everos.memory.extract.ingest.service import process
|
||||
|
||||
|
||||
def test_coerce_str_to_text_item() -> None:
|
||||
@@ -46,3 +53,85 @@ def test_normalise_content_text_only_unchanged() -> None:
|
||||
assert items == [{"type": "text", "text": "hello"}]
|
||||
assert text == "hello"
|
||||
assert non_text == 0
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_settings_cache():
    """Reset the ``load_settings`` cache around every test.

    ``autouse=True`` applies this fixture to each test in scope without
    the test having to request it. The cache is cleared once before the
    test body runs and once more after it finishes.
    """
    # Setup: clear before the test runs.
    load_settings.cache_clear()
    yield
    # Teardown: clear again once the test has finished.
    load_settings.cache_clear()
|
||||
|
||||
|
||||
async def test_process_renders_md_text_without_multimodal_parser(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An inline ``md`` text item is processed without the multimodal parser.

    ``require_multimodal`` on the ingest service module is replaced with a
    stub that raises ``AssertionError`` when called, then the test checks
    that the markdown text is surfaced unchanged on the resulting message.
    """
    import everos.memory.extract.ingest.service as ingest_service

    markdown = "# Deploy\nUse nginx."

    def _parser_must_not_run():
        # Calling the stub at all means the parser path was taken.
        raise AssertionError("parser should not run")

    monkeypatch.setattr(ingest_service, "require_multimodal", _parser_must_not_run)

    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [{"type": "md", "text": markdown}],
    }
    payload = {"session_id": "s_md_text", "messages": [message]}

    result = await process(payload)

    first_message = result.messages[0]
    first_item = first_message.content_items[0]
    assert first_message.text == markdown
    assert first_item["type"] == "md"
    assert first_item["text"] == markdown
    assert result.unparsed_non_text_count == 0
|
||||
|
||||
|
||||
async def test_process_reads_md_file_uri_as_utf8_text(tmp_path: Path) -> None:
    """An ``md`` item given as a ``file://`` URI yields the file's UTF-8 text.

    A markdown file with non-ASCII content is written under ``tmp_path``
    and referenced by URI; the processed message must expose the same text
    on both the message and its first content item.
    """
    expected = "# 部署\n配置域名。"
    doc = tmp_path / "guide.md"
    doc.write_text(expected, encoding="utf-8")

    md_item = {"type": "md", "uri": f"file://{doc}", "name": "guide.md"}
    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [md_item],
    }
    payload = {"session_id": "s_md_uri", "messages": [message]}

    result = await process(payload)

    first_message = result.messages[0]
    assert first_message.text == expected
    assert first_message.content_items[0]["text"] == expected
|
||||
|
||||
|
||||
async def test_process_decodes_md_base64_as_utf8_text() -> None:
    """An ``md`` item carrying base64 data is decoded back to its UTF-8 text.

    The markdown source (including non-ASCII characters) is base64-encoded
    and sent in the ``base64`` field; the processed message must expose the
    original text on both the message and its first content item.
    """
    expected = "## Notes\n记住配置。"
    # str.encode() defaults to UTF-8; the base64 output itself is pure ASCII.
    encoded = base64.b64encode(expected.encode()).decode("ascii")

    message = {
        "sender_id": "u1",
        "role": "user",
        "timestamp": 1_700_000_000_000,
        "content": [{"type": "md", "base64": encoded, "ext": "md"}],
    }
    payload = {"session_id": "s_md_base64", "messages": [message]}

    result = await process(payload)

    first_message = result.messages[0]
    assert first_message.text == expected
    assert first_message.content_items[0]["text"] == expected
|
||||
|
||||
Reference in New Issue
Block a user