chore: initialize EverOS 1.0.0

An md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting), enforced by import-linter.

Engineering surface:

- Coding conventions live in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and checks for OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/tests/unit/test_component/__init__.py
+++ b/tests/unit/test_component/__init__.py
--- a/tests/unit/test_component/test_config/__init__.py
+++ b/tests/unit/test_component/test_config/__init__.py
--- a/tests/unit/test_component/test_config/test_loader.py
+++ b/tests/unit/test_component/test_config/test_loader.py
@@ -0,0 +1,167 @@
+"""Unit tests for YamlConfigLoader."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from everos.component.config import YamlConfigLoader
+
+
+@pytest.fixture
+def config_root(tmp_path: Path) -> Path:
+    """Populate ``tmp_path`` with a small YAML config tree and return it.
+
+    Layout::
+
+        tmp_path/
+          prompt_slots/
+            episode.yaml
+            atomic_fact.yaml
+          custom_dir/
+            alpha.yaml
+    """
+    slots_dir = tmp_path / "prompt_slots"
+    slots_dir.mkdir()
+    (slots_dir / "episode.yaml").write_text(
+        "template: extract episode\nvariables:\n  memcell: input memcell\n",
+        encoding="utf-8",
+    )
+    (slots_dir / "atomic_fact.yaml").write_text(
+        "template: extract atomic fact\n", encoding="utf-8"
+    )
+
+    custom_dir = tmp_path / "custom_dir"
+    custom_dir.mkdir()
+    (custom_dir / "alpha.yaml").write_text("value: alpha\n", encoding="utf-8")
+    return tmp_path
+
+
+def test_register_default_subdir(config_root: Path) -> None:
+    """A category registered without ``subdir`` finds ``prompt_slots/episode``."""
+    expected = {
+        "template": "extract episode",
+        "variables": {"memcell": "input memcell"},
+    }
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("prompt_slots")
+    assert cfg_loader.find("prompt_slots", "episode") == expected
+
+
+def test_register_custom_subdir(config_root: Path) -> None:
+    """A category may point at a directory whose name differs from its own."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("alphas", subdir="custom_dir")
+    assert cfg_loader.find("alphas", "alpha") == {"value": "alpha"}
+
+
+def test_constructor_categories_dict(config_root: Path) -> None:
+    """Categories passed to the constructor are registered up front."""
+    category_map = {"prompt_slots": None, "alphas": "custom_dir"}
+    cfg_loader = YamlConfigLoader(root=config_root, categories=category_map)
+
+    registered = sorted(cfg_loader.categories())
+    assert registered == ["alphas", "prompt_slots"]
+    assert cfg_loader.find("alphas", "alpha") == {"value": "alpha"}
+
+
+def test_find_unregistered_category_raises(config_root: Path) -> None:
+    """``find`` on an unknown category raises ``KeyError`` ("not registered")."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    expect_key_error = pytest.raises(KeyError, match="not registered")
+    with expect_key_error:
+        cfg_loader.find("ghost", "x")
+
+
+def test_find_missing_file_raises(config_root: Path) -> None:
+    """A registered category with no matching YAML file raises."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("prompt_slots")
+    expect_missing = pytest.raises(FileNotFoundError)
+    with expect_missing:
+        cfg_loader.find("prompt_slots", "no_such")
+
+
+def test_find_non_mapping_top_level_raises(tmp_path: Path) -> None:
+    """A YAML document whose top level is a list is rejected with ``TypeError``."""
+    slots_dir = tmp_path / "prompt_slots"
+    slots_dir.mkdir()
+    # The document root is a sequence rather than a mapping.
+    (slots_dir / "bad.yaml").write_text("- one\n- two\n", encoding="utf-8")
+
+    cfg_loader = YamlConfigLoader(root=tmp_path)
+    cfg_loader.register_category("prompt_slots")
+    with pytest.raises(TypeError, match="must be a mapping"):
+        cfg_loader.find("prompt_slots", "bad")
+
+
+def test_find_empty_file_yields_empty_dict(tmp_path: Path) -> None:
+    """A zero-byte YAML file loads as an empty mapping."""
+    slots_dir = tmp_path / "prompt_slots"
+    slots_dir.mkdir()
+    (slots_dir / "blank.yaml").write_text("", encoding="utf-8")
+
+    cfg_loader = YamlConfigLoader(root=tmp_path)
+    cfg_loader.register_category("prompt_slots")
+    loaded = cfg_loader.find("prompt_slots", "blank")
+    assert loaded == {}
+
+
+def test_list_returns_sorted_stems(config_root: Path) -> None:
+    """``list`` yields the file stems of a category in sorted order."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("prompt_slots")
+    stems = cfg_loader.list("prompt_slots")
+    assert stems == ["atomic_fact", "episode"]
+
+
+def test_list_unregistered_category_raises(config_root: Path) -> None:
+    """``list`` on an unknown category raises ``KeyError``."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    expect_key_error = pytest.raises(KeyError)
+    with expect_key_error:
+        cfg_loader.list("ghost")
+
+
+def test_list_empty_directory(tmp_path: Path) -> None:
+    """A registered category whose directory does not exist lists as empty."""
+    cfg_loader = YamlConfigLoader(root=tmp_path)
+    cfg_loader.register_category("nope")
+    # No ``nope/`` directory was created under tmp_path.
+    stems = cfg_loader.list("nope")
+    assert stems == []
+
+
+def test_cache_returns_same_object(config_root: Path) -> None:
+    """Repeated lookups of one entry return the identical dict object."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("prompt_slots")
+    first_hit = cfg_loader.find("prompt_slots", "episode")
+    second_hit = cfg_loader.find("prompt_slots", "episode")
+    # Identity, not just equality: the second call must come from the cache.
+    assert first_hit is second_hit
+
+
+def test_refresh_invalidates_cache_and_reloads(config_root: Path) -> None:
+    """Disk edits stay invisible until ``refresh()`` drops the cache."""
+    episode_file = config_root / "prompt_slots" / "episode.yaml"
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("prompt_slots")
+    original = cfg_loader.find("prompt_slots", "episode")
+
+    # Rewrite the file; the loader must keep serving the cached dict.
+    episode_file.write_text("template: MODIFIED\n", encoding="utf-8")
+    assert cfg_loader.find("prompt_slots", "episode") is original
+
+    # A full refresh forces the next lookup to re-read the file.
+    cfg_loader.refresh()
+    reloaded = cfg_loader.find("prompt_slots", "episode")
+    assert reloaded is not original
+    assert reloaded == {"template": "MODIFIED"}
+
+
+def test_refresh_specific_entry(config_root: Path) -> None:
+    """``refresh(category, name)`` reloads one entry; siblings stay cached."""
+    cfg_loader = YamlConfigLoader(root=config_root)
+    cfg_loader.register_category("prompt_slots")
+    episode_before = cfg_loader.find("prompt_slots", "episode")
+    atomic_before = cfg_loader.find("prompt_slots", "atomic_fact")
+
+    episode_file = config_root / "prompt_slots" / "episode.yaml"
+    episode_file.write_text("template: NEW\n", encoding="utf-8")
+    cfg_loader.refresh("prompt_slots", "episode")
+
+    episode_after = cfg_loader.find("prompt_slots", "episode")
+    atomic_after = cfg_loader.find("prompt_slots", "atomic_fact")
+    assert episode_after != episode_before  # re-read from disk
+    assert atomic_after is atomic_before  # cache entry untouched
+
+
+def test_refresh_full_category(config_root: Path) -> None:
+    """``refresh(category)`` drops that category's cache and no other.
+
+    Both halves are checked: the refreshed category must hand back a newly
+    loaded (but equal) dict, and the unrelated category must keep serving
+    its cached object.
+    """
+    loader = YamlConfigLoader(
+        root=config_root,
+        categories={"prompt_slots": None, "alphas": "custom_dir"},
+    )
+    episode = loader.find("prompt_slots", "episode")
+    alpha = loader.find("alphas", "alpha")
+
+    loader.refresh("prompt_slots")
+
+    # The file is unchanged on disk, so the reload is equal but not identical.
+    reloaded = loader.find("prompt_slots", "episode")
+    assert reloaded is not episode
+    assert reloaded == episode
+    # alphas cache survives the prompt_slots refresh
+    assert loader.find("alphas", "alpha") is alpha
--- a/tests/unit/test_component/test_embedding/__init__.py
+++ b/tests/unit/test_component/test_embedding/__init__.py
--- a/tests/unit/test_component/test_embedding/test_factory.py
+++ b/tests/unit/test_component/test_embedding/test_factory.py
@@ -0,0 +1,46 @@
+"""``build_embedding_provider`` — settings validation + provider build."""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import SecretStr
+
+from everos.component.embedding import (
+    OpenAIEmbeddingProvider,
+    build_embedding_provider,
+)
+from everos.config.settings import EmbeddingSettings
+
+
+def test_raises_when_model_missing() -> None:
+    """A missing model is reported with the ``EVEROS_EMBEDDING__MODEL`` name."""
+    settings = EmbeddingSettings(
+        model=None,
+        api_key=SecretStr("k"),
+        base_url="https://x",
+    )
+    with pytest.raises(ValueError, match="EVEROS_EMBEDDING__MODEL"):
+        build_embedding_provider(settings)
+
+
+def test_raises_when_api_key_missing() -> None:
+    """A missing key is reported with the ``EVEROS_EMBEDDING__API_KEY`` name."""
+    settings = EmbeddingSettings(
+        model="m",
+        api_key=None,
+        base_url="https://x",
+    )
+    with pytest.raises(ValueError, match="EVEROS_EMBEDDING__API_KEY"):
+        build_embedding_provider(settings)
+
+
+def test_raises_when_base_url_missing() -> None:
+    """A missing URL is reported with the ``EVEROS_EMBEDDING__BASE_URL`` name."""
+    settings = EmbeddingSettings(
+        model="m",
+        api_key=SecretStr("k"),
+        base_url=None,
+    )
+    with pytest.raises(ValueError, match="EVEROS_EMBEDDING__BASE_URL"):
+        build_embedding_provider(settings)
+
+
+def test_builds_openai_embedding_provider_with_default_dim() -> None:
+    """Complete settings and no ``dim`` yield an ``OpenAIEmbeddingProvider``."""
+    settings = EmbeddingSettings(
+        model="m",
+        api_key=SecretStr("k"),
+        base_url="https://x",
+    )
+    provider = build_embedding_provider(settings)
+    assert isinstance(provider, OpenAIEmbeddingProvider)
+
+
+def test_custom_dim_passes_through() -> None:
+    """An explicit ``dim`` is accepted by the factory and kept by the provider.
+
+    The provider stores the dimension on a private attribute, so the value
+    check can only run when ``_dim`` exists. If the attribute is named
+    differently the test is reported as skipped, rather than passing
+    without having verified the dimension at all.
+    """
+    s = EmbeddingSettings(model="m", api_key=SecretStr("k"), base_url="https://x")
+    p = build_embedding_provider(s, dim=512)
+    assert isinstance(p, OpenAIEmbeddingProvider)
+    if not hasattr(p, "_dim"):
+        pytest.skip("provider exposes no `_dim` attribute; cannot verify dim")
+    assert p._dim == 512
--- a/tests/unit/test_component/test_llm/__init__.py
+++ b/tests/unit/test_component/test_llm/__init__.py
--- a/tests/unit/test_component/test_llm/test_client.py
+++ b/tests/unit/test_component/test_llm/test_client.py
@@ -0,0 +1,64 @@
+"""get_llm_client — raises on missing credentials, caches on success."""
+
+from __future__ import annotations
+
+import importlib
+
+import pytest
+from pydantic import SecretStr
+
+from everos.component.llm import LLMNotConfiguredError
+from everos.config import Settings
+from everos.config.settings import LLMSettings
+
+_client_mod = importlib.import_module("everos.component.llm.client")
+
+
+def _reset_singleton(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Set the client module's ``_llm_client`` to ``None`` for this test.
+
+    ``raising=False`` keeps the patch from failing when the attribute has
+    not been defined on the module yet.
+    """
+    monkeypatch.setattr(
+        _client_mod,
+        "_llm_client",
+        None,
+        raising=False,
+    )
+
+
+def _patch_settings(
+    monkeypatch: pytest.MonkeyPatch,
+    *,
+    api_key: str | None,
+    base_url: str | None,
+) -> None:
+    """Replace the client module's ``load_settings`` with a canned ``Settings``.
+
+    ``api_key`` is wrapped in ``SecretStr`` unless it is ``None``.
+    """
+    secret = None if api_key is None else SecretStr(api_key)
+    llm_settings = LLMSettings(
+        model="gpt-4o-mini",
+        api_key=secret,
+        base_url=base_url,
+    )
+    stub_settings = Settings(llm=llm_settings)
+    monkeypatch.setattr(_client_mod, "load_settings", lambda: stub_settings)
+
+
+def test_raises_when_api_key_missing(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Without an API key the client lookup raises ``LLMNotConfiguredError``."""
+    _reset_singleton(monkeypatch)
+    _patch_settings(monkeypatch, api_key=None, base_url="https://example.test")
+
+    expect_error = pytest.raises(LLMNotConfiguredError, match="EVEROS_LLM__API_KEY")
+    with expect_error:
+        _client_mod.get_llm_client()
+
+
+def test_raises_when_base_url_missing(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Without a base URL the client lookup raises ``LLMNotConfiguredError``."""
+    _reset_singleton(monkeypatch)
+    _patch_settings(monkeypatch, api_key="sk-test", base_url=None)
+
+    expect_error = pytest.raises(LLMNotConfiguredError, match="EVEROS_LLM__BASE_URL")
+    with expect_error:
+        _client_mod.get_llm_client()
+
+
+def test_returns_singleton_when_configured(monkeypatch: pytest.MonkeyPatch) -> None:
+    """With full settings, repeated calls return the one built client."""
+    _reset_singleton(monkeypatch)
+    _patch_settings(monkeypatch, api_key="sk-test", base_url="https://example.test")
+    fake_client = object()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: fake_client)
+
+    clients = [_client_mod.get_llm_client() for _ in range(2)]
+
+    assert clients[0] is fake_client
+    assert clients[0] is clients[1]
--- a/tests/unit/test_component/test_llm/test_factory.py
+++ b/tests/unit/test_component/test_llm/test_factory.py
@@ -0,0 +1,28 @@
+"""``build_llm_provider`` — settings validation + provider build."""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import SecretStr
+
+from everos.component.llm import build_llm_provider
+from everos.component.llm.openai_provider import OpenAIProvider
+from everos.config.settings import LLMSettings
+
+
+def test_raises_when_api_key_missing() -> None:
+    """A missing key is reported with the ``EVEROS_LLM__API_KEY`` name."""
+    settings = LLMSettings(
+        model="m",
+        api_key=None,
+        base_url="https://x",
+    )
+    with pytest.raises(ValueError, match="EVEROS_LLM__API_KEY"):
+        build_llm_provider(settings)
+
+
+def test_raises_when_base_url_missing() -> None:
+    """A missing URL is reported with the ``EVEROS_LLM__BASE_URL`` name."""
+    settings = LLMSettings(
+        model="m",
+        api_key=SecretStr("k"),
+        base_url=None,
+    )
+    with pytest.raises(ValueError, match="EVEROS_LLM__BASE_URL"):
+        build_llm_provider(settings)
+
+
+def test_builds_openai_provider() -> None:
+    """Complete settings yield an ``OpenAIProvider`` instance."""
+    settings = LLMSettings(
+        model="m",
+        api_key=SecretStr("k"),
+        base_url="https://x",
+    )
+    provider = build_llm_provider(settings)
+    assert isinstance(provider, OpenAIProvider)
--- a/tests/unit/test_component/test_rerank/__init__.py
+++ b/tests/unit/test_component/test_rerank/__init__.py
--- a/tests/unit/test_component/test_rerank/test_deepinfra_provider.py
+++ b/tests/unit/test_component/test_rerank/test_deepinfra_provider.py
@@ -0,0 +1,254 @@
+"""DeepInfra rerank provider — happy path, batching, retries, errors.
+
+httpx is faked via :class:`httpx.MockTransport`; the provider's
+``httpx.AsyncClient(timeout=...)`` ctx manager is monkeypatched to
+return a client wired to the transport.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Callable
+
+import httpx
+import pytest
+
+from everos.component.rerank import DeepInfraRerankProvider, RerankError
+
+
+def _patch_httpx(
+    monkeypatch: pytest.MonkeyPatch,
+    handler: Callable[[httpx.Request], httpx.Response],
+) -> None:
+    """Route every ``httpx.AsyncClient`` the provider builds through ``handler``.
+
+    The real client class is captured before patching, and the replacement
+    factory forwards to it with ``transport`` forced to a ``MockTransport``.
+    """
+    import everos.component.rerank.deepinfra_provider as mod
+
+    mock_transport = httpx.MockTransport(handler)
+    original_client_cls = httpx.AsyncClient
+
+    def _client_with_transport(*args: object, **kwargs: object) -> httpx.AsyncClient:
+        kwargs["transport"] = mock_transport
+        return original_client_cls(*args, **kwargs)  # type: ignore[arg-type]
+
+    monkeypatch.setattr(mod.httpx, "AsyncClient", _client_with_transport)
+
+
+def _ok_response(scores: list[float]) -> httpx.Response:
+    """Build a 200 response whose ``scores`` field nests ``scores`` in a list."""
+    payload = {"scores": [scores]}
+    return httpx.Response(200, json=payload)
+
+
+async def test_empty_documents_short_circuits(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An empty document list returns ``[]`` without issuing any request."""
+    requests_seen: list[httpx.Request] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        requests_seen.append(req)
+        return _ok_response([])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1"
+    )
+    reranked = await provider.rerank("q", [])
+    assert reranked == []
+    assert not requests_seen
+
+
+async def test_scores_sorted_descending(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Results come back ordered from the highest score to the lowest."""
+    _patch_httpx(monkeypatch, lambda _req: _ok_response([0.1, 0.9, 0.5]))
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", batch_size=10
+    )
+    ranked = await provider.rerank("q", ["a", "b", "c"])
+
+    ranked_indices = [item.index for item in ranked]
+    assert ranked_indices == [1, 2, 0]
+    assert ranked[0].score == pytest.approx(0.9)
+
+
+async def test_batching_merges_chunk_indices(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Three documents at ``batch_size=2`` go out as two chunks (sizes 2 and 1).
+
+    The merged result must carry indices into the original document list.
+    """
+    chunks_sent: list[list[str]] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        documents = json.loads(req.content)["documents"]
+        chunks_sent.append(documents)
+        # Score equals document length, which makes the final order predictable.
+        return _ok_response([float(len(doc)) for doc in documents])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", batch_size=2
+    )
+    ranked = await provider.rerank("q", ["x", "yy", "zzz"])
+
+    assert set(map(len, chunks_sent)) == {1, 2}
+    # Descending by length: "zzz" (idx 2), then "yy" (idx 1), then "x" (idx 0).
+    assert [item.index for item in ranked] == [2, 1, 0]
+
+
+async def test_url_appends_model(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The request URL is ``base_url`` (trailing slash removed) plus the model."""
+    requested_urls: list[str] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        requested_urls.append(str(req.url))
+        return _ok_response([0.5])
+
+    _patch_httpx(monkeypatch, handler)
+    # The base URL deliberately ends with "/" to exercise the stripping.
+    base_with_slash = "https://api.deepinfra.com/v1/inference/"
+    provider = DeepInfraRerankProvider(
+        model="Qwen/Q",
+        api_key="k",
+        base_url=base_with_slash,
+    )
+    await provider.rerank("q", ["a"])
+    assert requested_urls == ["https://api.deepinfra.com/v1/inference/Qwen/Q"]
+
+
+async def test_4xx_raises_immediately(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An HTTP 400 surfaces as ``RerankError`` after exactly one request."""
+    requests_seen: list[httpx.Request] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        requests_seen.append(req)
+        return httpx.Response(400, text="bad input")
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", max_retries=3
+    )
+    with pytest.raises(RerankError, match="HTTP 400"):
+        await provider.rerank("q", ["a"])
+    # Client errors are not retried even though max_retries is 3.
+    assert len(requests_seen) == 1
+
+
+async def test_5xx_retries_then_succeeds(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Two 503 replies followed by a 200 succeed on the third request."""
+    attempts: list[int] = []
+
+    def handler(_req: httpx.Request) -> httpx.Response:
+        attempts.append(len(attempts) + 1)
+        if len(attempts) < 3:
+            return httpx.Response(503, text="busy")
+        return _ok_response([0.7])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", max_retries=3
+    )
+    ranked = await provider.rerank("q", ["a"])
+    assert len(attempts) == 3
+    assert ranked[0].score == pytest.approx(0.7)
+
+
+async def test_5xx_exhausts_retries(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A server that always answers 500 ends in ``RerankError``."""
+    _patch_httpx(monkeypatch, lambda _req: httpx.Response(500, text="boom"))
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", max_retries=1
+    )
+    expect_error = pytest.raises(RerankError, match="HTTP 500")
+    with expect_error:
+        await provider.rerank("q", ["a"])
+
+
+async def test_429_retries(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A single HTTP 429 is retried and the second request succeeds."""
+    attempts: list[int] = []
+
+    def handler(_req: httpx.Request) -> httpx.Response:
+        attempts.append(len(attempts) + 1)
+        if len(attempts) == 1:
+            return httpx.Response(429, text="slow down")
+        return _ok_response([0.4])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", max_retries=3
+    )
+    ranked = await provider.rerank("q", ["a"])
+    assert len(attempts) == 2
+    assert ranked[0].score == pytest.approx(0.4)
+
+
+async def test_transport_error_retries_then_fails(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A persistent ``ConnectError`` ends in a "transport failure" error."""
+
+    def always_down(_req: httpx.Request) -> httpx.Response:
+        raise httpx.ConnectError("network down")
+
+    _patch_httpx(monkeypatch, always_down)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", max_retries=1
+    )
+    expect_error = pytest.raises(RerankError, match="transport failure")
+    with expect_error:
+        await provider.rerank("q", ["a"])
+
+
+async def test_malformed_scores_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A 200 body without a ``scores`` key is rejected ("missing scores")."""
+    body_without_scores = {"something_else": []}
+    _patch_httpx(
+        monkeypatch, lambda _req: httpx.Response(200, json=body_without_scores)
+    )
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1"
+    )
+    with pytest.raises(RerankError, match="missing scores"):
+        await provider.rerank("q", ["a"])
+
+
+async def test_score_length_mismatch_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Two scores for three documents is reported as a count mismatch."""
+    two_scores = {"scores": [[0.1, 0.2]]}
+    _patch_httpx(monkeypatch, lambda _req: httpx.Response(200, json=two_scores))
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1", batch_size=10
+    )
+    expect_error = pytest.raises(RerankError, match="returned 2 scores, expected 3")
+    with expect_error:
+        await provider.rerank("q", ["a", "b", "c"])
+
+
+async def test_payload_wraps_qwen3_template(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The outgoing payload carries the Qwen3-Reranker chat-template markers.
+
+    DeepInfra's inference API scores raw text, so the prompt scaffolding
+    (system frame + ``<Instruct>``/``<Query>``/``<Document>`` markers) must be
+    supplied client-side or the reranker returns uncalibrated scores.
+    """
+    sent_payload: dict[str, list[str]] = {}
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        sent_payload.update(json.loads(req.content))
+        return _ok_response([0.5])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1"
+    )
+    await provider.rerank("what did Alice eat?", ["pasta"], instruction="find facts")
+
+    first_query = sent_payload["queries"][0]
+    first_document = sent_payload["documents"][0]
+    assert first_query.startswith("<|im_start|>system")
+    assert "<Instruct>: find facts" in first_query
+    assert "<Query>: what did Alice eat?" in first_query
+    assert first_document.startswith("<Document>: pasta")
+
+
+async def test_default_instruction_when_none(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Omitting ``instruction`` sends the provider's default instruction text."""
+    sent_payload: dict[str, list[str]] = {}
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        sent_payload.update(json.loads(req.content))
+        return _ok_response([0.5])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1"
+    )
+    await provider.rerank("q", ["d"])
+
+    first_query = sent_payload["queries"][0]
+    assert "<Instruct>: Given a question and a passage" in first_query
+
+
+async def test_flat_scores_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A flat ``{"scores": [s1, s2]}`` body is accepted like the nested form."""
+    flat_body = {"scores": [0.3, 0.6]}
+    _patch_httpx(monkeypatch, lambda _req: httpx.Response(200, json=flat_body))
+    provider = DeepInfraRerankProvider(
+        model="m", api_key="k", base_url="https://api/v1"
+    )
+    ranked = await provider.rerank("q", ["a", "b"])
+    ranked_scores = [item.score for item in ranked]
+    assert ranked_scores == [0.6, 0.3]
--- a/tests/unit/test_component/test_rerank/test_factory.py
+++ b/tests/unit/test_component/test_rerank/test_factory.py
@@ -0,0 +1,67 @@
+"""``build_rerank_provider`` — settings validation + provider routing."""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import SecretStr
+
+from everos.component.rerank import (
+    DeepInfraRerankProvider,
+    VllmRerankProvider,
+    build_rerank_provider,
+)
+from everos.config.settings import RerankSettings
+
+
+def test_raises_when_model_missing() -> None:
+    """A missing model is reported with the ``EVEROS_RERANK__MODEL`` name."""
+    settings = RerankSettings(
+        model=None,
+        api_key=SecretStr("k"),
+        base_url="https://x",
+    )
+    with pytest.raises(ValueError, match="EVEROS_RERANK__MODEL"):
+        build_rerank_provider(settings)
+
+
+def test_raises_when_base_url_missing() -> None:
+    """A missing URL is reported with the ``EVEROS_RERANK__BASE_URL`` name."""
+    settings = RerankSettings(
+        model="m",
+        api_key=SecretStr("k"),
+        base_url=None,
+    )
+    with pytest.raises(ValueError, match="EVEROS_RERANK__BASE_URL"):
+        build_rerank_provider(settings)
+
+
+def test_deepinfra_requires_api_key() -> None:
+    """The ``deepinfra`` provider refuses to build without an API key."""
+    settings = RerankSettings(
+        provider="deepinfra",
+        model="m",
+        api_key=None,
+        base_url="https://x",
+    )
+    expect_error = pytest.raises(ValueError, match="EVEROS_RERANK__API_KEY")
+    with expect_error:
+        build_rerank_provider(settings)
+
+
+def test_deepinfra_builds_provider() -> None:
+    """``provider="deepinfra"`` builds a ``DeepInfraRerankProvider``."""
+    settings = RerankSettings(
+        provider="deepinfra",
+        model="m",
+        api_key=SecretStr("k"),
+        base_url="https://api/v1/inference",
+    )
+    provider = build_rerank_provider(settings)
+    assert isinstance(provider, DeepInfraRerankProvider)
+
+
+def test_vllm_accepts_empty_api_key() -> None:
+    """``provider="vllm"`` builds even when ``api_key`` is ``None``."""
+    settings = RerankSettings(
+        provider="vllm",
+        model="m",
+        api_key=None,
+        base_url="http://localhost:8000/v1",
+    )
+    provider = build_rerank_provider(settings)
+    assert isinstance(provider, VllmRerankProvider)
+
+
+def test_vllm_with_api_key() -> None:
+    """``provider="vllm"`` also builds when an API key is supplied."""
+    settings = RerankSettings(
+        provider="vllm",
+        model="m",
+        api_key=SecretStr("k"),
+        base_url="http://localhost:8000/v1",
+    )
+    provider = build_rerank_provider(settings)
+    assert isinstance(provider, VllmRerankProvider)
--- a/tests/unit/test_component/test_rerank/test_vllm_provider.py
+++ b/tests/unit/test_component/test_rerank/test_vllm_provider.py
@@ -0,0 +1,187 @@
+"""vLLM rerank provider — auth header conditional, results parsing, retries."""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+
+import httpx
+import pytest
+
+from everos.component.rerank import RerankError, VllmRerankProvider
+
+
+def _patch_httpx(
+    monkeypatch: pytest.MonkeyPatch,
+    handler: Callable[[httpx.Request], httpx.Response],
+) -> None:
+    """Route every ``httpx.AsyncClient`` the provider builds through ``handler``.
+
+    The real client class is captured before patching, and the replacement
+    factory forwards to it with ``transport`` forced to a ``MockTransport``.
+    """
+    import everos.component.rerank.vllm_provider as mod
+
+    mock_transport = httpx.MockTransport(handler)
+    original_client_cls = httpx.AsyncClient
+
+    def _client_with_transport(*args: object, **kwargs: object) -> httpx.AsyncClient:
+        kwargs["transport"] = mock_transport
+        return original_client_cls(*args, **kwargs)  # type: ignore[arg-type]
+
+    monkeypatch.setattr(mod.httpx, "AsyncClient", _client_with_transport)
+
+
+def _ok_response(items: list[dict[str, float | int]]) -> httpx.Response:
+    """Build a 200 response carrying ``items`` under the ``results`` key."""
+    payload = {"results": items}
+    return httpx.Response(200, json=payload)
+
+
+async def test_empty_documents_short_circuits(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An empty document list returns ``[]`` without issuing any request."""
+    requests_seen: list[httpx.Request] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        requests_seen.append(req)
+        return _ok_response([])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(model="m", api_key="", base_url="http://x/v1")
+    reranked = await provider.rerank("q", [])
+    assert reranked == []
+    assert not requests_seen
+
+
+async def test_url_and_sort_desc(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Requests hit ``<base_url>/rerank`` and results sort by descending score."""
+    requested_urls: list[str] = []
+    canned_items = [
+        {"index": position, "relevance_score": score}
+        for position, score in enumerate([0.1, 0.9, 0.5])
+    ]
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        requested_urls.append(str(req.url))
+        return _ok_response(canned_items)
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(
+        model="m", api_key="k", base_url="http://localhost:8000/v1/"
+    )
+    ranked = await provider.rerank("q", ["a", "b", "c"])
+
+    # The trailing slash of base_url is dropped before ``/rerank`` is added.
+    assert requested_urls == ["http://localhost:8000/v1/rerank"]
+    assert [item.index for item in ranked] == [1, 2, 0]
+
+
+async def test_auth_header_added_when_api_key_set(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A non-empty API key is sent as a ``Bearer`` authorization header."""
+    header_log: list[dict[str, str]] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        header_log.append(dict(req.headers))
+        return _ok_response([{"index": 0, "relevance_score": 0.5}])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(model="m", api_key="sk-abc", base_url="http://x/v1")
+    await provider.rerank("q", ["a"])
+
+    first_headers = header_log[0]
+    assert first_headers.get("authorization") == "Bearer sk-abc"
+
+
+async def test_auth_header_omitted_when_api_key_empty(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """An empty API key produces a request with no authorization header."""
+    header_log: list[dict[str, str]] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        header_log.append(dict(req.headers))
+        return _ok_response([{"index": 0, "relevance_score": 0.5}])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(model="m", api_key="", base_url="http://x/v1")
+    await provider.rerank("q", ["a"])
+
+    first_headers = header_log[0]
+    assert "authorization" not in first_headers
+
+
+async def test_batching_offsets_indices(monkeypatch: pytest.MonkeyPatch) -> None:
+    """With batch_size=2 and 3 docs, the second batch's result index 0 becomes 2."""
+    import json
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        chunk_docs = json.loads(req.content)["documents"]
+        # Reply with chunk-local indices 0..len-1, as a per-request server would.
+        chunk_items = [
+            {"index": position, "relevance_score": float(position)}
+            for position in range(len(chunk_docs))
+        ]
+        return _ok_response(chunk_items)
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(
+        model="m", api_key="", base_url="http://x/v1", batch_size=2
+    )
+    ranked = await provider.rerank("q", ["a", "b", "c"])
+
+    # Chunk 1 contributes indices 0 and 1; chunk 2 contributes index 2.
+    merged_indices = sorted(item.index for item in ranked)
+    assert merged_indices == [0, 1, 2]
+
+
+async def test_4xx_raises_immediately(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An HTTP 401 surfaces as ``RerankError`` after exactly one request."""
+    requests_seen: list[httpx.Request] = []
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        requests_seen.append(req)
+        return httpx.Response(401, text="unauthorized")
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(
+        model="m", api_key="bad", base_url="http://x/v1", max_retries=3
+    )
+    with pytest.raises(RerankError, match="HTTP 401"):
+        await provider.rerank("q", ["a"])
+    # Only one request went out despite max_retries=3.
+    assert len(requests_seen) == 1
+
+
+async def test_5xx_retries(monkeypatch: pytest.MonkeyPatch) -> None:
+    """One 502 reply followed by a 200 succeeds on the second request."""
+    attempts: list[int] = []
+
+    def handler(_req: httpx.Request) -> httpx.Response:
+        attempts.append(len(attempts) + 1)
+        if len(attempts) < 2:
+            return httpx.Response(502, text="bad gw")
+        return _ok_response([{"index": 0, "relevance_score": 0.42}])
+
+    _patch_httpx(monkeypatch, handler)
+    provider = VllmRerankProvider(
+        model="m", api_key="", base_url="http://x/v1", max_retries=3
+    )
+    ranked = await provider.rerank("q", ["a"])
+    assert len(attempts) == 2
+    assert ranked[0].score == pytest.approx(0.42)
+
+
+async def test_5xx_exhausts_retries(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A server that always answers 500 ends in ``RerankError``."""
+    _patch_httpx(monkeypatch, lambda _req: httpx.Response(500, text="boom"))
+    provider = VllmRerankProvider(
+        model="m", api_key="", base_url="http://x/v1", max_retries=1
+    )
+    expect_error = pytest.raises(RerankError, match="HTTP 500")
+    with expect_error:
+        await provider.rerank("q", ["a"])
+
+
+async def test_transport_error_exhausts(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A persistent ``ReadTimeout`` ends in a "transport failure" error."""
+
+    def always_times_out(_req: httpx.Request) -> httpx.Response:
+        raise httpx.ReadTimeout("timeout")
+
+    _patch_httpx(monkeypatch, always_times_out)
+    provider = VllmRerankProvider(
+        model="m", api_key="", base_url="http://x/v1", max_retries=1
+    )
+    expect_error = pytest.raises(RerankError, match="transport failure")
+    with expect_error:
+        await provider.rerank("q", ["a"])
+
+
+async def test_malformed_results_missing_key(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A 200 body without a ``results`` key is rejected ("missing results")."""
+    body_without_results = {"data": []}
+    _patch_httpx(
+        monkeypatch, lambda _req: httpx.Response(200, json=body_without_results)
+    )
+    provider = VllmRerankProvider(model="m", api_key="", base_url="http://x/v1")
+    with pytest.raises(RerankError, match="missing results"):
+        await provider.rerank("q", ["a"])
+
+
+async def test_malformed_result_entry(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A result entry lacking ``relevance_score`` is rejected as malformed."""
+    body_without_score = {"results": [{"index": 0}]}
+    _patch_httpx(
+        monkeypatch, lambda _req: httpx.Response(200, json=body_without_score)
+    )
+    provider = VllmRerankProvider(model="m", api_key="", base_url="http://x/v1")
+    with pytest.raises(RerankError, match="malformed rerank result"):
+        await provider.rerank("q", ["a"])
--- a/tests/unit/test_component/test_tokenizer/__init__.py
+++ b/tests/unit/test_component/test_tokenizer/__init__.py
--- a/tests/unit/test_component/test_tokenizer/test_jieba.py
+++ b/tests/unit/test_component/test_tokenizer/test_jieba.py
@@ -0,0 +1,98 @@
+"""Unit tests for :class:`JiebaTokenizer`.
+
+Verify the contract that callers downstream depend on:
+
+* clean token list (no whitespace, no empty strings),
+* CJK + ASCII pass-through under ``cut_for_search`` segmentation,
+* default stopword + ``min_length=2`` filter applied,
+* batch preserves order.
+
+The tokenizer is symmetric — cascade write side and search query side
+both go through this code path, so changes here change BM25 recall on
+both ends.
+"""
+
+from __future__ import annotations
+
+from everos.component.tokenizer import JiebaTokenizer, build_tokenizer
+
+
+def test_tokenize_returns_list_for_english() -> None:
+    """Plain English text splits into its words as a list."""
+    tokenizer = JiebaTokenizer()
+    assert tokenizer.tokenize("hello world") == ["hello", "world"]
+
+
+def test_tokenize_drops_pure_whitespace() -> None:
+    """Whitespace-only tokens never reach the BM25 column."""
+    tokenizer = JiebaTokenizer()
+    blank_tokens = [
+        tok for tok in tokenizer.tokenize("foo   bar") if not tok.strip()
+    ]
+    assert blank_tokens == []
+
+
+def test_tokenize_empty_input() -> None:
+    """An empty string tokenizes to an empty list."""
+    tokenizer = JiebaTokenizer()
+    result = tokenizer.tokenize("")
+    assert result == []
+
+
+def test_tokenize_cjk_keeps_multichar_words() -> None:
+    """``cut_for_search`` keeps multi-character compounds usable by BM25."""
+    found = set(JiebaTokenizer().tokenize("我爱北京天安门"))
+    # The single characters 我 and 爱 fall below min_length=2 (我 is also a
+    # default stopword), so neither may appear in the output.
+    assert "我" not in found
+    assert "爱" not in found
+    # Multi-character words survive; either segmentation of 天安门 is accepted.
+    assert "北京" in found
+    assert found & {"天安门", "天安"}
+
+
+def test_tokenize_drops_default_english_stopwords() -> None:
+    """The article "the" is filtered while content words are kept."""
+    result = JiebaTokenizer().tokenize("the quick brown fox")
+    assert "the" not in result
+    for kept_word in ("quick", "brown", "fox"):
+        assert kept_word in result
+
+
+def test_tokenize_drops_short_tokens_below_min_length() -> None:
+    """Single-char ASCII tokens are dropped by the default ``min_length=2``."""
+    result = JiebaTokenizer().tokenize("a quick b run")
+    for dropped in ("a", "b"):
+        assert dropped not in result
+    for kept in ("quick", "run"):
+        assert kept in result
+
+
+def test_tokenize_is_case_insensitive() -> None:
+    """Lowercasing is part of the symmetric contract."""
+    tokenizer = JiebaTokenizer()
+    result = tokenizer.tokenize("HELLO World")
+    assert result == ["hello", "world"]
+
+
+def test_extra_stopwords_extend_defaults() -> None:
+    """Words passed as ``extra_stopwords`` are filtered from the output."""
+    custom_stopwords = frozenset({"hello"})
+    tokenizer = JiebaTokenizer(extra_stopwords=custom_stopwords)
+    result = tokenizer.tokenize("hello world")
+    assert "hello" not in result
+    assert "world" in result
+
+
+def test_custom_min_token_length_relaxes_filter() -> None:
+    """``min_token_length=1`` admits one-character tokens such as ``"b"``.
+
+    The stopword filter is independent of the length filter: the English
+    article ``"a"`` is still removed because it is a default stopword.
+    """
+    tokenizer = JiebaTokenizer(min_token_length=1)
+    result = tokenizer.tokenize("a quick b")
+    # Dropped as a stopword, not because of its length.
+    assert "a" not in result
+    for kept in ("b", "quick"):
+        assert kept in result
+
+
+def test_tokenize_batch_preserves_order() -> None:
+    """``tokenize_batch`` returns one token list per input, in input order.
+
+    Every slot is compared with the result of tokenizing that input on its
+    own, so a reordered or shifted batch fails instead of slipping past a
+    length-only check.
+    """
+    tk = JiebaTokenizer()
+    texts = ["foo bar", "baz", ""]
+    out = tk.tokenize_batch(texts)
+    assert len(out) == 3
+    assert out[2] == []
+    # Position i of the batch must equal tokenize(texts[i]).
+    assert list(out) == [tk.tokenize(text) for text in texts]
+
+
+def test_build_tokenizer_returns_jieba_default() -> None:
+    """Factory exposes the same JiebaTokenizer the cascade handler uses."""
+    tokenizer = build_tokenizer()
+    assert isinstance(tokenizer, JiebaTokenizer)
--- a/tests/unit/test_component/test_utils/__init__.py
+++ b/tests/unit/test_component/test_utils/__init__.py
--- a/tests/unit/test_component/test_utils/test_datetime.py
+++ b/tests/unit/test_component/test_utils/test_datetime.py