Files
beaver_project/app-instance/backend/tests/unit/test_skill_learning_surrogate.py

32 lines
1.0 KiB
Python

from __future__ import annotations
import asyncio
from beaver.skills.learning.surrogate import SurrogateToolEvaluator
def test_surrogate_scores_complete_candidate_higher_than_missing_baseline() -> None:
    """Check that a filled-in surrogate email call outscores a blank one.

    Two arms issue the same ``mcp_outlook_send_email`` tool call in
    ``surrogate`` mode. The baseline arm passes empty ``to`` and ``subject``
    values and no ``body``; the candidate arm supplies all three. The
    evaluator's result is expected to:

    * rank the candidate strictly above the baseline,
    * report two surrogate tool calls (presumably one per arm; the counting
      rule lives in the evaluator, not in this test),
    * report a confidence of ``"low"`` or ``"medium"``.
    """
    email_tool = "mcp_outlook_send_email"
    surrogate_mode = "surrogate"
    task = "Send a status email to Ada."

    evaluator = SurrogateToolEvaluator()

    # Baseline arm: recipient and subject are present but blank, body is absent.
    incomplete_call = {
        "tool_name": email_tool,
        "mode": surrogate_mode,
        "arguments": {"to": "", "subject": ""},
    }
    baseline = {"arm": "baseline", "tool_calls": [incomplete_call]}

    # Candidate arm: every email field is populated.
    complete_call = {
        "tool_name": email_tool,
        "mode": surrogate_mode,
        "arguments": {
            "to": "ada@example.com",
            "subject": "Status",
            "body": "Done",
        },
    }
    candidate = {"arm": "candidate", "tool_calls": [complete_call]}

    # evaluate() is a coroutine function; drive it to completion synchronously.
    evaluation = evaluator.evaluate(
        task_text=task,
        baseline=baseline,
        candidate=candidate,
    )
    result = asyncio.run(evaluation)

    assert result["candidate_score"] > result["baseline_score"]
    assert result["surrogate_tool_count"] == 2
    assert result["confidence"] in {"low", "medium"}