test(task): strengthen validation status semantics
This commit is contained in:
@ -8,6 +8,7 @@ from typing import Any, Literal
|
|||||||
|
|
||||||
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
|
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
|
||||||
|
|
||||||
|
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
|
||||||
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
|
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
|
||||||
|
|
||||||
|
|
||||||
@ -33,6 +34,8 @@ class ValidationResult:
|
|||||||
recommended_revision_prompt: str = "",
|
recommended_revision_prompt: str = "",
|
||||||
validator: str = "heuristic",
|
validator: str = "heuristic",
|
||||||
) -> None:
|
) -> None:
|
||||||
|
if status is not None and status not in VALIDATION_STATUSES:
|
||||||
|
raise ValueError(f"unknown validation status: {status}")
|
||||||
self.status = status or ("accepted" if passed and score >= 0.75 else "rejected")
|
self.status = status or ("accepted" if passed and score >= 0.75 else "rejected")
|
||||||
self.score = max(0.0, min(1.0, float(score or 0.0)))
|
self.score = max(0.0, min(1.0, float(score or 0.0)))
|
||||||
self.issues = list(issues or [])
|
self.issues = list(issues or [])
|
||||||
@ -67,11 +70,7 @@ class ValidationResult:
|
|||||||
if not isinstance(payload, dict):
|
if not isinstance(payload, dict):
|
||||||
return None
|
return None
|
||||||
raw_status = payload.get("status")
|
raw_status = payload.get("status")
|
||||||
status: ValidationStatus | None = (
|
status: ValidationStatus | None = raw_status if raw_status in VALIDATION_STATUSES else None
|
||||||
raw_status
|
|
||||||
if raw_status in {"accepted", "rejected", "insufficient_evidence", "validator_error"}
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
return cls(
|
return cls(
|
||||||
status=status,
|
status=status,
|
||||||
passed=bool(payload.get("passed")) if status is None else None,
|
passed=bool(payload.get("passed")) if status is None else None,
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from beaver.engine.providers.base import LLMProvider, LLMResponse
|
|||||||
from beaver.engine.providers.factory import ProviderBundle
|
from beaver.engine.providers.factory import ProviderBundle
|
||||||
from beaver.services.agent_service import AgentService
|
from beaver.services.agent_service import AgentService
|
||||||
from beaver.skills.assembler import SkillAssemblyResult
|
from beaver.skills.assembler import SkillAssemblyResult
|
||||||
from beaver.tasks import TaskExecutionPlan, TaskService, ValidationResult, ValidationService
|
from beaver.tasks import TaskExecutionPlan, TaskRecord, TaskService, ValidationResult, ValidationService
|
||||||
|
|
||||||
|
|
||||||
class StubProvider(LLMProvider):
|
class StubProvider(LLMProvider):
|
||||||
@ -153,6 +153,21 @@ def _main_only_bundle(*responses: str) -> ProviderBundle:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _task_record(status: str) -> TaskRecord:
    """Build a ``TaskRecord`` fixture in which only *status* varies.

    All remaining fields are fixed placeholder values so that tests can
    exercise status-dependent behavior in isolation.
    """
    summary = "test task"
    timestamp = "2026-05-22T00:00:00+00:00"
    return TaskRecord(
        task_id="task-1",
        session_id="session-1",
        description=summary,
        goal=summary,
        constraints=[],
        priority=0,
        status=status,
        creator="main-agent",
        created_at=timestamp,
        updated_at=timestamp,
    )
|
||||||
|
|
||||||
|
|
||||||
def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
|
def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
|
||||||
service = AgentService(
|
service = AgentService(
|
||||||
loader=EngineLoader(
|
loader=EngineLoader(
|
||||||
@ -408,14 +423,61 @@ def test_validation_result_status_drives_accepted_and_passed() -> None:
|
|||||||
|
|
||||||
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
|
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
|
||||||
accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"})
|
accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"})
|
||||||
|
low_score = ValidationResult.from_dict({"passed": True, "score": 0.7, "validator": "legacy"})
|
||||||
rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"})
|
rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"})
|
||||||
|
|
||||||
assert accepted is not None
|
assert accepted is not None
|
||||||
assert accepted.status == "accepted"
|
assert accepted.status == "accepted"
|
||||||
|
assert low_score is not None
|
||||||
|
assert low_score.status == "rejected"
|
||||||
assert rejected is not None
|
assert rejected is not None
|
||||||
assert rejected.status == "rejected"
|
assert rejected.status == "rejected"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validation_result_rejects_unknown_status() -> None:
    """Constructing a ``ValidationResult`` with status "pending" raises ``ValueError``."""
    expected_message = "unknown validation status"
    with pytest.raises(ValueError, match=expected_message):
        # "pending" is deliberately outside the declared status literal.
        ValidationResult(status="pending", score=0.9, validator="test")  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_validation_result_evidence_gaps_round_trip() -> None:
    """Status and evidence gaps survive a ``to_dict`` / ``from_dict`` round trip."""
    expected_gaps = ["missing command output", "missing file reference"]
    original = ValidationResult(
        status="insufficient_evidence",
        score=0.4,
        # Pass a copy so the expectation list stays independent of the result.
        evidence_gaps=list(expected_gaps),
        validator="test",
    )

    serialized = original.to_dict()
    restored = ValidationResult.from_dict(serialized)

    assert restored is not None
    assert restored.status == "insufficient_evidence"
    assert restored.evidence_gaps == expected_gaps
    # Re-serializing the restored object must still carry the gaps.
    assert restored.to_dict()["evidence_gaps"] == expected_gaps
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_record_status_helpers_distinguish_review_and_failed() -> None:
    """Check the three status helper flags for "needs_review" and "failed" records."""
    needs_review = _task_record("needs_review")
    failed = _task_record("failed")

    # (record, is_open, is_execution_active, requires_user_action)
    expectations = (
        (needs_review, True, False, True),
        (failed, False, False, False),
    )

    for record, want_open, want_active, want_user_action in expectations:
        # Identity checks: the helpers must yield real booleans, not truthy values.
        assert record.is_open is want_open
        assert record.is_execution_active is want_active
        assert record.requires_user_action is want_user_action
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_service_api_payload_emits_status_helpers(tmp_path: Path) -> None:
    """``TaskService.to_api_dict`` exposes the status helper flags for a "needs_review" task."""
    record = _task_record("needs_review")
    api_payload = TaskService(tmp_path).to_api_dict(record)

    expected_flags = (
        ("is_open", True),
        ("is_execution_active", False),
        ("requires_user_action", True),
    )
    for key, expected in expected_flags:
        # Identity checks: the payload must carry real booleans.
        assert api_payload[key] is expected
|
||||||
|
|
||||||
|
|
||||||
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
||||||
service = AgentService(
|
service = AgentService(
|
||||||
loader=EngineLoader(
|
loader=EngineLoader(
|
||||||
|
|||||||
Reference in New Issue
Block a user