From 54466148286bc68a984c9c2a8c400e3c3d824698 Mon Sep 17 00:00:00 2001 From: steven_li Date: Fri, 22 May 2026 11:00:53 +0800 Subject: [PATCH] test(task): strengthen validation status semantics --- app-instance/backend/beaver/tasks/models.py | 9 ++- .../tests/unit/test_task_mode_feedback.py | 64 ++++++++++++++++++- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/app-instance/backend/beaver/tasks/models.py b/app-instance/backend/beaver/tasks/models.py index 65bf3ca..7e2a172 100644 --- a/app-instance/backend/beaver/tasks/models.py +++ b/app-instance/backend/beaver/tasks/models.py @@ -8,6 +8,7 @@ from typing import Any, Literal ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"] +VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"} TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"} @@ -33,6 +34,8 @@ class ValidationResult: recommended_revision_prompt: str = "", validator: str = "heuristic", ) -> None: + if status is not None and status not in VALIDATION_STATUSES: + raise ValueError(f"unknown validation status: {status}") self.status = status or ("accepted" if passed and score >= 0.75 else "rejected") self.score = max(0.0, min(1.0, float(score or 0.0))) self.issues = list(issues or []) @@ -67,11 +70,7 @@ class ValidationResult: if not isinstance(payload, dict): return None raw_status = payload.get("status") - status: ValidationStatus | None = ( - raw_status - if raw_status in {"accepted", "rejected", "insufficient_evidence", "validator_error"} - else None - ) + status: ValidationStatus | None = raw_status if raw_status in VALIDATION_STATUSES else None return cls( status=status, passed=bool(payload.get("passed")) if status is None else None, diff --git a/app-instance/backend/tests/unit/test_task_mode_feedback.py b/app-instance/backend/tests/unit/test_task_mode_feedback.py index 3157bea..dc81dda 100644 --- a/app-instance/backend/tests/unit/test_task_mode_feedback.py +++ b/app-instance/backend/tests/unit/test_task_mode_feedback.py @@ -13,7 +13,7 @@ from beaver.engine.providers.base import LLMProvider, LLMResponse from beaver.engine.providers.factory import ProviderBundle from beaver.services.agent_service import AgentService from beaver.skills.assembler import SkillAssemblyResult -from beaver.tasks import TaskExecutionPlan, TaskService, ValidationResult, ValidationService +from beaver.tasks import TaskExecutionPlan, TaskRecord, TaskService, ValidationResult, ValidationService class StubProvider(LLMProvider): @@ -153,6 +153,21 @@ def _main_only_bundle(*responses: str) -> ProviderBundle: ) +def _task_record(status: str) -> TaskRecord: + return TaskRecord( + task_id="task-1", + session_id="session-1", + description="test task", + goal="test task", + constraints=[], + priority=0, + status=status, + creator="main-agent", + created_at="2026-05-22T00:00:00+00:00", + updated_at="2026-05-22T00:00:00+00:00", + ) + + def test_simple_question_does_not_create_task(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( @@ -408,14 +423,61 @@ def test_validation_result_status_drives_accepted_and_passed() -> None: def test_validation_result_from_legacy_payload_maps_to_status() -> None: accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"}) + low_score = ValidationResult.from_dict({"passed": True, "score": 0.7, "validator": "legacy"}) rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"}) assert accepted is not None assert accepted.status == "accepted" + assert low_score is not None + assert low_score.status == "rejected" assert rejected is not None assert rejected.status == "rejected" +def test_validation_result_rejects_unknown_status() -> None: + with pytest.raises(ValueError, match="unknown validation status"): + ValidationResult(status="pending", score=0.9, validator="test") # type: ignore[arg-type] + + +def test_validation_result_evidence_gaps_round_trip() -> None: + validation = ValidationResult( + status="insufficient_evidence", + score=0.4, + evidence_gaps=["missing command output", "missing file reference"], + validator="test", + ) + + restored = ValidationResult.from_dict(validation.to_dict()) + + assert restored is not None + assert restored.status == "insufficient_evidence" + assert restored.evidence_gaps == ["missing command output", "missing file reference"] + assert restored.to_dict()["evidence_gaps"] == ["missing command output", "missing file reference"] + + +def test_task_record_status_helpers_distinguish_review_and_failed() -> None: + needs_review = _task_record("needs_review") + failed = _task_record("failed") + + assert needs_review.is_open is True + assert needs_review.is_execution_active is False + assert needs_review.requires_user_action is True + assert failed.is_open is False + assert failed.is_execution_active is False + assert failed.requires_user_action is False + + +def test_task_service_api_payload_emits_status_helpers(tmp_path: Path) -> None: + service = TaskService(tmp_path) + task = _task_record("needs_review") + + payload = service.to_api_dict(task) + + assert payload["is_open"] is True + assert payload["is_execution_active"] is False + assert payload["requires_user_action"] is True + + def test_validation_failure_retries_once(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader(