diff --git a/app-instance/backend/beaver/tasks/__init__.py b/app-instance/backend/beaver/tasks/__init__.py index dd908e3..65051aa 100644 --- a/app-instance/backend/beaver/tasks/__init__.py +++ b/app-instance/backend/beaver/tasks/__init__.py @@ -1,6 +1,6 @@ """Internal task tracking for automatic Main Agent task mode.""" -from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult +from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult, ValidationStatus from .planner import TaskExecutionPlan, TaskExecutionPlanner from .router import MainAgentRouter from .service import TaskService @@ -18,5 +18,6 @@ __all__ = [ "SkillResolutionReport", "TaskSkillResolver", "ValidationResult", + "ValidationStatus", "ValidationService", ] diff --git a/app-instance/backend/beaver/tasks/models.py b/app-instance/backend/beaver/tasks/models.py index e8093e1..65bf3ca 100644 --- a/app-instance/backend/beaver/tasks/models.py +++ b/app-instance/backend/beaver/tasks/models.py @@ -3,31 +3,60 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any +from typing import Any, Literal -TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_revision"} +ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"] + +TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"} @dataclass(slots=True) class ValidationResult: - passed: bool - score: float + status: ValidationStatus = "rejected" + score: float = 0.0 issues: list[str] = field(default_factory=list) missing_requirements: list[str] = field(default_factory=list) + evidence_gaps: list[str] = field(default_factory=list) recommended_revision_prompt: str = "" validator: str = "heuristic" + def __init__( + self, + *, + status: ValidationStatus | None = None, + passed: bool | None = None, + score: float = 0.0, + issues: list[str] | None = None, + 
missing_requirements: list[str] | None = None, + evidence_gaps: list[str] | None = None, + recommended_revision_prompt: str = "", + validator: str = "heuristic", + ) -> None: + self.status = status or ("accepted" if passed and score >= 0.75 else "rejected") + self.score = max(0.0, min(1.0, float(score or 0.0))) + self.issues = list(issues or []) + self.missing_requirements = list(missing_requirements or []) + self.evidence_gaps = list(evidence_gaps or []) + self.recommended_revision_prompt = recommended_revision_prompt + self.validator = validator + + @property + def passed(self) -> bool: + return self.status == "accepted" + @property def accepted(self) -> bool: - return self.passed and self.score >= 0.75 + return self.status == "accepted" def to_dict(self) -> dict[str, Any]: return { + "status": self.status, "passed": self.passed, "score": self.score, "issues": list(self.issues), "missing_requirements": list(self.missing_requirements), + "evidence_gaps": list(self.evidence_gaps), "recommended_revision_prompt": self.recommended_revision_prompt, "validator": self.validator, "accepted": self.accepted, @@ -37,11 +66,19 @@ class ValidationResult: def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None": if not isinstance(payload, dict): return None + raw_status = payload.get("status") + status: ValidationStatus | None = ( + raw_status + if raw_status in {"accepted", "rejected", "insufficient_evidence", "validator_error"} + else None + ) return cls( - passed=bool(payload.get("passed")), + status=status, + passed=bool(payload.get("passed")) if status is None else None, score=float(payload.get("score", 0.0) or 0.0), issues=[str(item) for item in payload.get("issues") or []], missing_requirements=[str(item) for item in payload.get("missing_requirements") or []], + evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []], recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""), 
validator=str(payload.get("validator") or "unknown"), ) @@ -73,6 +110,14 @@ class TaskRecord: def is_open(self) -> bool: return self.status in TASK_OPEN_STATUSES + @property + def is_execution_active(self) -> bool: + return self.status in {"running", "validating"} + + @property + def requires_user_action(self) -> bool: + return self.status in {"awaiting_feedback", "needs_review", "needs_revision"} + def to_dict(self) -> dict[str, Any]: return { "task_id": self.task_id, diff --git a/app-instance/backend/beaver/tasks/service.py b/app-instance/backend/beaver/tasks/service.py index f074f57..6cabf6f 100644 --- a/app-instance/backend/beaver/tasks/service.py +++ b/app-instance/backend/beaver/tasks/service.py @@ -77,6 +77,8 @@ class TaskService: payload = task.to_dict() payload["short_title"] = self.ensure_short_title(task).metadata.get("short_title") payload["is_open"] = task.is_open + payload["is_execution_active"] = task.is_execution_active + payload["requires_user_action"] = task.requires_user_action return payload def ensure_short_title(self, task: TaskRecord) -> TaskRecord: diff --git a/app-instance/backend/tests/unit/test_task_mode_feedback.py b/app-instance/backend/tests/unit/test_task_mode_feedback.py index 071ef1d..3157bea 100644 --- a/app-instance/backend/tests/unit/test_task_mode_feedback.py +++ b/app-instance/backend/tests/unit/test_task_mode_feedback.py @@ -393,6 +393,29 @@ def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback assert task.feedback[0]["comment"] == "准备补充穿衣建议" +def test_validation_result_status_drives_accepted_and_passed() -> None: + accepted = ValidationResult(status="accepted", score=0.9, validator="test") + insufficient = ValidationResult(status="insufficient_evidence", score=0.9, validator="test") + rejected = ValidationResult(status="rejected", score=0.9, validator="test") + + assert accepted.passed is True + assert accepted.accepted is True + assert insufficient.passed is False + assert insufficient.accepted is 
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
    """Payloads without ``status`` get one derived from ``passed`` and ``score``."""
    legacy_cases = (
        ({"passed": True, "score": 0.9, "validator": "legacy"}, "accepted"),
        ({"passed": False, "score": 0.2, "validator": "legacy"}, "rejected"),
    )
    for legacy_payload, expected_status in legacy_cases:
        restored = ValidationResult.from_dict(legacy_payload)
        assert restored is not None
        assert restored.status == expected_status