feat(task): add validation status semantics
This commit is contained in:
@ -1,6 +1,6 @@
|
|||||||
"""Internal task tracking for automatic Main Agent task mode."""
|
"""Internal task tracking for automatic Main Agent task mode."""
|
||||||
|
|
||||||
from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult
|
from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult, ValidationStatus
|
||||||
from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
||||||
from .router import MainAgentRouter
|
from .router import MainAgentRouter
|
||||||
from .service import TaskService
|
from .service import TaskService
|
||||||
@ -18,5 +18,6 @@ __all__ = [
|
|||||||
"SkillResolutionReport",
|
"SkillResolutionReport",
|
||||||
"TaskSkillResolver",
|
"TaskSkillResolver",
|
||||||
"ValidationResult",
|
"ValidationResult",
|
||||||
|
"ValidationStatus",
|
||||||
"ValidationService",
|
"ValidationService",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -3,31 +3,60 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
|
||||||
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_revision"}
|
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
|
||||||
|
|
||||||
|
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class ValidationResult:
|
class ValidationResult:
|
||||||
passed: bool
|
status: ValidationStatus = "rejected"
|
||||||
score: float
|
score: float = 0.0
|
||||||
issues: list[str] = field(default_factory=list)
|
issues: list[str] = field(default_factory=list)
|
||||||
missing_requirements: list[str] = field(default_factory=list)
|
missing_requirements: list[str] = field(default_factory=list)
|
||||||
|
evidence_gaps: list[str] = field(default_factory=list)
|
||||||
recommended_revision_prompt: str = ""
|
recommended_revision_prompt: str = ""
|
||||||
validator: str = "heuristic"
|
validator: str = "heuristic"
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
status: ValidationStatus | None = None,
|
||||||
|
passed: bool | None = None,
|
||||||
|
score: float = 0.0,
|
||||||
|
issues: list[str] | None = None,
|
||||||
|
missing_requirements: list[str] | None = None,
|
||||||
|
evidence_gaps: list[str] | None = None,
|
||||||
|
recommended_revision_prompt: str = "",
|
||||||
|
validator: str = "heuristic",
|
||||||
|
) -> None:
|
||||||
|
self.status = status or ("accepted" if passed and score >= 0.75 else "rejected")
|
||||||
|
self.score = max(0.0, min(1.0, float(score or 0.0)))
|
||||||
|
self.issues = list(issues or [])
|
||||||
|
self.missing_requirements = list(missing_requirements or [])
|
||||||
|
self.evidence_gaps = list(evidence_gaps or [])
|
||||||
|
self.recommended_revision_prompt = recommended_revision_prompt
|
||||||
|
self.validator = validator
|
||||||
|
|
||||||
|
@property
|
||||||
|
def passed(self) -> bool:
|
||||||
|
return self.status == "accepted"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def accepted(self) -> bool:
|
def accepted(self) -> bool:
|
||||||
return self.passed and self.score >= 0.75
|
return self.status == "accepted"
|
||||||
|
|
||||||
def to_dict(self) -> dict[str, Any]:
|
def to_dict(self) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
|
"status": self.status,
|
||||||
"passed": self.passed,
|
"passed": self.passed,
|
||||||
"score": self.score,
|
"score": self.score,
|
||||||
"issues": list(self.issues),
|
"issues": list(self.issues),
|
||||||
"missing_requirements": list(self.missing_requirements),
|
"missing_requirements": list(self.missing_requirements),
|
||||||
|
"evidence_gaps": list(self.evidence_gaps),
|
||||||
"recommended_revision_prompt": self.recommended_revision_prompt,
|
"recommended_revision_prompt": self.recommended_revision_prompt,
|
||||||
"validator": self.validator,
|
"validator": self.validator,
|
||||||
"accepted": self.accepted,
|
"accepted": self.accepted,
|
||||||
@ -37,11 +66,19 @@ class ValidationResult:
|
|||||||
def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None":
|
def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None":
|
||||||
if not isinstance(payload, dict):
|
if not isinstance(payload, dict):
|
||||||
return None
|
return None
|
||||||
|
raw_status = payload.get("status")
|
||||||
|
status: ValidationStatus | None = (
|
||||||
|
raw_status
|
||||||
|
if raw_status in {"accepted", "rejected", "insufficient_evidence", "validator_error"}
|
||||||
|
else None
|
||||||
|
)
|
||||||
return cls(
|
return cls(
|
||||||
passed=bool(payload.get("passed")),
|
status=status,
|
||||||
|
passed=bool(payload.get("passed")) if status is None else None,
|
||||||
score=float(payload.get("score", 0.0) or 0.0),
|
score=float(payload.get("score", 0.0) or 0.0),
|
||||||
issues=[str(item) for item in payload.get("issues") or []],
|
issues=[str(item) for item in payload.get("issues") or []],
|
||||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||||
|
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
|
||||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||||
validator=str(payload.get("validator") or "unknown"),
|
validator=str(payload.get("validator") or "unknown"),
|
||||||
)
|
)
|
||||||
@ -73,6 +110,14 @@ class TaskRecord:
|
|||||||
def is_open(self) -> bool:
|
def is_open(self) -> bool:
|
||||||
return self.status in TASK_OPEN_STATUSES
|
return self.status in TASK_OPEN_STATUSES
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_execution_active(self) -> bool:
|
||||||
|
return self.status in {"running", "validating"}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_user_action(self) -> bool:
|
||||||
|
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
|
||||||
|
|
||||||
def to_dict(self) -> dict[str, Any]:
|
def to_dict(self) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"task_id": self.task_id,
|
"task_id": self.task_id,
|
||||||
|
|||||||
@ -77,6 +77,8 @@ class TaskService:
|
|||||||
payload = task.to_dict()
|
payload = task.to_dict()
|
||||||
payload["short_title"] = self.ensure_short_title(task).metadata.get("short_title")
|
payload["short_title"] = self.ensure_short_title(task).metadata.get("short_title")
|
||||||
payload["is_open"] = task.is_open
|
payload["is_open"] = task.is_open
|
||||||
|
payload["is_execution_active"] = task.is_execution_active
|
||||||
|
payload["requires_user_action"] = task.requires_user_action
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
def ensure_short_title(self, task: TaskRecord) -> TaskRecord:
|
def ensure_short_title(self, task: TaskRecord) -> TaskRecord:
|
||||||
|
|||||||
@ -393,6 +393,29 @@ def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback
|
|||||||
assert task.feedback[0]["comment"] == "准备补充穿衣建议"
|
assert task.feedback[0]["comment"] == "准备补充穿衣建议"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validation_result_status_drives_accepted_and_passed() -> None:
|
||||||
|
accepted = ValidationResult(status="accepted", score=0.9, validator="test")
|
||||||
|
insufficient = ValidationResult(status="insufficient_evidence", score=0.9, validator="test")
|
||||||
|
rejected = ValidationResult(status="rejected", score=0.9, validator="test")
|
||||||
|
|
||||||
|
assert accepted.passed is True
|
||||||
|
assert accepted.accepted is True
|
||||||
|
assert insufficient.passed is False
|
||||||
|
assert insufficient.accepted is False
|
||||||
|
assert rejected.passed is False
|
||||||
|
assert rejected.accepted is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
|
||||||
|
accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"})
|
||||||
|
rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"})
|
||||||
|
|
||||||
|
assert accepted is not None
|
||||||
|
assert accepted.status == "accepted"
|
||||||
|
assert rejected is not None
|
||||||
|
assert rejected.status == "rejected"
|
||||||
|
|
||||||
|
|
||||||
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
||||||
service = AgentService(
|
service = AgentService(
|
||||||
loader=EngineLoader(
|
loader=EngineLoader(
|
||||||
|
|||||||
Reference in New Issue
Block a user