feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核
新增内部Task系统，包括验证、反馈门控机制，实现自动质量验证（passed且score>=0.75）和用户反馈闭环（satisfied/revise/abandon）。实现Agent Team v1协调器，支持sequence/parallel/dag执行策略，sub-agent复用主AgentLoop，每个run使用独立memory snapshot。建立Skill学习pipeline，包含draft/审核/发布/回滚完整生命周期，仅当Task验证通过且用户满意时才生成学习候选。重构目录结构，移除third_party依赖，建立统一engine内核，所有agent共享运行时基础组件。更新ContextBuilder清理provider消息字段，增强SkillContext版本管理，集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
22
app-instance/backend/beaver/tasks/__init__.py
Normal file
22
app-instance/backend/beaver/tasks/__init__.py
Normal file
@ -0,0 +1,22 @@
|
||||
"""Internal task tracking for automatic Main Agent task mode."""
|
||||
|
||||
from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult
|
||||
from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
||||
from .router import MainAgentRouter
|
||||
from .service import TaskService
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
from .validation import ValidationService
|
||||
|
||||
__all__ = [
|
||||
"MainAgentDecision",
|
||||
"MainAgentRouter",
|
||||
"TaskEvent",
|
||||
"TaskExecutionPlan",
|
||||
"TaskExecutionPlanner",
|
||||
"TaskRecord",
|
||||
"TaskService",
|
||||
"SkillResolutionReport",
|
||||
"TaskSkillResolver",
|
||||
"ValidationResult",
|
||||
"ValidationService",
|
||||
]
|
||||
178
app-instance/backend/beaver/tasks/models.py
Normal file
178
app-instance/backend/beaver/tasks/models.py
Normal file
@ -0,0 +1,178 @@
|
||||
"""Models for internal task tracking and validation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Status values for which TaskRecord.is_open returns True.
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_revision"}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ValidationResult:
|
||||
passed: bool
|
||||
score: float
|
||||
issues: list[str] = field(default_factory=list)
|
||||
missing_requirements: list[str] = field(default_factory=list)
|
||||
recommended_revision_prompt: str = ""
|
||||
validator: str = "heuristic"
|
||||
|
||||
@property
|
||||
def accepted(self) -> bool:
|
||||
return self.passed and self.score >= 0.75
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"passed": self.passed,
|
||||
"score": self.score,
|
||||
"issues": list(self.issues),
|
||||
"missing_requirements": list(self.missing_requirements),
|
||||
"recommended_revision_prompt": self.recommended_revision_prompt,
|
||||
"validator": self.validator,
|
||||
"accepted": self.accepted,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None":
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
return cls(
|
||||
passed=bool(payload.get("passed")),
|
||||
score=float(payload.get("score", 0.0) or 0.0),
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||
validator=str(payload.get("validator") or "unknown"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskRecord:
|
||||
task_id: str
|
||||
session_id: str
|
||||
description: str
|
||||
goal: str
|
||||
constraints: list[str]
|
||||
priority: int
|
||||
status: str
|
||||
creator: str
|
||||
created_at: str
|
||||
updated_at: str
|
||||
parent_task_id: str | None = None
|
||||
closed_at: str | None = None
|
||||
close_reason: str | None = None
|
||||
satisfaction: float | None = None
|
||||
run_ids: list[str] = field(default_factory=list)
|
||||
skill_names: list[str] = field(default_factory=list)
|
||||
feedback: list[dict[str, Any]] = field(default_factory=list)
|
||||
validation_result: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def is_open(self) -> bool:
|
||||
return self.status in TASK_OPEN_STATUSES
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"task_id": self.task_id,
|
||||
"session_id": self.session_id,
|
||||
"parent_task_id": self.parent_task_id,
|
||||
"description": self.description,
|
||||
"goal": self.goal,
|
||||
"constraints": list(self.constraints),
|
||||
"priority": self.priority,
|
||||
"status": self.status,
|
||||
"creator": self.creator,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"closed_at": self.closed_at,
|
||||
"close_reason": self.close_reason,
|
||||
"satisfaction": self.satisfaction,
|
||||
"run_ids": list(self.run_ids),
|
||||
"skill_names": list(self.skill_names),
|
||||
"feedback": list(self.feedback),
|
||||
"validation_result": self.validation_result,
|
||||
"metadata": dict(self.metadata),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "TaskRecord":
|
||||
return cls(
|
||||
task_id=str(payload["task_id"]),
|
||||
session_id=str(payload["session_id"]),
|
||||
parent_task_id=_optional_str(payload.get("parent_task_id")),
|
||||
description=str(payload.get("description") or ""),
|
||||
goal=str(payload.get("goal") or payload.get("description") or ""),
|
||||
constraints=[str(item) for item in payload.get("constraints") or []],
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
status=str(payload.get("status") or "open"),
|
||||
creator=str(payload.get("creator") or "main-agent"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
updated_at=str(payload.get("updated_at") or ""),
|
||||
closed_at=_optional_str(payload.get("closed_at")),
|
||||
close_reason=_optional_str(payload.get("close_reason")),
|
||||
satisfaction=_optional_float(payload.get("satisfaction")),
|
||||
run_ids=[str(item) for item in payload.get("run_ids") or []],
|
||||
skill_names=[str(item) for item in payload.get("skill_names") or []],
|
||||
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
|
||||
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
|
||||
metadata=dict(payload.get("metadata") or {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskEvent:
|
||||
event_id: str
|
||||
task_id: str
|
||||
session_id: str
|
||||
event_type: str
|
||||
created_at: str
|
||||
run_id: str | None = None
|
||||
payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"event_id": self.event_id,
|
||||
"task_id": self.task_id,
|
||||
"session_id": self.session_id,
|
||||
"run_id": self.run_id,
|
||||
"event_type": self.event_type,
|
||||
"created_at": self.created_at,
|
||||
"payload": dict(self.payload),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "TaskEvent":
|
||||
return cls(
|
||||
event_id=str(payload["event_id"]),
|
||||
task_id=str(payload["task_id"]),
|
||||
session_id=str(payload["session_id"]),
|
||||
run_id=_optional_str(payload.get("run_id")),
|
||||
event_type=str(payload.get("event_type") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
payload=dict(payload.get("payload") or {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MainAgentDecision:
|
||||
mode: str
|
||||
reason: str
|
||||
starts_new_task: bool = False
|
||||
|
||||
@property
|
||||
def is_task(self) -> bool:
|
||||
return self.mode == "task"
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
|
||||
def _optional_float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
288
app-instance/backend/beaver/tasks/planner.py
Normal file
288
app-instance/backend/beaver/tasks/planner.py
Normal file
@ -0,0 +1,288 @@
|
||||
"""Internal Task execution planner for single-agent vs team execution."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
|
||||
|
||||
# Execution modes a plan can carry: one main-agent pass ("single") or a sub-agent team ("team").
TaskExecutionMode = Literal["single", "team"]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskExecutionPlan:
|
||||
mode: TaskExecutionMode
|
||||
reason: str = ""
|
||||
graph: ExecutionGraph | None = None
|
||||
final_synthesis_instruction: str = ""
|
||||
fallback_error: str | None = None
|
||||
skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def is_team(self) -> bool:
|
||||
return self.mode == "team" and self.graph is not None
|
||||
|
||||
@classmethod
|
||||
def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
|
||||
return cls(mode="single", reason=reason, fallback_error=fallback_error)
|
||||
|
||||
def to_event_payload(self) -> dict[str, Any]:
|
||||
strategy = self.graph.strategy if self.graph is not None else None
|
||||
nodes = self.graph.nodes if self.graph is not None else []
|
||||
return {
|
||||
"plan_mode": self.mode,
|
||||
"reason": self.reason,
|
||||
"strategy": strategy,
|
||||
"node_ids": [node.node_id for node in nodes],
|
||||
"skill_queries": [
|
||||
str(node.agent.metadata.get("skill_query") or "")
|
||||
for node in nodes
|
||||
],
|
||||
"selected_skill_names": [
|
||||
name
|
||||
for node in nodes
|
||||
for name in node.inherited_pinned_skills
|
||||
],
|
||||
"generated_skill_draft_ids": [
|
||||
item.generated_skill_draft_id
|
||||
for item in self.skill_resolution_report
|
||||
if item.generated_skill_draft_id
|
||||
],
|
||||
"skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
|
||||
"fallback_error": self.fallback_error,
|
||||
}
|
||||
|
||||
|
||||
class TaskExecutionPlanner:
    """Plan whether a Task attempt should run through a team first.

    The planner asks a model for a compact JSON plan, turns it into a
    ``TaskExecutionPlan`` and optionally lets a ``TaskSkillResolver`` resolve
    the graph. Every failure degrades to a single-agent plan instead of
    raising.
    """

    _MAX_NODES = 6
    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}

    def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
        """Store the optional resolver; without one, team graphs are used as planned."""
        self.task_skill_resolver = task_skill_resolver

    async def plan(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> TaskExecutionPlan:
        """Ask the planner model for an execution plan for this attempt.

        The auxiliary provider/runtime is preferred over the main one. Returns
        a single-agent plan when no provider is available
        (``planner_provider_unavailable``) or when anything raises
        (``planner_failed``, with the error text in ``fallback_error``).
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is None:
            return TaskExecutionPlan.single("planner_provider_unavailable")
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You choose whether an internal Beaver Task attempt should run as a single "
                            "main-agent pass or use a small sub-agent team first. Return only compact JSON."
                        ),
                    },
                    {
                        "role": "user",
                        "content": self._prompt(
                            task=task,
                            user_message=user_message,
                            attempt_index=attempt_index,
                            latest_validation=latest_validation,
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0.0,
            )
            plan = self.from_json(response.content or "")
            return await self._resolve_plan(
                plan,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
        except Exception as exc:
            # Deliberate best-effort: planning must never block the task itself.
            return TaskExecutionPlan.single("planner_failed", fallback_error=str(exc))

    async def _resolve_plan(
        self,
        plan: TaskExecutionPlan,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle | None,
    ) -> TaskExecutionPlan:
        """Run the skill resolver over a team plan's graph.

        Non-team plans, or plans produced without a resolver, are returned
        unchanged. A missing provider bundle or any resolver/validation error
        falls back to a single-agent plan.
        """
        if not plan.is_team or self.task_skill_resolver is None:
            return plan
        if provider_bundle is None:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error="task_skill_resolver_provider_unavailable")
        try:
            assert plan.graph is not None  # guaranteed by is_team; narrows the type
            graph, reports = await self.task_skill_resolver.resolve_graph(
                plan.graph,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            graph.validate()
            plan.graph = graph
            plan.skill_resolution_report = reports
            return plan
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")

    def from_json(self, text: str) -> TaskExecutionPlan:
        """Parse the planner model's reply into a plan.

        Any mode other than ``"team"`` yields a single-agent plan. A malformed
        reply or an invalid graph also yields a single-agent plan, with the
        error text in ``fallback_error``.
        """
        try:
            payload = self._parse_json_object(text)
            mode = str(payload.get("mode") or "single").strip().lower()
            reason = str(payload.get("reason") or "")
            if mode != "team":
                return TaskExecutionPlan.single(reason or "planner_selected_single")

            graph = self._graph_from_payload(payload)
            graph.validate()
            return TaskExecutionPlan(
                mode="team",
                reason=reason or "planner_selected_team",
                graph=graph,
                final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
            )
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))

    def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
        """Build an ``ExecutionGraph`` from the parsed planner JSON.

        Raises:
            ValueError: for an unsupported strategy, a missing/empty/oversized
                node list, a non-object node, or a node without id or task.
        """
        strategy = str(payload.get("strategy") or "sequence").strip().lower()
        if strategy not in self._SUPPORTED_STRATEGIES:
            raise ValueError(f"Unsupported team strategy: {strategy}")
        raw_nodes = payload.get("nodes")
        if not isinstance(raw_nodes, list) or not raw_nodes:
            raise ValueError("Team plan requires at least one node")
        if len(raw_nodes) > self._MAX_NODES:
            raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")

        nodes: list[ExecutionNode] = []
        for index, item in enumerate(raw_nodes, start=1):
            if not isinstance(item, dict):
                raise ValueError("Each team node must be an object")
            agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
            skill_query = str(item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or "").strip()
            requested_capabilities = _string_list(
                item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
            )
            requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
            node_id = str(item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}").strip()
            task = str(item.get("task") or "").strip()
            if not node_id or not task:
                raise ValueError("Each team node requires node_id/id and task")
            nodes.append(
                ExecutionNode(
                    node_id=node_id,
                    task=task,
                    agent=AgentDescriptor(
                        name=node_id,
                        role="",
                        system_prompt="",
                        metadata={
                            "skill_query": skill_query,
                            "required_capabilities": requested_capabilities,
                            "requested_tags": requested_tags,
                            "sub_agent_kind": "generic_skill_worker",
                        },
                    ),
                    # Models sometimes emit a bare string where a list is expected;
                    # _as_text_list keeps it whole instead of splitting it into characters.
                    depends_on=self._as_text_list(item.get("depends_on")),
                    inherited_pinned_skills=self._as_text_list(item.get("pinned_skills")),
                    constraints=self._as_text_list(item.get("constraints")),
                    expected_output=str(item.get("expected_output") or "") or None,
                )
            )
        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]

    @staticmethod
    def _as_text_list(value: Any) -> list[str]:
        """Coerce a loosely typed JSON value into a list of strings.

        Falsy values give ``[]``, a bare string becomes a one-element list,
        and any other iterable has each element passed through ``str``.
        """
        items = value or []
        if isinstance(items, str):
            items = [items]
        return [str(entry) for entry in items]

    @staticmethod
    def _prompt(
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None,
    ) -> str:
        """Render the user prompt sent to the planner model.

        The previous validation result, when given, is appended as JSON.
        """
        validation_note = ""
        if latest_validation is not None:
            validation_note = (
                "\nPrevious validation issues:\n"
                + json.dumps(latest_validation.to_dict(), ensure_ascii=False)
            )
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
            "would materially improve the result. Otherwise use mode=single.\n\n"
            "JSON schema:\n"
            "{\n"
            ' "mode": "single" | "team",\n'
            ' "reason": "short reason",\n'
            ' "strategy": "sequence" | "parallel" | "dag",\n'
            ' "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
            '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
            ' "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
            "}\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
            f"{validation_note}"
        )

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and parse the JSON object contained in a model reply.

        Markdown code fences and a leading ``json`` tag are stripped, then the
        span from the first ``{`` to the last ``}`` is parsed.

        Raises:
            ValueError: when the text is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or the parsed value is not an object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("planner response must be a JSON object")
        return payload
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
    """Normalize a list or a comma-separated string into unique, non-empty strings.

    Items are stringified and stripped, blanks are dropped, and the first
    occurrence wins so the original order is kept. Any value that is neither
    a list nor a string yields ``[]``.
    """
    if isinstance(value, str):
        candidates = value.split(",")
    elif isinstance(value, list):
        candidates = value
    else:
        return []
    stripped = (str(entry).strip() for entry in candidates)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(text for text in stripped if text))
|
||||
40
app-instance/backend/beaver/tasks/router.py
Normal file
40
app-instance/backend/beaver/tasks/router.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""Main Agent routing between simple chat and internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from .models import MainAgentDecision, TaskRecord
|
||||
|
||||
|
||||
class MainAgentRouter:
    """Rule-based classifier that runs ahead of the main AgentLoop.

    No model call is made, so routing keeps working during provider outages.
    The rules lean towards Task mode: wording that suggests execution, files,
    tools, iteration or validation is routed there, as are long messages.
    """

    _TASK_PATTERNS = [
        r"\b(implement|fix|debug|refactor|migrate|build|create|write|edit|update|test|validate|deploy)\b",
        r"\b(file|repo|code|project|backend|frontend|api|database|migration|pull request|ci|bug)\b",
        r"\b(step|multi-step|workflow|plan and|then)\b",
        r"(实现|修复|调试|重构|迁移|构建|创建|编写|修改|更新|测试|验证|部署|文件|代码|项目|前端|后端|接口|数据库|多步|任务)",
    ]
    _NEW_TASK_PATTERNS = [
        r"\b(new task|another task|different task|start over)\b",
        r"(新任务|另一个任务|换个任务|重新开始)",
    ]

    @staticmethod
    def _matches_any(patterns: list[str], haystack: str) -> bool:
        """Return True when at least one pattern is found in ``haystack`` (case-insensitive)."""
        for pattern in patterns:
            if re.search(pattern, haystack, re.IGNORECASE) is not None:
                return True
        return False

    def classify(self, message: str, *, active_task: TaskRecord | None = None) -> MainAgentDecision:
        """Route ``message`` to ``"task"`` or ``"simple"`` mode.

        Rules are checked in order:
        1. An active task awaiting feedback or revision is continued, unless
           the message asks for a new task.
        2. A task keyword match selects Task mode.
        3. A message longer than 240 characters selects Task mode.
        4. Anything else is a simple question.
        """
        trimmed = message.strip()
        haystack = trimmed.lower()
        wants_fresh_task = self._matches_any(self._NEW_TASK_PATTERNS, haystack)

        resumable = active_task is not None and active_task.status in {"awaiting_feedback", "needs_revision"}
        if resumable and not wants_fresh_task:
            return MainAgentDecision(mode="task", reason="continuing_open_task", starts_new_task=False)

        if self._matches_any(self._TASK_PATTERNS, haystack):
            return MainAgentDecision(mode="task", reason="task_pattern_matched", starts_new_task=wants_fresh_task)

        if len(trimmed) > 240:
            return MainAgentDecision(mode="task", reason="long_request", starts_new_task=wants_fresh_task)

        return MainAgentDecision(mode="simple", reason="simple_question", starts_new_task=False)
|
||||
167
app-instance/backend/beaver/tasks/service.py
Normal file
167
app-instance/backend/beaver/tasks/service.py
Normal file
@ -0,0 +1,167 @@
|
||||
"""Internal service for automatic Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from .models import TaskEvent, TaskRecord, ValidationResult
|
||||
from .store import TaskStore
|
||||
|
||||
|
||||
class TaskService:
    """Lifecycle operations for internal tasks, backed by a ``TaskStore``.

    Each mutating call saves the task through ``store.upsert_task`` and then
    appends a matching ``TaskEvent``.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the service with a ``TaskStore`` built from ``root``."""
        self.store = TaskStore(root)

    def create_task(
        self,
        *,
        session_id: str,
        description: str,
        creator: str = "main-agent",
        metadata: dict[str, Any] | None = None,
    ) -> TaskRecord:
        """Create, persist and return a new ``"open"`` task.

        The description doubles as the goal; a ``"created"`` event is logged.
        """
        timestamp = self._now()
        record = TaskRecord(
            task_id=uuid4().hex,
            session_id=session_id,
            description=description,
            goal=description,
            constraints=[],
            priority=0,
            status="open",
            creator=creator,
            created_at=timestamp,
            updated_at=timestamp,
            metadata=dict(metadata or {}),
        )
        self.store.upsert_task(record)
        self._event(record, "created", payload={"description": description})
        return record

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the store's result for ``task_id``."""
        return self.store.get_task(task_id)

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Return the store's result for the task associated with ``run_id``."""
        return self.store.get_task_by_run_id(run_id)

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the store's latest open task for ``session_id``."""
        return self.store.get_latest_open_task(session_id)

    def start_run(self, task_id: str, *, user_message: str, attempt_index: int) -> TaskRecord:
        """Mark the task ``"running"`` and remember the message and attempt index.

        Raises:
            ValueError: if ``task_id`` is unknown.
        """
        record = self._require(task_id)
        record.status = "running"
        record.updated_at = self._now()
        record.metadata.update(
            latest_user_message=user_message,
            latest_attempt_index=attempt_index,
        )
        self.store.upsert_task(record)
        run_details = {"user_message": user_message, "attempt_index": attempt_index}
        self._event(record, "run_started", payload=run_details)
        return record

    def append_run(self, task_id: str, run_id: str, *, skill_names: list[str] | None = None) -> TaskRecord:
        """Attach a finished run and the skills it used, skipping duplicates.

        Raises:
            ValueError: if ``task_id`` is unknown.
        """
        record = self._require(task_id)
        if run_id not in record.run_ids:
            record.run_ids.append(run_id)
        for skill in skill_names or []:
            if skill in record.skill_names:
                continue
            record.skill_names.append(skill)
        record.updated_at = self._now()
        self.store.upsert_task(record)
        self._event(record, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
        return record

    def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
        """Store the validation result and move the task to ``"awaiting_feedback"``.

        Raises:
            ValueError: if ``task_id`` is unknown.
        """
        record = self._require(task_id)
        record.status = "awaiting_feedback"
        record.updated_at = self._now()
        # Serialized separately for the record and the event so the two never share lists.
        record.validation_result = validation.to_dict()
        self.store.upsert_task(record)
        self._event(record, "validated", run_id=run_id, payload=validation.to_dict())
        return record

    def add_feedback(
        self,
        task_id: str,
        *,
        feedback_type: str,
        comment: str | None = None,
        run_id: str | None = None,
    ) -> TaskRecord:
        """Record user feedback for a run and update the task status.

        ``"revise"`` reopens the task as ``"needs_revision"``, ``"abandon"``
        finalizes it as ``"abandoned"`` and ``"satisfied"`` closes it with
        satisfaction ``1.0``. Repeating the same feedback for the same
        ``run_id`` is a no-op that returns the task unchanged.

        Raises:
            ValueError: if ``task_id`` is unknown, if a different feedback
                type was already recorded for ``run_id``, or if the task is
                already closed/abandoned and this is not a repeat.
        """
        record = self._require(task_id)
        timestamp = self._now()

        already_recorded = False
        clash = None
        for previous in record.feedback:
            if previous.get("run_id") != run_id:
                continue
            if previous.get("feedback_type") == feedback_type:
                already_recorded = True
            elif clash is None:
                clash = previous

        if clash is not None:
            raise ValueError(
                f"Feedback for run_id={run_id!r} was already recorded as "
                f"{clash.get('feedback_type')!r}"
            )
        if record.status in {"closed", "abandoned"} and not already_recorded:
            raise ValueError(f"Task {record.task_id} is already finalized as {record.status!r}")
        if already_recorded:
            return record

        entry = {
            "feedback_type": feedback_type,
            "comment": comment or "",
            "run_id": run_id,
            "created_at": timestamp,
        }
        record.feedback.append(entry)

        if feedback_type == "satisfied":
            record.status = "closed"
            record.closed_at = timestamp
            record.close_reason = "satisfied"
            record.satisfaction = 1.0
        elif feedback_type == "abandon":
            record.status = "abandoned"
            record.closed_at = timestamp
            record.close_reason = comment or "abandoned"
        elif feedback_type == "revise":
            record.status = "needs_revision"

        record.updated_at = timestamp
        self.store.upsert_task(record)
        self._event(record, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
        return record

    def _require(self, task_id: str) -> TaskRecord:
        """Return the stored task or raise ``ValueError`` for an unknown id."""
        found = self.store.get_task(task_id)
        if found is not None:
            return found
        raise ValueError(f"Unknown task_id: {task_id}")

    def _event(
        self,
        task: TaskRecord,
        event_type: str,
        *,
        run_id: str | None = None,
        payload: dict[str, Any] | None = None,
    ) -> None:
        """Append a ``TaskEvent`` for ``task`` with a fresh id and timestamp."""
        event = TaskEvent(
            event_id=uuid4().hex,
            task_id=task.task_id,
            session_id=task.session_id,
            run_id=run_id,
            event_type=event_type,
            created_at=self._now(),
            payload=dict(payload or {}),
        )
        self.store.append_event(event)

    @staticmethod
    def _now() -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(tz=timezone.utc).isoformat()
|
||||
286
app-instance/backend/beaver/tasks/skill_resolver.py
Normal file
286
app-instance/backend/beaver/tasks/skill_resolver.py
Normal file
@ -0,0 +1,286 @@
|
||||
"""Resolve Task team nodes to pinned skills for generic sub-agents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field, replace
|
||||
from typing import Any
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import MissingSkillSynthesizer
|
||||
from beaver.tasks.models import TaskRecord
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillResolutionReport:
|
||||
node_id: str
|
||||
skill_query: str
|
||||
required_capabilities: list[str] = field(default_factory=list)
|
||||
selected_skill_names: list[str] = field(default_factory=list)
|
||||
generated_skill_draft_id: str | None = None
|
||||
generated_skill_name: str | None = None
|
||||
ephemeral_used: bool = False
|
||||
reason: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"node_id": self.node_id,
|
||||
"skill_query": self.skill_query,
|
||||
"required_capabilities": list(self.required_capabilities),
|
||||
"selected_skill_names": list(self.selected_skill_names),
|
||||
"generated_skill_draft_id": self.generated_skill_draft_id,
|
||||
"generated_skill_name": self.generated_skill_name,
|
||||
"ephemeral_used": self.ephemeral_used,
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
|
||||
class TaskSkillResolver:
|
||||
"""Pins published or draft-only skills onto generic team nodes."""
|
||||
|
||||
def __init__(
    self,
    *,
    skills_loader: SkillsLoader,
    draft_service: DraftService,
    retriever: SkillEmbeddingRetriever | None = None,
    missing_skill_synthesizer: MissingSkillSynthesizer | None = None,
) -> None:
    """Store the collaborators, constructing defaults for the optional ones.

    A default ``SkillEmbeddingRetriever`` / ``MissingSkillSynthesizer`` is
    built whenever the corresponding argument is omitted or falsy.
    """
    if not retriever:
        retriever = SkillEmbeddingRetriever()
    if not missing_skill_synthesizer:
        missing_skill_synthesizer = MissingSkillSynthesizer()
    self.skills_loader = skills_loader
    self.draft_service = draft_service
    self.retriever = retriever
    self.missing_skill_synthesizer = missing_skill_synthesizer
|
||||
|
||||
async def resolve_graph(
    self,
    graph: ExecutionGraph,
    *,
    task: TaskRecord,
    user_message: str,
    attempt_index: int,
    provider_bundle: ProviderBundle,
) -> tuple[ExecutionGraph, list[SkillResolutionReport]]:
    """Resolve skills for every node of ``graph``, one node at a time.

    Returns a new graph with the same strategy and the resolved nodes,
    together with one report per node in node order.
    """
    shared_context = {
        "task": task,
        "user_message": user_message,
        "attempt_index": attempt_index,
        "provider_bundle": provider_bundle,
    }
    nodes_out: list[ExecutionNode] = []
    reports_out: list[SkillResolutionReport] = []
    # Nodes are awaited sequentially, in graph order.
    for member in graph.nodes:
        node_result, node_report = await self.resolve_node(member, **shared_context)
        nodes_out.append(node_result)
        reports_out.append(node_report)
    rebuilt = ExecutionGraph(strategy=graph.strategy, nodes=nodes_out)
    return rebuilt, reports_out
|
||||
|
||||
async def resolve_node(
    self,
    node: ExecutionNode,
    *,
    task: TaskRecord,
    user_message: str,
    attempt_index: int,
    provider_bundle: ProviderBundle,
) -> tuple[ExecutionNode, SkillResolutionReport]:
    """Pin skills onto one node and report how they were obtained.

    Published skills are tried first. When none is selected, the
    missing-skill synthesizer is called and its skill context is attached
    to the node, with the draft id and name recorded in the metadata and
    the report.
    """
    skill_query = str(node.agent.metadata.get("skill_query") or node.task or node.node_id).strip()

    required_capabilities = []
    for raw_capability in node.agent.metadata.get("required_capabilities", []):
        capability = str(raw_capability).strip()
        if capability:
            required_capabilities.append(capability)

    # Empty parts are skipped so the retrieval query has no blank lines.
    query_parts = [
        skill_query,
        node.task,
        " ".join(required_capabilities),
        task.goal,
        user_message,
    ]
    selected = await self._select_published_skills(
        query="\n".join(filter(None, query_parts)),
        provider_bundle=provider_bundle,
    )

    if selected:
        pinned = _merge_names(node.inherited_pinned_skills, selected)
        matched_metadata = dict(node.agent.metadata)
        matched_metadata.update(
            {
                "skill_query": skill_query,
                "required_capabilities": required_capabilities,
                "selected_skill_names": selected,
                "ephemeral_skill_names": [],
            }
        )
        matched_node = self._generic_node(
            node,
            pinned_skill_names=pinned,
            metadata=matched_metadata,
        )
        matched_report = SkillResolutionReport(
            node_id=node.node_id,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            selected_skill_names=selected,
            ephemeral_used=False,
            reason="matched published skill",
        )
        return matched_node, matched_report

    missing = await self.missing_skill_synthesizer.synthesize(
        task=task,
        user_message=user_message,
        attempt_index=attempt_index,
        node_id=node.node_id,
        node_task=node.task,
        skill_query=skill_query,
        required_capabilities=required_capabilities,
        provider_bundle=provider_bundle,
        draft_service=self.draft_service,
    )
    draft_id = missing.draft.draft_id
    draft_name = missing.draft.skill_name

    generated_metadata = dict(node.agent.metadata)
    generated_metadata.update(
        {
            "skill_query": skill_query,
            "required_capabilities": required_capabilities,
            "selected_skill_names": [],
            "generated_skill_draft_id": draft_id,
            "generated_skill_name": draft_name,
            "ephemeral_skill_names": [missing.skill_context.name],
        }
    )
    generated_node = self._generic_node(
        node,
        pinned_skill_names=list(node.inherited_pinned_skills),
        pinned_skill_contexts=[*node.inherited_pinned_skill_contexts, missing.skill_context],
        metadata=generated_metadata,
    )
    generated_report = SkillResolutionReport(
        node_id=node.node_id,
        skill_query=skill_query,
        required_capabilities=required_capabilities,
        generated_skill_draft_id=draft_id,
        generated_skill_name=draft_name,
        ephemeral_used=True,
        reason="generated draft-only skill for missing sub-agent guidance",
    )
    return generated_node, generated_report
|
||||
|
||||
async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
    """Choose published skill names for one node.

    The loader's selection candidates are first narrowed by ``self.retriever``
    (which receives the embedding runtime's connection settings when such a
    runtime is configured, otherwise ``None`` for each of them). A chat model
    is then asked to pick names from the narrowed list.

    Returns:
        The names chosen by the model that are also present in the narrowed
        candidate list, de-duplicated in the order the model gave them. An
        empty list when there are no candidates or when the chat call or the
        parsing of its reply fails.
    """
    pool = self.skills_loader.build_selection_candidates()
    if not pool:
        return []

    embedding = provider_bundle.embedding_runtime
    if embedding is None:
        api_key = api_base = embedding_model = extra_headers = timeout_seconds = None
    else:
        api_key = embedding.api_key
        api_base = embedding.api_base
        embedding_model = embedding.model
        extra_headers = embedding.extra_headers
        timeout_seconds = embedding.request_timeout_seconds

    shortlist = await self.retriever.retrieve(
        query=query,
        candidates=pool,
        top_k=8,
        api_key=api_key,
        api_base=api_base,
        model=embedding_model,
        extra_headers=extra_headers,
        timeout_seconds=timeout_seconds,
        fallback_top_k=8,
    )
    if not shortlist:
        return []

    # Prefer the auxiliary provider/runtime; fall back to the main one.
    provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
    runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
    chat_model = getattr(runtime, "model", None)
    allowed = {entry["name"] for entry in shortlist}

    system_prompt = (
        "Select published Beaver skills for one generic sub-agent node. "
        "Return only a JSON array of skill names. Do not invent names. "
        "If none of the candidates directly match the required guidance, return []."
    )
    try:
        # Rendering the candidates stays inside the try so that any failure
        # while building the request is treated like a failed selection.
        user_prompt = (
            f"Node skill query:\n{query}\n\n"
            f"Candidate skills:\n{self._render_candidates(shortlist)}\n\n"
            "Return only JSON, for example: [\"skill-a\"] or []"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            tools=None,
            model=chat_model,
            max_tokens=512,
            temperature=0,
        )
        proposed = self._parse_names(response.content or "")
    except Exception:
        # Best effort: any provider or parsing failure means "nothing selected".
        proposed = []

    # Keep only names that were actually offered, first occurrence wins.
    return list(dict.fromkeys(name for name in proposed if name in allowed))
|
||||
|
||||
@staticmethod
def _generic_node(
    node: ExecutionNode,
    *,
    pinned_skill_names: list[str],
    metadata: dict[str, Any],
    pinned_skill_contexts: list[Any] | None = None,
) -> ExecutionNode:
    """Return a copy of *node* rebound to a generic skill-worker agent.

    The copy's agent is a new ``AgentDescriptor`` named after the node id with
    an empty role and system prompt. Its metadata is *metadata* with
    ``"sub_agent_kind"`` set to ``"generic_skill_worker"``.

    Args:
        node: Node to copy.
        pinned_skill_names: Stored as the copy's ``inherited_pinned_skills``.
        metadata: Base metadata for the new agent descriptor.
        pinned_skill_contexts: Skill contexts for the copy. When ``None`` or
            empty, the node's own ``inherited_pinned_skill_contexts`` are used.
    """
    worker_metadata = dict(metadata)
    worker_metadata["sub_agent_kind"] = "generic_skill_worker"

    if pinned_skill_contexts:
        contexts = list(pinned_skill_contexts)
    else:
        contexts = list(node.inherited_pinned_skill_contexts)

    worker = AgentDescriptor(
        name=node.node_id,
        role="",
        system_prompt="",
        metadata=worker_metadata,
    )
    return replace(
        node,
        agent=worker,
        inherited_pinned_skills=pinned_skill_names,
        inherited_pinned_skill_contexts=contexts,
    )
|
||||
|
||||
@staticmethod
|
||||
def _render_candidates(candidates: list[dict[str, str]]) -> str:
|
||||
return "\n".join(f"- {item['name']}: {item['description']}" for item in candidates)
|
||||
|
||||
@staticmethod
|
||||
def _parse_names(content: str) -> list[str]:
|
||||
cleaned = content.strip()
|
||||
if cleaned.startswith("```"):
|
||||
lines = cleaned.splitlines()
|
||||
if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
|
||||
cleaned = "\n".join(lines[1:-1]).strip()
|
||||
if cleaned.lower().startswith("json"):
|
||||
cleaned = cleaned[4:].strip()
|
||||
try:
|
||||
payload = json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
if isinstance(payload, dict):
|
||||
for key in ("skills", "selected_skills", "selected"):
|
||||
value = payload.get(key)
|
||||
if isinstance(value, list):
|
||||
payload = value
|
||||
break
|
||||
if not isinstance(payload, list):
|
||||
return []
|
||||
return [str(item).strip() for item in payload if str(item).strip()]
|
||||
|
||||
|
||||
def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
for name in [*parent, *selected]:
|
||||
if name and name not in result:
|
||||
result.append(name)
|
||||
return result
|
||||
100
app-instance/backend/beaver/tasks/store.py
Normal file
100
app-instance/backend/beaver/tasks/store.py
Normal file
@ -0,0 +1,100 @@
|
||||
"""File-backed internal task store."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .models import TaskEvent, TaskRecord
|
||||
|
||||
|
||||
class TaskStore:
    """File-backed store for task records and task events.

    Records are kept in ``tasks.json`` under the root directory as
    ``{"tasks": {task_id: record_dict}}``. Events are appended to
    ``events.jsonl``, one JSON object per line. Every read and write of these
    two files happens while holding a single ``threading.Lock``, which
    serializes access between threads of this process only.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the root directory if needed and set up file paths."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.tasks_path = self.root / "tasks.json"
        self.events_path = self.root / "events.jsonl"
        self._lock = threading.Lock()

    def list_tasks(self) -> list[TaskRecord]:
        """Return every stored task as a ``TaskRecord``."""
        with self._lock:
            payload = self._read_tasks_unlocked()
        return [TaskRecord.from_dict(item) for item in payload.values()]

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the task stored under *task_id*, or ``None`` if absent."""
        with self._lock:
            payload = self._read_tasks_unlocked().get(task_id)
        return TaskRecord.from_dict(payload) if isinstance(payload, dict) else None

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Return the first task whose ``run_ids`` contains *run_id*, else ``None``."""
        return next((task for task in self.list_tasks() if run_id in task.run_ids), None)

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the most recently updated open task of a session, or ``None``.

        A task counts as open here when its status is one of
        ``awaiting_feedback``, ``needs_revision``, ``open`` or ``running``.
        """
        # NOTE(review): "validating" is not in this set although
        # models.TASK_OPEN_STATUSES lists it - confirm the omission is intended.
        tasks = [
            task
            for task in self.list_tasks()
            if task.session_id == session_id
            and task.status in {"awaiting_feedback", "needs_revision", "open", "running"}
        ]
        if not tasks:
            return None
        # sorted()[-1] (not max) so that, on equal timestamps, the later entry wins.
        return sorted(tasks, key=lambda item: item.updated_at)[-1]

    def upsert_task(self, task: TaskRecord) -> None:
        """Insert or overwrite *task* under its ``task_id`` and persist the file."""
        with self._lock:
            payload = self._read_tasks_unlocked()
            payload[task.task_id] = task.to_dict()
            self._write_tasks_unlocked(payload)

    def append_event(self, event: TaskEvent) -> None:
        """Append *event* as one JSON line to the events file."""
        self.events_path.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            with self.events_path.open("a", encoding="utf-8") as handle:
                handle.write(json.dumps(event.to_dict(), ensure_ascii=False, sort_keys=True) + "\n")

    def list_events(self, task_id: str | None = None) -> list[TaskEvent]:
        """Return stored events, optionally restricted to one task.

        Args:
            task_id: When given, only events whose ``task_id`` equals it are
                returned.

        Returns:
            Events in file order; an empty list when no events file exists.
            Blank lines and lines that decode to a non-object are skipped.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        # Read under the same lock append_event writes under, so a reader
        # never observes a line that is only partially written.
        with self._lock:
            if not self.events_path.exists():
                return []
            raw = self.events_path.read_text(encoding="utf-8")

        results: list[TaskEvent] = []
        for line in raw.splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if not isinstance(payload, dict):
                continue
            event = TaskEvent.from_dict(payload)
            if task_id is not None and event.task_id != task_id:
                continue
            results.append(event)
        return results

    def _read_tasks_unlocked(self) -> dict[str, dict[str, Any]]:
        """Load the task mapping from disk; the caller must hold the lock.

        Accepts both the wrapped ``{"tasks": {...}}`` layout and a bare
        ``{task_id: record}`` mapping. Entries whose value is not a dict are
        dropped, and anything that is not a mapping yields an empty result.
        """
        if not self.tasks_path.exists():
            return {}
        payload = json.loads(self.tasks_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            return {}
        tasks = payload.get("tasks", payload)
        if not isinstance(tasks, dict):
            return {}
        return {str(key): dict(value) for key, value in tasks.items() if isinstance(value, dict)}

    def _write_tasks_unlocked(self, payload: dict[str, dict[str, Any]]) -> None:
        """Write the task mapping to disk; the caller must hold the lock.

        The data is written to a temporary file in the same directory and then
        moved over ``tasks.json`` with ``os.replace``.
        """
        self.tasks_path.parent.mkdir(parents=True, exist_ok=True)
        fd, tmp_name = tempfile.mkstemp(prefix=".tasks-", suffix=".json", dir=str(self.tasks_path.parent))
        tmp_path = Path(tmp_name)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                json.dump({"tasks": payload}, handle, ensure_ascii=False, indent=2, sort_keys=True)
                handle.write("\n")
            os.replace(tmp_path, self.tasks_path)
        finally:
            # After a successful replace the temp file is gone; this only
            # cleans up when writing or replacing failed.
            if tmp_path.exists():
                tmp_path.unlink()
|
||||
138
app-instance/backend/beaver/tasks/validation.py
Normal file
138
app-instance/backend/beaver/tasks/validation.py
Normal file
@ -0,0 +1,138 @@
|
||||
"""Automatic validation for internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
|
||||
|
||||
class ValidationService:
    """Judge whether a task's final output is acceptable.

    A chat provider from the supplied bundle is used as validator when one is
    available; otherwise a text-only heuristic is applied to the output.
    """

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Validate *final_output* for *task*.

        Args:
            task: Task being validated; its ``goal`` is shown to the validator.
            user_message: The current user request.
            final_output: Assistant output to judge.
            transcript_excerpt: Optional transcript text given to the validator.
            tool_summaries: Optional tool summaries given to the validator.
            team_summaries: Optional team summaries given to the validator.
            provider_bundle: Source of the validator provider. The auxiliary
                provider/runtime is preferred over the main one.

        Returns:
            The provider-based result when a provider is available. If that
            validation raises, a failing result with score ``0.0`` and
            validator ``"llm_error"``. Without a provider, the heuristic result.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is not None:
            try:
                return await self._validate_with_provider(
                    provider=provider,
                    model=model,
                    task=task,
                    user_message=user_message,
                    final_output=final_output,
                    transcript_excerpt=transcript_excerpt,
                    tool_summaries=tool_summaries or [],
                    team_summaries=team_summaries or [],
                )
            except Exception as exc:
                # A broken validator must not accept the task: report an
                # explicit failure instead of falling back to the heuristic.
                return ValidationResult(
                    passed=False,
                    score=0.0,
                    issues=[f"Validator failed: {exc}"],
                    missing_requirements=["A valid automatic validation result is required before accepting the task."],
                    recommended_revision_prompt=(
                        "Review the task result again because automatic validation failed, "
                        "then provide a corrected final answer that explicitly satisfies the task goal."
                    ),
                    validator="llm_error",
                )
        return self._heuristic_validate(final_output)

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask *provider* to validate the output and convert its JSON reply.

        The prompt includes at most 2500 characters of transcript, 12 tool
        summaries, 12 team summaries and 4000 characters of final output.

        Raises:
            ValueError: If the reply is not a JSON object or the score is not
                numeric (``json.JSONDecodeError`` is a ``ValueError``).
        """
        prompt = (
            "Validate whether the assistant output satisfies the task. "
            "Return only compact JSON with keys: passed, score, issues, "
            "missing_requirements, recommended_revision_prompt.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Transcript excerpt:\n{transcript_excerpt[:2500]}\n\n"
            f"Tool summaries:\n{json.dumps(tool_summaries[:12], ensure_ascii=False)}\n\n"
            f"Team summaries:\n{json.dumps(team_summaries[:12], ensure_ascii=False)}\n\n"
            f"Assistant final output:\n{final_output[:4000]}"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": "You are a strict task result validator."},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=800,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")
        return ValidationResult(
            passed=self._coerce_bool(payload.get("passed")),
            score=self._coerce_score(payload.get("score")),
            issues=self._coerce_str_list(payload.get("issues")),
            missing_requirements=self._coerce_str_list(payload.get("missing_requirements")),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """Interpret a JSON value as a pass/fail flag.

        Strings are compared against a small set of affirmative words, because
        ``bool("false")`` would otherwise be ``True``. Other values use normal
        truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() in {"true", "yes", "y", "1", "pass", "passed"}
        return bool(value)

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert a JSON value to a score clamped to ``[0.0, 1.0]``.

        Falsy values and NaN become ``0.0``. NaN needs explicit handling
        because ``max(0.0, min(1.0, nan))`` evaluates to ``1.0``.

        Raises:
            ValueError: If a string value is not numeric.
            TypeError: If the value cannot be converted to ``float``.
        """
        import math

        score = float(value or 0.0)
        if math.isnan(score):
            return 0.0
        return max(0.0, min(1.0, score))

    @staticmethod
    def _coerce_str_list(value: Any) -> list[str]:
        """Convert a JSON value to a list of strings.

        Falsy values give an empty list, lists and tuples are converted item by
        item, and any other value (notably a bare string, which would otherwise
        be split into characters) becomes a single-item list.
        """
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Validate using the output text alone.

        Empty output fails with score ``0.0``. Output containing one of two
        known failure phrases fails with score ``0.35``. Anything else passes
        with score ``0.85``.
        """
        text = final_output.strip()
        if not text:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )
        lowered = text.lower()
        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )
        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Parse a JSON object out of a model reply.

        Surrounding backticks and a leading ``json`` tag are removed, then the
        span from the first ``{`` to the last ``}`` is decoded.

        Raises:
            json.JSONDecodeError: If the text is not valid JSON.
            ValueError: If the decoded value is not a JSON object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("validator response must be a JSON object")
        return payload
|
||||
Reference in New Issue
Block a user