feat(tasks): add skill-templated task graph execution
This commit is contained in:
@ -84,11 +84,21 @@ class TeamGraphScheduler:
|
||||
**kwargs,
|
||||
) -> list[NodeRunResult]:
|
||||
results: list[NodeRunResult] = []
|
||||
nodes_by_id = {node.node_id: node for node in nodes}
|
||||
for node in nodes:
|
||||
if any(not item.success for item in results):
|
||||
results.append(self._blocked(node, results))
|
||||
blocking = [
|
||||
item
|
||||
for item in results
|
||||
if self._blocks_downstream(item, nodes_by_id[item.node_id])
|
||||
]
|
||||
if blocking:
|
||||
results.append(self._blocked(node, blocking))
|
||||
continue
|
||||
dependency_outputs = {item.node_id: item.output_text for item in results if item.success}
|
||||
dependency_outputs = {
|
||||
item.node_id: item.output_text
|
||||
for item in results
|
||||
if item.completion_status in {"succeeded", "partial"}
|
||||
}
|
||||
results.append(await self._run_node(node, dependency_outputs=dependency_outputs, **kwargs))
|
||||
return results
|
||||
|
||||
@ -116,6 +126,7 @@ class TeamGraphScheduler:
|
||||
**kwargs,
|
||||
) -> list[NodeRunResult]:
|
||||
pending = {node.node_id: node for node in nodes}
|
||||
nodes_by_id = {node.node_id: node for node in nodes}
|
||||
completed: dict[str, NodeRunResult] = {}
|
||||
ordered: list[NodeRunResult] = []
|
||||
|
||||
@ -123,18 +134,28 @@ class TeamGraphScheduler:
|
||||
blocked_ids = {
|
||||
node_id
|
||||
for node_id, node in pending.items()
|
||||
if any(dep in completed and not completed[dep].success for dep in node.depends_on)
|
||||
if any(
|
||||
dep in completed
|
||||
and self._blocks_downstream(completed[dep], nodes_by_id[dep])
|
||||
for dep in node.depends_on
|
||||
)
|
||||
}
|
||||
for node_id in sorted(blocked_ids):
|
||||
node = pending.pop(node_id)
|
||||
result = self._blocked(node, list(completed.values()))
|
||||
completed[node_id] = result
|
||||
ordered.append(result)
|
||||
if blocked_ids:
|
||||
continue
|
||||
|
||||
ready = [
|
||||
node
|
||||
for node in pending.values()
|
||||
if all(dep in completed and completed[dep].success for dep in node.depends_on)
|
||||
if all(
|
||||
dep in completed
|
||||
and not self._blocks_downstream(completed[dep], nodes_by_id[dep])
|
||||
for dep in node.depends_on
|
||||
)
|
||||
]
|
||||
if not ready:
|
||||
if pending:
|
||||
@ -196,6 +217,17 @@ class TeamGraphScheduler:
|
||||
expected_output=node.expected_output,
|
||||
node_id=node.node_id,
|
||||
dependency_outputs=dict(dependency_outputs),
|
||||
input_contract=dict(node.input_contract),
|
||||
output_contract=dict(node.output_contract),
|
||||
allowed_tool_names=(
|
||||
None if node.allowed_tool_names is None else list(node.allowed_tool_names)
|
||||
),
|
||||
required_evidence=list(node.required_evidence),
|
||||
evidence_contract=dict(node.evidence_contract),
|
||||
validation_rules=list(node.validation_rules),
|
||||
required_for_completion=node.required_for_completion,
|
||||
block_downstream_on_partial=node.block_downstream_on_partial,
|
||||
max_tool_iterations=node.max_tool_iterations,
|
||||
)
|
||||
node_provider_bundle = provider_bundle_factory(node) if provider_bundle_factory is not None else provider_bundle
|
||||
return await self.runner.run(
|
||||
@ -213,8 +245,17 @@ class TeamGraphScheduler:
|
||||
output_text="",
|
||||
finish_reason="error",
|
||||
error=str(exc),
|
||||
completion_status="failed",
|
||||
)
|
||||
|
||||
@staticmethod
def _blocks_downstream(result: NodeRunResult, node: ExecutionNode) -> bool:
    """Tell whether ``result`` should stop nodes that depend on ``node``.

    Args:
        result: Outcome of running ``node``; its ``completion_status`` and
            ``success`` attributes are read.
        node: The node that produced ``result``; consulted only for its
            ``block_downstream_on_partial`` flag.

    Returns:
        ``True`` for a ``"failed"`` or ``"blocked"`` status, the node's
        ``block_downstream_on_partial`` flag for a ``"partial"`` status, and
        ``not result.success`` for every other status.
    """
    status = result.completion_status
    if status == "partial":
        # A partial result blocks only when the node opted in.
        return node.block_downstream_on_partial
    if status in ("failed", "blocked"):
        return True
    # Any other status falls back to the plain success flag.
    return not result.success
|
||||
|
||||
@staticmethod
|
||||
def _merge_pinned(parent: list[str], local: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
@ -245,6 +286,7 @@ class TeamGraphScheduler:
|
||||
output_text="",
|
||||
finish_reason="blocked",
|
||||
error=f"Blocked by failed dependency: {detail}",
|
||||
completion_status="blocked",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -6,7 +6,7 @@ from uuid import uuid4
|
||||
|
||||
from beaver.engine import AgentLoop
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.tasks.evidence import EvidenceBuilder
|
||||
from beaver.tasks.evidence import EvidenceBuilder, evaluate_node_evidence
|
||||
|
||||
from .models import DelegationEnvelope, NodeRunResult
|
||||
|
||||
@ -54,6 +54,8 @@ class LocalAgentRunner:
|
||||
task_mode=bool(envelope.parent_task_id),
|
||||
pinned_skill_names=envelope.inherited_pinned_skills,
|
||||
pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
|
||||
allowed_tool_names=envelope.allowed_tool_names,
|
||||
max_tool_iterations=envelope.max_tool_iterations,
|
||||
allow_candidate_generation=allow_candidate_generation,
|
||||
)
|
||||
loaded = target_loop.boot()
|
||||
@ -63,7 +65,23 @@ class LocalAgentRunner:
|
||||
result.output_text,
|
||||
result.finish_reason,
|
||||
)
|
||||
success = result.finish_reason == "stop"
|
||||
evidence_gaps = evaluate_node_evidence(
|
||||
evidence,
|
||||
envelope.required_evidence,
|
||||
result.output_text,
|
||||
)
|
||||
run_succeeded = result.finish_reason == "stop"
|
||||
if not run_succeeded:
|
||||
completion_status = "failed"
|
||||
elif evidence_gaps:
|
||||
completion_status = "partial"
|
||||
else:
|
||||
completion_status = "succeeded"
|
||||
success = completion_status == "succeeded"
|
||||
if completion_status == "partial":
|
||||
error = "; ".join(evidence_gaps)
|
||||
else:
|
||||
error = None if success else (result.output_text or result.finish_reason)
|
||||
return NodeRunResult(
|
||||
node_id=envelope.node_id or envelope.agent.name,
|
||||
success=success,
|
||||
@ -71,8 +89,10 @@ class LocalAgentRunner:
|
||||
run_id=result.run_id,
|
||||
session_id=result.session_id,
|
||||
finish_reason=result.finish_reason,
|
||||
error=None if success else (result.output_text or result.finish_reason),
|
||||
error=error,
|
||||
evidence=evidence,
|
||||
completion_status=completion_status,
|
||||
evidence_gaps=evidence_gaps,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -51,6 +51,15 @@ class DelegationEnvelope:
|
||||
expected_output: str | None = None
|
||||
node_id: str | None = None
|
||||
dependency_outputs: dict[str, str] = field(default_factory=dict)
|
||||
input_contract: dict[str, Any] = field(default_factory=dict)
|
||||
output_contract: dict[str, Any] = field(default_factory=dict)
|
||||
allowed_tool_names: list[str] | None = None
|
||||
required_evidence: list[str] = field(default_factory=list)
|
||||
evidence_contract: dict[str, Any] = field(default_factory=dict)
|
||||
validation_rules: list[str] = field(default_factory=list)
|
||||
required_for_completion: bool = True
|
||||
block_downstream_on_partial: bool = False
|
||||
max_tool_iterations: int | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -65,6 +74,15 @@ class ExecutionNode:
|
||||
inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list)
|
||||
constraints: list[str] = field(default_factory=list)
|
||||
expected_output: str | None = None
|
||||
input_contract: dict[str, Any] = field(default_factory=dict)
|
||||
output_contract: dict[str, Any] = field(default_factory=dict)
|
||||
allowed_tool_names: list[str] | None = None
|
||||
required_evidence: list[str] = field(default_factory=list)
|
||||
evidence_contract: dict[str, Any] = field(default_factory=dict)
|
||||
validation_rules: list[str] = field(default_factory=list)
|
||||
required_for_completion: bool = True
|
||||
block_downstream_on_partial: bool = False
|
||||
max_tool_iterations: int | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -74,7 +92,7 @@ class ExecutionGraph:
|
||||
strategy: TeamStrategy
|
||||
nodes: list[ExecutionNode]
|
||||
|
||||
def validate(self) -> None:
|
||||
def validate(self, *, max_depth: int | None = None) -> None:
|
||||
if self.strategy not in {"sequence", "parallel", "dag"}:
|
||||
raise NotImplementedError(f"Team strategy {self.strategy!r} is reserved but not implemented in v1")
|
||||
if not self.nodes:
|
||||
@ -91,19 +109,25 @@ class ExecutionGraph:
|
||||
visited: set[str] = set()
|
||||
deps = {node.node_id: list(node.depends_on) for node in self.nodes}
|
||||
|
||||
def visit(node_id: str) -> None:
|
||||
def visit(node_id: str) -> int:
|
||||
if node_id in visited:
|
||||
return
|
||||
return depths[node_id]
|
||||
if node_id in visiting:
|
||||
raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies involving {node_id!r}")
|
||||
visiting.add(node_id)
|
||||
depth = 1
|
||||
for dep in deps[node_id]:
|
||||
visit(dep)
|
||||
depth = max(depth, visit(dep) + 1)
|
||||
visiting.remove(node_id)
|
||||
visited.add(node_id)
|
||||
depths[node_id] = depth
|
||||
return depth
|
||||
|
||||
depths: dict[str, int] = {}
|
||||
for node_id in node_ids:
|
||||
visit(node_id)
|
||||
depth = visit(node_id)
|
||||
if max_depth is not None and depth > max_depth:
|
||||
raise ValueError(f"ExecutionGraph exceeds max depth {max_depth}")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -118,6 +142,8 @@ class NodeRunResult:
|
||||
finish_reason: str = "stop"
|
||||
error: str | None = None
|
||||
evidence: "RunEvidence | None" = None
|
||||
completion_status: str = "succeeded"
|
||||
evidence_gaps: list[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@ -129,6 +155,8 @@ class NodeRunResult:
|
||||
"finish_reason": self.finish_reason,
|
||||
"error": self.error,
|
||||
"evidence": self.evidence.to_dict() if self.evidence is not None else None,
|
||||
"completion_status": self.completion_status,
|
||||
"evidence_gaps": list(self.evidence_gaps),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -48,6 +48,8 @@ class SkillContext:
|
||||
content_hash: str = ""
|
||||
activation_reason: str = "selected"
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
team_template: dict[str, Any] | None = None
|
||||
team_template_warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
||||
@ -317,7 +317,10 @@ class EngineLoader:
|
||||
draft_service=draft_service,
|
||||
)
|
||||
task_service = self._task_service or TaskService(workspace / "tasks")
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(
|
||||
task_skill_resolver=task_skill_resolver,
|
||||
tool_registry=tool_registry,
|
||||
)
|
||||
mcp_manager = MCPConnectionManager(
|
||||
self.config.tools.mcp_servers,
|
||||
authz_config=self.config.authz,
|
||||
|
||||
@ -8,6 +8,7 @@ import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from time import perf_counter
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
@ -81,6 +82,49 @@ class _DirectRunRequest:
|
||||
future: asyncio.Future[AgentRunResult]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class _WebSearchLoopGuard:
|
||||
low_quality_limit: int = 3
|
||||
_low_quality_count: int = 0
|
||||
_last_query: str = ""
|
||||
|
||||
def observe_result(self, tool_name: str, content: str) -> dict[str, str] | None:
|
||||
if tool_name != "web_search":
|
||||
self._reset()
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(content)
|
||||
except Exception:
|
||||
self._reset()
|
||||
return None
|
||||
|
||||
query = str(payload.get("query") or self._last_query or "").strip()
|
||||
is_low_quality = payload.get("success") is False or payload.get("quality") == "low"
|
||||
if not is_low_quality:
|
||||
self._reset()
|
||||
self._last_query = query
|
||||
return None
|
||||
|
||||
self._low_quality_count += 1
|
||||
self._last_query = query
|
||||
if self._low_quality_count < self.low_quality_limit:
|
||||
return None
|
||||
|
||||
query_text = f" for query '{query}'" if query else ""
|
||||
return {
|
||||
"finish_reason": "web_search_low_quality_budget",
|
||||
"message": (
|
||||
"Web search returned low-quality or failed results repeatedly"
|
||||
f"{query_text}. Stop retrying query variants; use confirmed sources already found, "
|
||||
"state uncertainty clearly, and mark missing fields as N/A."
|
||||
),
|
||||
}
|
||||
|
||||
def _reset(self) -> None:
|
||||
self._low_quality_count = 0
|
||||
self._last_query = ""
|
||||
|
||||
|
||||
class AgentLoop:
|
||||
"""Single execution kernel shared by root agents and delegated agents."""
|
||||
|
||||
@ -240,6 +284,7 @@ class AgentLoop:
|
||||
thinking_enabled: bool | None = None,
|
||||
include_skill_assembly: bool = True,
|
||||
include_tools: bool = True,
|
||||
allowed_tool_names: list[str] | None = None,
|
||||
max_tool_iterations: int | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
parent_session_id: str | None = None,
|
||||
@ -252,6 +297,7 @@ class AgentLoop:
|
||||
allow_candidate_generation: bool = False,
|
||||
intent_agent_decision: dict[str, Any] | None = None,
|
||||
channel_identity: ChannelIdentity | None = None,
|
||||
pre_run_latency_ms: dict[str, float] | None = None,
|
||||
) -> AgentRunResult:
|
||||
"""跑通最小 direct run 主链。
|
||||
|
||||
@ -292,6 +338,7 @@ class AgentLoop:
|
||||
thinking_enabled=thinking_enabled,
|
||||
include_skill_assembly=include_skill_assembly,
|
||||
include_tools=include_tools,
|
||||
allowed_tool_names=allowed_tool_names,
|
||||
max_tool_iterations=max_tool_iterations,
|
||||
provider_bundle=provider_bundle,
|
||||
parent_session_id=parent_session_id,
|
||||
@ -304,6 +351,7 @@ class AgentLoop:
|
||||
allow_candidate_generation=allow_candidate_generation,
|
||||
intent_agent_decision=intent_agent_decision,
|
||||
channel_identity=channel_identity,
|
||||
pre_run_latency_ms=pre_run_latency_ms,
|
||||
)
|
||||
|
||||
async def _process_direct_impl(
|
||||
@ -332,6 +380,7 @@ class AgentLoop:
|
||||
thinking_enabled: bool | None = None,
|
||||
include_skill_assembly: bool = True,
|
||||
include_tools: bool = True,
|
||||
allowed_tool_names: list[str] | None = None,
|
||||
max_tool_iterations: int | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
parent_session_id: str | None = None,
|
||||
@ -344,6 +393,7 @@ class AgentLoop:
|
||||
allow_candidate_generation: bool = False,
|
||||
intent_agent_decision: dict[str, Any] | None = None,
|
||||
channel_identity: ChannelIdentity | None = None,
|
||||
pre_run_latency_ms: dict[str, float] | None = None,
|
||||
) -> AgentRunResult:
|
||||
"""真正执行一轮 direct run 的内部实现。
|
||||
|
||||
@ -353,8 +403,25 @@ class AgentLoop:
|
||||
- 这样才能保证 run 模式下外部不能绕过队列直接执行
|
||||
"""
|
||||
|
||||
run_perf_started = perf_counter()
|
||||
latency_ms = self._initial_latency_ms(pre_run_latency_ms)
|
||||
|
||||
def add_latency(key: str, started_at: float) -> None:
|
||||
latency_ms[key] = latency_ms.get(key, 0.0) + (perf_counter() - started_at) * 1000
|
||||
|
||||
loaded = self.boot()
|
||||
session_manager = self._require_loaded("session_manager")
|
||||
|
||||
def session_write(callable_obj: Any, *args: Any, **kwargs: Any) -> Any:
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
return callable_obj(*args, **kwargs)
|
||||
finally:
|
||||
add_latency("session_write_ms", started_at)
|
||||
|
||||
def append_message(session_id_value: str, **kwargs: Any) -> int:
|
||||
return session_write(session_manager.append_message, session_id_value, **kwargs)
|
||||
|
||||
memory_service = self._require_loaded("memory_service")
|
||||
context_builder = self._require_loaded("context_builder")
|
||||
tool_registry = self._require_loaded("tool_registry")
|
||||
@ -365,9 +432,13 @@ class AgentLoop:
|
||||
skill_assembler = self._require_loaded("skill_assembler")
|
||||
skill_learning_service = self._require_loaded("skill_learning_service")
|
||||
mcp_manager = getattr(loaded, "mcp_manager", None)
|
||||
if mcp_manager is not None:
|
||||
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
|
||||
loaded.tools = [spec.name for spec in tool_registry.list_specs()]
|
||||
if include_tools and mcp_manager is not None:
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
|
||||
loaded.tools = [spec.name for spec in tool_registry.list_specs()]
|
||||
finally:
|
||||
add_latency("mcp_ms", started_at)
|
||||
|
||||
config = loaded.config
|
||||
configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name)
|
||||
@ -393,13 +464,15 @@ class AgentLoop:
|
||||
memory_snapshot = memory_service.capture_snapshot_for_run()
|
||||
|
||||
if parent_session_id:
|
||||
session_manager.ensure_session(
|
||||
session_write(
|
||||
session_manager.ensure_session,
|
||||
parent_session_id,
|
||||
source="unknown",
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
session_manager.ensure_session(
|
||||
session_write(
|
||||
session_manager.ensure_session,
|
||||
resolved_session_id,
|
||||
source=source,
|
||||
model=resolved_model,
|
||||
@ -407,7 +480,7 @@ class AgentLoop:
|
||||
user_id=user_id,
|
||||
parent_session_id=parent_session_id,
|
||||
)
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -435,7 +508,7 @@ class AgentLoop:
|
||||
user_id=user_id,
|
||||
)
|
||||
if intent_agent_decision:
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -480,35 +553,39 @@ class AgentLoop:
|
||||
*(pinned_skill_contexts or []),
|
||||
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
|
||||
]
|
||||
if not include_skill_assembly:
|
||||
activated_skills = self._merge_skill_contexts(pinned_skills, [])
|
||||
else:
|
||||
skill_query = skill_selection_context or task
|
||||
assembled_skills = await skill_assembler.assemble(
|
||||
task_description=skill_query,
|
||||
provider=skill_selector_provider,
|
||||
model=skill_selector_model,
|
||||
embedding_runtime=bundle.embedding_runtime,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
for interaction in getattr(assembled_skills, "llm_interactions", []) or []:
|
||||
session_manager.append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
event_type="skill_assembler_llm_interaction_snapshotted",
|
||||
event_payload=interaction,
|
||||
content=json.dumps(interaction, ensure_ascii=False, default=str),
|
||||
context_visible=False,
|
||||
source=source,
|
||||
title=title,
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
if not include_skill_assembly:
|
||||
activated_skills = self._merge_skill_contexts(pinned_skills, [])
|
||||
else:
|
||||
skill_query = skill_selection_context or task
|
||||
assembled_skills = await skill_assembler.assemble(
|
||||
task_description=skill_query,
|
||||
provider=skill_selector_provider,
|
||||
model=skill_selector_model,
|
||||
user_id=user_id,
|
||||
embedding_runtime=bundle.embedding_runtime,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
activated_skills = self._merge_skill_contexts(
|
||||
pinned_skills,
|
||||
assembled_skills.activated_skills,
|
||||
)
|
||||
for interaction in getattr(assembled_skills, "llm_interactions", []) or []:
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
event_type="skill_assembler_llm_interaction_snapshotted",
|
||||
event_payload=interaction,
|
||||
content=json.dumps(interaction, ensure_ascii=False, default=str),
|
||||
context_visible=False,
|
||||
source=source,
|
||||
title=title,
|
||||
model=skill_selector_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
activated_skills = self._merge_skill_contexts(
|
||||
pinned_skills,
|
||||
assembled_skills.activated_skills,
|
||||
)
|
||||
finally:
|
||||
add_latency("skill_assembly_ms", started_at)
|
||||
skill_activation_messages = context_builder.build_skill_activation_messages(
|
||||
activated_skills
|
||||
)
|
||||
@ -527,7 +604,7 @@ class AgentLoop:
|
||||
]
|
||||
|
||||
if skill_activation_messages or activated_receipts:
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -544,19 +621,26 @@ class AgentLoop:
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
if not include_tools:
|
||||
selected_tool_specs = []
|
||||
else:
|
||||
selected_tool_specs = await tool_assembler.assemble(
|
||||
task_description=task,
|
||||
registry=tool_registry,
|
||||
skills_loader=skills_loader,
|
||||
activated_skills=activated_skills,
|
||||
embedding_runtime=bundle.embedding_runtime,
|
||||
top_k=10,
|
||||
)
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
if not include_tools:
|
||||
selected_tool_specs = []
|
||||
else:
|
||||
selected_tool_specs = await tool_assembler.assemble(
|
||||
task_description=task,
|
||||
registry=tool_registry,
|
||||
skills_loader=skills_loader,
|
||||
activated_skills=activated_skills,
|
||||
embedding_runtime=bundle.embedding_runtime,
|
||||
top_k=10,
|
||||
)
|
||||
if allowed_tool_names is not None:
|
||||
allowed = set(allowed_tool_names)
|
||||
selected_tool_specs = [spec for spec in selected_tool_specs if spec.name in allowed]
|
||||
finally:
|
||||
add_latency("tool_assembly_ms", started_at)
|
||||
tool_schemas = tool_registry.export_selected_provider_schemas(selected_tool_specs)
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -573,37 +657,41 @@ class AgentLoop:
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
build_input = ContextBuildInput(
|
||||
base_system_prompt=self.profile.system_prompt,
|
||||
prompt_locale=prompt_locale,
|
||||
history=session_manager.get_history(
|
||||
resolved_session_id,
|
||||
max_messages=max(1, self.profile.max_context_messages),
|
||||
),
|
||||
current_user_input=task,
|
||||
memory_snapshot=memory_snapshot,
|
||||
activated_skills=activated_skills,
|
||||
session_context=SessionContext(
|
||||
session_id=resolved_session_id,
|
||||
source=source,
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
channel=channel_identity.channel_id if channel_identity else None,
|
||||
channel_kind=channel_identity.kind if channel_identity else None,
|
||||
account_id=channel_identity.account_id if channel_identity else None,
|
||||
peer_id=channel_identity.peer_id if channel_identity else None,
|
||||
peer_type=channel_identity.peer_type if channel_identity else None,
|
||||
chat_id=channel_identity.peer_id if channel_identity else None,
|
||||
thread_id=channel_identity.thread_id if channel_identity else None,
|
||||
parent_session_id=parent_session_id,
|
||||
),
|
||||
runtime_context=self._current_runtime_context(),
|
||||
execution_context=execution_context,
|
||||
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
|
||||
)
|
||||
context_result = context_builder.build_messages(build_input)
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
build_input = ContextBuildInput(
|
||||
base_system_prompt=self.profile.system_prompt,
|
||||
prompt_locale=prompt_locale,
|
||||
history=session_manager.get_history(
|
||||
resolved_session_id,
|
||||
max_messages=max(1, self.profile.max_context_messages),
|
||||
),
|
||||
current_user_input=task,
|
||||
memory_snapshot=memory_snapshot,
|
||||
activated_skills=activated_skills,
|
||||
session_context=SessionContext(
|
||||
session_id=resolved_session_id,
|
||||
source=source,
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
channel=channel_identity.channel_id if channel_identity else None,
|
||||
channel_kind=channel_identity.kind if channel_identity else None,
|
||||
account_id=channel_identity.account_id if channel_identity else None,
|
||||
peer_id=channel_identity.peer_id if channel_identity else None,
|
||||
peer_type=channel_identity.peer_type if channel_identity else None,
|
||||
chat_id=channel_identity.peer_id if channel_identity else None,
|
||||
thread_id=channel_identity.thread_id if channel_identity else None,
|
||||
parent_session_id=parent_session_id,
|
||||
),
|
||||
runtime_context=self._current_runtime_context(),
|
||||
execution_context=execution_context,
|
||||
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
|
||||
)
|
||||
context_result = context_builder.build_messages(build_input)
|
||||
finally:
|
||||
add_latency("context_build_ms", started_at)
|
||||
if skill_selection_context:
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -621,8 +709,8 @@ class AgentLoop:
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
session_manager.update_system_prompt(resolved_session_id, context_result.system_prompt)
|
||||
session_manager.append_message(
|
||||
session_write(session_manager.update_system_prompt, resolved_session_id, context_result.system_prompt)
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -639,7 +727,7 @@ class AgentLoop:
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="user",
|
||||
@ -676,6 +764,9 @@ class AgentLoop:
|
||||
"session_id": resolved_session_id,
|
||||
"task_id": task_id,
|
||||
"run_id": resolved_run_id,
|
||||
"allowed_tool_names": (
|
||||
None if allowed_tool_names is None else list(allowed_tool_names)
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@ -683,6 +774,7 @@ class AgentLoop:
|
||||
final_finish_reason = "stop"
|
||||
final_provider_name = bundle.main_runtime.provider_name
|
||||
final_model = bundle.main_runtime.model
|
||||
web_search_loop_guard = _WebSearchLoopGuard()
|
||||
|
||||
while True:
|
||||
chat_kwargs: dict[str, Any] = {
|
||||
@ -713,7 +805,7 @@ class AgentLoop:
|
||||
"temperature": resolved_temperature,
|
||||
"thinking_enabled": thinking_enabled,
|
||||
}
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -726,14 +818,18 @@ class AgentLoop:
|
||||
model=final_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
response = await provider.chat(**chat_kwargs)
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
response = await provider.chat(**chat_kwargs)
|
||||
finally:
|
||||
add_latency("llm_ms", started_at)
|
||||
final_provider_name = response.provider_name or final_provider_name
|
||||
final_model = response.model or final_model
|
||||
final_usage = self._merge_usage(final_usage, response.usage or {})
|
||||
self._record_usage(session_manager, resolved_session_id, response.usage or {})
|
||||
session_write(self._record_usage, session_manager, resolved_session_id, response.usage or {})
|
||||
|
||||
assistant_tool_calls = self._serialize_tool_calls(response.tool_calls)
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="assistant",
|
||||
@ -764,17 +860,21 @@ class AgentLoop:
|
||||
break
|
||||
|
||||
if iterations >= resolved_max_tool_iterations:
|
||||
finalized = await self._finalize_after_tool_limit(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=final_model,
|
||||
max_tokens=resolved_max_tokens,
|
||||
temperature=resolved_temperature,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
finalized = await self._finalize_after_tool_limit(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=final_model,
|
||||
max_tokens=resolved_max_tokens,
|
||||
temperature=resolved_temperature,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
finally:
|
||||
add_latency("llm_ms", started_at)
|
||||
final_text = finalized or RAW_TOOL_CALL_FALLBACK
|
||||
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="assistant",
|
||||
@ -800,9 +900,26 @@ class AgentLoop:
|
||||
reasoning_content=response.reasoning_content,
|
||||
)
|
||||
iterations += 1
|
||||
for tool_call in response.tool_calls:
|
||||
result = await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
|
||||
session_manager.append_message(
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
if self._can_run_tool_calls_concurrently(response.tool_calls, tool_registry):
|
||||
tool_results = await asyncio.gather(
|
||||
*(
|
||||
effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
|
||||
for tool_call in response.tool_calls
|
||||
)
|
||||
)
|
||||
else:
|
||||
tool_results = []
|
||||
for tool_call in response.tool_calls:
|
||||
tool_results.append(
|
||||
await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
|
||||
)
|
||||
finally:
|
||||
add_latency("tool_ms", started_at)
|
||||
web_guard_decision: dict[str, str] | None = None
|
||||
for tool_call, result in zip(response.tool_calls, tool_results, strict=True):
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="tool",
|
||||
@ -825,8 +942,30 @@ class AgentLoop:
|
||||
tool_name=result.tool_name,
|
||||
result=result.content,
|
||||
)
|
||||
if web_guard_decision is None:
|
||||
web_guard_decision = web_search_loop_guard.observe_result(result.tool_name, result.content)
|
||||
if web_guard_decision is not None:
|
||||
final_text = web_guard_decision["message"]
|
||||
final_finish_reason = web_guard_decision["finish_reason"]
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="assistant",
|
||||
event_type="assistant_message_added",
|
||||
event_payload={"task_id": task_id} if task_id else None,
|
||||
content=final_text,
|
||||
finish_reason=final_finish_reason,
|
||||
source=source,
|
||||
title=title,
|
||||
model=final_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
context_builder.add_assistant_message(messages, content=final_text)
|
||||
break
|
||||
|
||||
session_manager.append_message(
|
||||
final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started)
|
||||
final_usage_with_latency = self._usage_with_latency(final_usage, final_latency_ms)
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
@ -837,6 +976,7 @@ class AgentLoop:
|
||||
"task_id": task_id,
|
||||
"task_mode": task_mode,
|
||||
"attempt_index": attempt_index,
|
||||
"latency_ms": final_latency_ms,
|
||||
},
|
||||
content=final_text,
|
||||
finish_reason=final_finish_reason,
|
||||
@ -869,12 +1009,12 @@ class AgentLoop:
|
||||
tool_iterations=iterations,
|
||||
provider_name=final_provider_name,
|
||||
model=final_model,
|
||||
usage=final_usage,
|
||||
usage=final_usage_with_latency,
|
||||
task_id=task_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
if not user_message_recorded:
|
||||
session_manager.append_message(
|
||||
append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="user",
|
||||
@ -885,6 +1025,7 @@ class AgentLoop:
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started)
|
||||
result = self._build_error_result(
|
||||
session_manager=session_manager,
|
||||
session_id=resolved_session_id,
|
||||
@ -896,8 +1037,9 @@ class AgentLoop:
|
||||
message=f"Run failed before completion: {exc}",
|
||||
tool_iterations=iterations,
|
||||
provider_name=final_provider_name,
|
||||
usage=final_usage,
|
||||
usage=self._usage_with_latency(final_usage, final_latency_ms),
|
||||
task_id=task_id,
|
||||
latency_ms=final_latency_ms,
|
||||
)
|
||||
self._record_run_receipts(
|
||||
skill_learning_service=skill_learning_service,
|
||||
@ -1032,6 +1174,80 @@ class AgentLoop:
|
||||
)
|
||||
return payload
|
||||
|
||||
@staticmethod
def _can_run_tool_calls_concurrently(tool_calls: list[Any], tool_registry: Any) -> bool:
    """Return True when a batch of tool calls may be executed concurrently.

    A batch qualifies only if it holds at least two calls and every call is
    classified as read-only by ``AgentLoop._is_read_only_tool_call``.

    Args:
        tool_calls: Tool calls requested in one model turn.
        tool_registry: Registry used to look up each tool by name.

    Returns:
        False for batches of fewer than two calls or when any call is not
        read-only; True otherwise.
    """
    # A single call (or none) gains nothing from concurrency.
    if len(tool_calls) < 2:
        return False
    return all(AgentLoop._is_read_only_tool_call(tool_call, tool_registry) for tool_call in tool_calls)
|
||||
|
||||
@staticmethod
def _is_read_only_tool_call(tool_call: Any, tool_registry: Any) -> bool:
    """Decide whether a single tool call is safe to treat as read-only.

    The decision is made in this order:

    1. A call without a resolvable name, or whose tool is not found in the
       registry, is not read-only.
    2. ``spec.metadata["read_only"] is True`` marks the tool read-only.
    3. A truthy ``mutates`` or ``sensitive`` metadata flag marks it as not
       read-only.
    4. Otherwise the tool must be on the built-in name allow-list AND belong
       to one of the allowed toolsets.

    Args:
        tool_call: Tool call as a dict or an object (see ``_tool_call_name``).
        tool_registry: Object exposing ``get(name)``; may be None.

    Returns:
        True only when the call is classified as read-only.
    """
    read_only_names = frozenset(
        {
            "list_directory",
            "read_file",
            "search_files",
            "session_search",
            "skills_list",
            "skill_view",
            "user_files_list",
            "user_files_read",
            "web_fetch",
            "web_search",
        }
    )
    read_only_toolsets = frozenset({"filesystem", "session", "skills", "user_files", "web"})

    name = AgentLoop._tool_call_name(tool_call)
    if not name:
        return False
    tool = tool_registry.get(name) if tool_registry is not None else None
    if tool is None:
        return False

    spec = getattr(tool, "spec", None)
    toolset = str(getattr(spec, "toolset", "") or "").lower()
    metadata = getattr(spec, "metadata", {}) or {}

    # An explicit read_only flag wins over every other signal.
    if metadata.get("read_only") is True:
        return True
    if metadata.get("mutates") or metadata.get("sensitive"):
        return False
    # Without explicit metadata, both the name and the toolset must be allow-listed.
    return name in read_only_names and toolset in read_only_toolsets
|
||||
|
||||
@staticmethod
def _tool_call_name(tool_call: Any) -> str:
    """Extract the tool name from a tool call in dict or object form.

    Args:
        tool_call: Either a dict (optionally with a nested ``"function"``
            dict carrying ``"name"``) or any object with a ``name`` attribute.

    Returns:
        The tool name as a string, or ``""`` when none is available.
    """
    if not isinstance(tool_call, dict):
        return str(getattr(tool_call, "name", "") or "")
    function = tool_call.get("function")
    # When a nested "function" dict is present it is authoritative; the
    # top-level "name" is only consulted when there is no such dict.
    if isinstance(function, dict):
        return str(function.get("name") or "")
    return str(tool_call.get("name") or "")
|
||||
|
||||
@staticmethod
|
||||
def _initial_latency_ms(pre_run_latency_ms: dict[str, float] | None) -> dict[str, float]:
|
||||
latency = {
|
||||
"router_ms": 0.0,
|
||||
"mcp_ms": 0.0,
|
||||
"skill_assembly_ms": 0.0,
|
||||
"tool_assembly_ms": 0.0,
|
||||
"context_build_ms": 0.0,
|
||||
"llm_ms": 0.0,
|
||||
"tool_ms": 0.0,
|
||||
"session_write_ms": 0.0,
|
||||
"total_ms": 0.0,
|
||||
}
|
||||
if pre_run_latency_ms:
|
||||
for key, value in pre_run_latency_ms.items():
|
||||
if isinstance(value, (int, float)):
|
||||
latency[str(key)] = latency.get(str(key), 0.0) + float(value)
|
||||
return latency
|
||||
|
||||
@staticmethod
def _final_latency_ms(latency_ms: dict[str, float], run_perf_started: float) -> dict[str, float]:
    """Return a finalized copy of the latency table.

    The elapsed time since ``run_perf_started`` (a ``perf_counter()`` reading)
    is added to ``total_ms``; every value is then clamped to be non-negative
    and rounded to three decimals. The input dict is not modified.

    Args:
        latency_ms: Accumulated per-phase latencies in milliseconds.
        run_perf_started: ``perf_counter()`` value captured at run start.

    Returns:
        A new dict with the finalized values.
    """
    finalized = dict(latency_ms)
    elapsed_ms = (perf_counter() - run_perf_started) * 1000
    finalized["total_ms"] = finalized.get("total_ms", 0.0) + elapsed_ms
    return {key: round(max(0.0, float(value)), 3) for key, value in finalized.items()}
|
||||
|
||||
@staticmethod
def _usage_with_latency(usage: dict[str, Any], latency_ms: dict[str, float]) -> dict[str, Any]:
    """Return a copy of ``usage`` with a ``latency_ms`` entry attached.

    Both the usage dict and the latency dict are shallow-copied, so neither
    argument is mutated or shared with the result.
    """
    payload = dict(usage)
    payload["latency_ms"] = dict(latency_ms)
    return payload
|
||||
|
||||
@staticmethod
|
||||
def _record_usage(session_manager: Any, session_id: str, usage: dict[str, Any]) -> None:
|
||||
"""把 provider usage 映射到 session usage 字段。
|
||||
@ -1079,6 +1295,7 @@ class AgentLoop:
|
||||
provider_name: str | None,
|
||||
usage: dict[str, Any],
|
||||
task_id: str | None = None,
|
||||
latency_ms: dict[str, float] | None = None,
|
||||
) -> AgentRunResult:
|
||||
"""把主链中的未处理异常收口成可追踪的 assistant error turn。"""
|
||||
|
||||
@ -1104,6 +1321,7 @@ class AgentLoop:
|
||||
"tool_iterations": tool_iterations,
|
||||
"provider_name": provider_name,
|
||||
"task_id": task_id,
|
||||
"latency_ms": latency_ms or {},
|
||||
},
|
||||
content=message,
|
||||
finish_reason="error",
|
||||
|
||||
@ -43,6 +43,7 @@ from beaver.services.user_files import (
|
||||
UserFileNotFoundError,
|
||||
UserFilePathError,
|
||||
UserFileSizeError,
|
||||
UserFileStorageError,
|
||||
UserFileService,
|
||||
)
|
||||
from beaver.services.user_file_resolver import (
|
||||
@ -644,6 +645,8 @@ def create_app(
|
||||
return HTTPException(status_code=400, detail=str(exc) or "Invalid path")
|
||||
if isinstance(exc, UserFileSizeError):
|
||||
return HTTPException(status_code=413, detail=str(exc) or "File too large")
|
||||
if isinstance(exc, UserFileStorageError):
|
||||
return HTTPException(status_code=503, detail=str(exc) or "User file storage is unavailable")
|
||||
if isinstance(exc, UserFileConfigurationError):
|
||||
return HTTPException(status_code=503, detail=str(exc) or "User file storage is not configured")
|
||||
return HTTPException(status_code=400, detail=str(exc) or "User file operation failed")
|
||||
@ -1327,6 +1330,7 @@ def create_app(
|
||||
"runs": runs,
|
||||
}
|
||||
)
|
||||
sessions.sort(key=lambda item: item.get("updated_at") or item.get("created_at") or "", reverse=True)
|
||||
return {"sessions": sessions}
|
||||
|
||||
@app.post("/api/sessions/{session_id:path}/archive")
|
||||
@ -3166,6 +3170,11 @@ def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[
|
||||
title = getattr(started, "title", None)
|
||||
if title is None:
|
||||
title = source or "run"
|
||||
latency_ms = None
|
||||
if completed is not None and isinstance(completed.event_payload, dict):
|
||||
raw_latency = completed.event_payload.get("latency_ms")
|
||||
latency_ms = raw_latency if isinstance(raw_latency, dict) else None
|
||||
sorted_records = sorted(records, key=lambda item: item.timestamp or 0, reverse=True)
|
||||
runs.append(
|
||||
{
|
||||
"run_id": run_id,
|
||||
@ -3181,10 +3190,15 @@ def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[
|
||||
"started_at": _iso_from_timestamp(started.timestamp if started is not None else None),
|
||||
"ended_at": _iso_from_timestamp(completed.timestamp) if completed is not None else None,
|
||||
"finish_reason": completed.finish_reason if completed is not None else None,
|
||||
"events": [_debug_event_to_dict(item) for item in records],
|
||||
"latency_ms": latency_ms or {},
|
||||
"events": [_debug_event_to_dict(item) for item in sorted_records],
|
||||
}
|
||||
)
|
||||
return runs
|
||||
return sorted(
|
||||
runs,
|
||||
key=lambda item: item.get("ended_at") or item.get("started_at") or "",
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
|
||||
def _debug_event_to_dict(record: Any) -> dict[str, Any]:
|
||||
|
||||
@ -14,24 +14,20 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from time import perf_counter
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.coordinator.models import ExecutionNode, TeamRunResult
|
||||
from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader
|
||||
from beaver.engine.providers import make_provider_bundle
|
||||
from beaver.foundation.events import InboundMessage, OutboundMessage
|
||||
from beaver.foundation.models import CronJob, CronRunRecord
|
||||
from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
|
||||
from beaver.tasks import (
|
||||
EvidenceBuilder,
|
||||
MainAgentRouter,
|
||||
RunEvidence,
|
||||
TaskEvidencePacket,
|
||||
TaskExecutionPlan,
|
||||
TaskRecord,
|
||||
render_task_evidence,
|
||||
)
|
||||
from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
|
||||
from beaver.tasks.service import normalize_acceptance_type
|
||||
|
||||
|
||||
@ -594,15 +590,22 @@ class AgentService:
|
||||
router_provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
|
||||
router_runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
|
||||
active_task = task_service.get_latest_open_task(session_id)
|
||||
decision = await self._main_agent_router.classify(
|
||||
message,
|
||||
active_task=active_task,
|
||||
provider=router_provider,
|
||||
model=getattr(router_runtime, "model", None),
|
||||
recent_messages=session_manager.get_messages_as_conversation(session_id),
|
||||
intent_skill=self._load_intent_agent_skill(loaded),
|
||||
thinking_enabled=kwargs.get("thinking_enabled"),
|
||||
)
|
||||
router_started = perf_counter()
|
||||
try:
|
||||
decision = await self._main_agent_router.classify(
|
||||
message,
|
||||
active_task=active_task,
|
||||
provider=router_provider,
|
||||
model=getattr(router_runtime, "model", None),
|
||||
recent_messages=session_manager.get_messages_as_conversation(session_id),
|
||||
intent_skill=self._load_intent_agent_skill(loaded),
|
||||
thinking_enabled=kwargs.get("thinking_enabled"),
|
||||
)
|
||||
finally:
|
||||
kwargs["pre_run_latency_ms"] = self._merge_latency_ms(
|
||||
kwargs.get("pre_run_latency_ms"),
|
||||
{"router_ms": (perf_counter() - router_started) * 1000},
|
||||
)
|
||||
kwargs["intent_agent_decision"] = self._intent_decision_payload(
|
||||
decision,
|
||||
active_task=active_task,
|
||||
@ -751,216 +754,19 @@ class AgentService:
|
||||
task: TaskRecord,
|
||||
) -> AgentRunResult:
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
|
||||
session_manager = self._require_loaded(loaded, "session_manager")
|
||||
|
||||
base_execution_context = kwargs.get("execution_context")
|
||||
prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale")
|
||||
output_language_instruction = self._output_language_instruction(prompt_locale)
|
||||
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
|
||||
kwargs = dict(kwargs)
|
||||
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
|
||||
kwargs["provider_bundle"] = provider_bundle
|
||||
|
||||
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
return await self._build_task_attempt_orchestrator(loaded).run(
|
||||
message=message,
|
||||
runner=runner,
|
||||
kwargs=kwargs,
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
)
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
)
|
||||
attempt_kwargs["execution_context"] = self._join_context(
|
||||
base_execution_context,
|
||||
output_language_instruction,
|
||||
team_execution_context,
|
||||
)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
def _build_task_attempt_orchestrator(self, loaded: Any) -> TaskAttemptOrchestrator:
    """Create the orchestrator that runs one task attempt.

    The orchestrator receives the loaded engine bundle plus two callbacks
    from this service: ``create_loop`` and ``_make_provider_bundle_for_task``.
    """
    return TaskAttemptOrchestrator(
        loaded=loaded,
        create_loop=self.create_loop,
        make_provider_bundle_for_task=self._make_provider_bundle_for_task,
    )
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
evidence_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
}
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"evidence_status": "recorded",
|
||||
},
|
||||
)
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"evidence_debug": evidence_debug,
|
||||
},
|
||||
content=None,
|
||||
context_visible=False,
|
||||
)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = None
|
||||
return result
|
||||
|
||||
async def _run_team_for_task(
    self,
    plan: TaskExecutionPlan,
    *,
    task: TaskRecord,
    parent_session_id: str,
    provider_bundle_factory: Any,
) -> tuple[TeamRunResult | None, str | None]:
    """Run the plan's execution graph through ``TeamService``.

    Failures are reported as data rather than raised, so the caller can fall
    back to a main-agent answer.

    Args:
        plan: Execution plan; its ``graph`` is what gets executed.
        task: Task the team run belongs to (``task_id`` is forwarded).
        parent_session_id: Session id forwarded to the team run.
        provider_bundle_factory: Per-node provider bundle factory.

    Returns:
        ``(result, None)`` on success, or ``(None, error_message)`` when the
        plan has no graph or the team run raised.
    """
    if plan.graph is None:
        return None, "team plan did not include an execution graph"
    try:
        # Imported lazily inside the function, as in the original code.
        from beaver.services.team_service import TeamService

        result = await TeamService(self.create_loop()).run_team(
            plan.graph,
            parent_task_id=task.task_id,
            parent_session_id=parent_session_id,
            parent_run_id=None,
            provider_bundle_factory=provider_bundle_factory,
            allow_candidate_generation=False,
        )
        return result, None
    except Exception as exc:
        # Deliberate best-effort boundary: any failure becomes an error string.
        return None, str(exc)
|
||||
|
||||
@staticmethod
|
||||
def _require_loaded(loaded: Any, field_name: str) -> Any:
|
||||
@ -992,32 +798,15 @@ class AgentService:
|
||||
}
|
||||
|
||||
@staticmethod
def _output_language_instruction(prompt_locale: str | None) -> str:
    """Return the output-language instruction for the given prompt locale.

    The locale is first normalized with ``normalize_main_agent_prompt_locale``.
    ``"en"`` yields the English instruction, ``"zh-Hant"`` the Traditional
    Chinese one, and any other normalized value the Simplified Chinese one.
    """
    locale = normalize_main_agent_prompt_locale(prompt_locale)
    if locale == "en":
        return (
            "Output language: English. Use English for user-facing task titles, summaries, plans, "
            "and final answers unless the user explicitly requests another language."
        )
    if locale == "zh-Hant":
        return (
            "輸出語言:繁體中文。除非使用者明確要求其他語言,所有面向使用者的任務標題、摘要、"
            "計劃與最終回答都使用繁體中文。"
        )
    # Fallback for every other normalized locale: Simplified Chinese.
    return (
        "输出语言:简体中文。除非用户明确要求其他语言,所有面向用户的任务标题、摘要、"
        "计划与最终回答都使用简体中文。"
    )
|
||||
|
||||
@staticmethod
|
||||
def _skill_names_for_run(loaded: Any, run_id: str) -> list[str]:
    """Return the names of skills activated during the given run.

    Looks the run up in ``loaded.run_memory_store`` (if present) and collects
    ``skill_name`` from each of its ``activated_skills`` receipts.

    Returns:
        The skill names of the first record whose ``run_id`` matches, or an
        empty list when there is no store or no matching record.
    """
    store = getattr(loaded, "run_memory_store", None)
    if store is None:
        return []
    for record in store.list_runs():
        if record.run_id == run_id:
            return [receipt.skill_name for receipt in record.activated_skills]
    return []
|
||||
def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]:
    """Merge new latency measurements into an existing latency mapping.

    Args:
        current: Existing latencies; anything that is not a dict is ignored,
            as are non-numeric values inside it. Keys are stringified.
        updates: New measurements in milliseconds, added onto matching keys.

    Returns:
        A new dict; neither argument is mutated.
    """
    merged: dict[str, float] = {}
    if isinstance(current, dict):
        for key, value in current.items():
            if isinstance(value, (int, float)):
                merged[str(key)] = float(value)
    for key, value in updates.items():
        merged[key] = merged.get(key, 0.0) + float(value)
    return merged
|
||||
|
||||
@staticmethod
|
||||
def _acceptance_score_for_learning(acceptance_type: str) -> float:
|
||||
@ -1027,237 +816,6 @@ class AgentService:
|
||||
return 0.5
|
||||
return 0.0
|
||||
|
||||
@staticmethod
def _build_skill_selection_context(
    *,
    task: TaskRecord,
    user_message: str,
    attempt_index: int,
    plan: TaskExecutionPlan | None = None,
    team_summaries: list[str] | None = None,
) -> str:
    """Render the text context used to select skills for a task attempt.

    The context is a sequence of titled sections separated by blank lines:
    task goal/description, the current request, the execution phase, task
    status, constraints, previously activated skills, recent acceptance
    history, the execution plan, team summaries, and a closing instruction.

    Args:
        task: Task being executed.
        user_message: The current user request.
        attempt_index: Attempt number, embedded in the phase label.
        plan: Optional execution plan to describe.
        team_summaries: Optional team summaries; the joined text is capped
            at 2400 characters.

    Returns:
        The rendered context string.
    """
    # Phase label: a trailing "revise" feedback wins over a team plan.
    phase = f"attempt_{attempt_index}"
    if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
        phase = f"revision_attempt_{attempt_index}"
    elif plan is not None and plan.is_team:
        phase = f"team_synthesis_attempt_{attempt_index}"

    sections = [
        f"Task goal:\n{task.goal or task.description}",
        f"Task description:\n{task.description}",
        f"Current user request:\n{user_message}",
        f"Execution phase:\n{phase}",
        f"Task status:\n{task.status}",
    ]
    if task.constraints:
        sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints))
    if task.skill_names:
        sections.append(
            "Previously activated skills (reuse bias, not pinned):\n"
            + "\n".join(f"- {item}" for item in task.skill_names)
        )
    else:
        sections.append("Previously activated skills:\nNone")
    if task.feedback:
        history_lines = []
        # Only the five most recent feedback entries are included.
        for item in task.feedback[-5:]:
            kind = item.get("acceptance_type") or item.get("feedback_type")
            comment = item.get("comment") or ""
            run_id = item.get("run_id") or ""
            history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
        sections.append("Task acceptance history:\n" + "\n".join(history_lines))
    if plan is not None:
        plan_lines = [
            f"mode: {plan.mode}",
            f"reason: {plan.reason}",
        ]
        if plan.final_synthesis_instruction:
            plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
        if plan.graph is not None:
            plan_lines.append(f"strategy: {plan.graph.strategy}")
            plan_lines.append(
                "nodes:\n"
                + "\n".join(
                    f"- {node.node_id}: {node.task}"
                    for node in plan.graph.nodes
                )
            )
        sections.append("Execution plan:\n" + "\n".join(plan_lines))
    if team_summaries:
        # The cap applies to the joined summaries only, not to the heading.
        sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
    sections.append(
        "Skill selection instruction:\n"
        "Prefer reusing previously activated skills when they still match the Task. "
        "Select new skills only if the current request, revision, or execution plan needs a different capability. "
        "If no published skill matches, return [] and let the run continue without skills."
    )
    return "\n\n".join(section for section in sections if section.strip())
|
||||
|
||||
@staticmethod
def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str:
    """Return a short ``role: content`` transcript of a run.

    Only events that are context-visible and have non-empty content are
    included. At most the first 12 such lines are kept and the joined text is
    capped at 2400 characters.
    """
    lines = []
    for event in session_manager.get_run_event_records(session_id, run_id):
        if event.context_visible and event.content:
            lines.append(f"{event.role}: {event.content.strip()}")
    return "\n".join(lines[:12])[:2400]
|
||||
|
||||
@staticmethod
def _tool_summaries(session_manager: Any, session_id: str, run_id: str) -> list[str]:
    """Summarize the tool results recorded for a run.

    Each ``tool_result_recorded`` event with non-blank content becomes
    ``"<tool_name or 'tool'>: <first 500 chars of content>"``. At most the
    first 12 summaries are returned.
    """
    summaries = []
    for event in session_manager.get_run_event_records(session_id, run_id):
        if event.event_type != "tool_result_recorded":
            continue
        text = (event.content or "").strip()
        if text:
            summaries.append(f"{event.tool_name or 'tool'}: {text[:500]}")
    return summaries[:12]
|
||||
|
||||
@staticmethod
def _append_task_observation(
    session_manager: Any,
    session_id: str,
    *,
    event_type: str,
    payload: dict[str, Any],
) -> None:
    """Append a task observation event to the session as a system message.

    The message is written with ``context_visible=False``. Its content is the
    payload's ``reason`` if truthy, otherwise its ``error`` (possibly None).

    Args:
        session_manager: Object exposing ``append_message``.
        session_id: Session to append to.
        event_type: Event type recorded on the message.
        payload: Event payload stored alongside the message.
    """
    session_manager.append_message(
        session_id,
        role="system",
        event_type=event_type,
        event_payload=payload,
        content=payload.get("reason") or payload.get("error"),
        context_visible=False,
    )
|
||||
|
||||
@staticmethod
|
||||
def _join_context(*parts: str | None) -> str:
|
||||
return "\n\n".join(part.strip() for part in parts if part and part.strip())
|
||||
|
||||
@staticmethod
def _team_summary_for_validation(result: TeamRunResult) -> str:
    """Render a team run result as a compact, line-oriented summary.

    The output lists overall success, the task id, the team summary, and one
    line per node with its success flag, finish reason, error, and the first
    500 characters of its output.
    """
    lines = [
        f"success={result.success}",
        f"task_id={result.task_id or ''}",
        "summary:",
        result.summary,
        "nodes:",
    ]
    for node in result.node_results:
        lines.append(
            f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
            f"error={node.error or ''} output={node.output_text[:500]}"
        )
    return "\n".join(lines)
|
||||
|
||||
@staticmethod
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
    """Build per-node event payloads for a team run.

    Each node result is serialized with ``to_dict()``. When the plan's graph
    contains a node with the same ``node_id``, skill-related fields from that
    node (pinned skills, ephemeral skill contexts, and guidance metadata) are
    added to the payload.

    Returns:
        One payload dict per entry in ``result.node_results``, in order.
    """
    nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
    payloads: list[dict[str, Any]] = []
    for item in result.node_results:
        payload = item.to_dict()
        node = nodes.get(item.node_id)
        # Results without a matching plan node are emitted without enrichment.
        if node is not None:
            payload["selected_skill_names"] = list(node.inherited_pinned_skills)
            payload["ephemeral_skill_names"] = [
                skill.name for skill in node.inherited_pinned_skill_contexts
            ]
            payload["skill_query"] = node.agent.metadata.get("skill_query")
            payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
            payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
            payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
        payloads.append(payload)
    return payloads
|
||||
|
||||
@staticmethod
def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
    """Collect the evidence objects attached to a team run's node results.

    Returns an empty list when there is no team result; nodes whose
    ``evidence`` is None are skipped.
    """
    if result is None:
        return []
    return [node.evidence for node in result.node_results if node.evidence is not None]
|
||||
|
||||
def _build_task_evidence_packet(
    self,
    *,
    session_manager: Any,
    task: TaskRecord,
    attempt_index: int,
    result: AgentRunResult,
    team_result: TeamRunResult | None,
) -> TaskEvidencePacket:
    """Assemble the evidence packet for one task attempt.

    The main run's evidence is built by ``EvidenceBuilder`` from the run
    result; team evidence and node results are taken from ``team_result``
    when one is given.

    Args:
        session_manager: Session manager handed to ``EvidenceBuilder``.
        task: Task the attempt belongs to.
        attempt_index: Attempt number recorded on the packet.
        result: Result of the main agent run.
        team_result: Result of the team run, or None when no team ran.

    Returns:
        The populated ``TaskEvidencePacket``.
    """
    main_run = EvidenceBuilder(session_manager).build_run_evidence(
        result.session_id,
        result.run_id,
        result.output_text,
        result.finish_reason,
    )
    return TaskEvidencePacket(
        task_id=task.task_id,
        attempt_index=attempt_index,
        main_run=main_run,
        team_runs=self._team_run_evidence(team_result),
        team_node_results=list(team_result.node_results) if team_result is not None else [],
        final_output=result.output_text,
    )
|
||||
|
||||
@staticmethod
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
    """Render a completed team run as execution context for final synthesis.

    The text contains the planner reason, strategy, overall success, the team
    summary, one entry per node (status line followed by its full output),
    the plan's final synthesis instruction when set, and a closing guidance
    paragraph. Sections are separated by blank lines.
    """
    node_lines = [
        (
            f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
            f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
        )
        for node in result.node_results
    ]
    return "\n\n".join(
        item
        for item in [
            "Task team execution result:",
            f"Planner reason: {plan.reason}",
            f"Strategy: {plan.graph.strategy if plan.graph else ''}",
            f"Team success: {result.success}",
            f"Team summary:\n{result.summary}",
            "Node results:\n" + "\n\n".join(node_lines),
            (
                "Final synthesis instruction:\n" + plan.final_synthesis_instruction
                if plan.final_synthesis_instruction
                else None
            ),
            (
                "Use successful team outputs as internal evidence. If one or more nodes failed, "
                "do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
                "with available evidence and clearly state any missing or uncertain data."
            ),
        ]
        # Drops the None placeholder left by an absent synthesis instruction.
        if item
    )
|
||||
|
||||
@staticmethod
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
    """Render execution context for a team run that failed outright.

    The text states the planner reason, strategy, and error, then instructs
    the main agent to produce a fallback answer. Sections are separated by
    blank lines.
    """
    return "\n\n".join(
        [
            "Task team execution failed before final synthesis.",
            f"Planner reason: {plan.reason}",
            f"Strategy: {plan.graph.strategy if plan.graph else ''}",
            f"Error: {error}",
            (
                "Proceed as the main agent. Do not blindly repeat failed tool calls; "
                "produce a user-visible fallback answer with available evidence and clearly "
                "state any missing or uncertain data."
            ),
        ]
    )
|
||||
|
||||
def _build_team_provider_bundle_factory(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
    """Return a factory that builds a provider bundle for one team node.

    The factory copies ``kwargs``, drops any ``provider_bundle`` entry,
    applies the node agent's ``model`` / ``provider_name`` overrides when
    set, and delegates to ``_make_provider_bundle_for_task``.

    Args:
        loaded: Loaded engine bundle passed through to the bundle builder.
        kwargs: Base run keyword arguments; never mutated by the factory.

    Returns:
        A callable taking an ``ExecutionNode`` and returning a bundle.
    """

    def factory(node: ExecutionNode) -> Any:
        node_kwargs = dict(kwargs)
        # Drop any inherited bundle so it is not passed on to the builder.
        node_kwargs.pop("provider_bundle", None)
        if node.agent.model:
            node_kwargs["model"] = node.agent.model
        if node.agent.provider_name:
            node_kwargs["provider_name"] = node.agent.provider_name
        return self._make_provider_bundle_for_task(loaded, node_kwargs)

    return factory
|
||||
|
||||
def _make_provider_bundle_for_task(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
|
||||
config = loaded.config
|
||||
configured_provider = config.resolve_provider_target(
|
||||
|
||||
@ -40,6 +40,10 @@ class UserFileSizeError(UserFileError):
|
||||
"""Raised when a user file upload exceeds configured limits."""
|
||||
|
||||
|
||||
class UserFileStorageError(UserFileError):
    """Raised when the backing user-file storage cannot complete an operation."""
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AgentUserFilePolicy:
|
||||
task_id: str | None = None
|
||||
@ -387,26 +391,34 @@ class MinIOUserFileStorage:
|
||||
|
||||
async def list_dir(self, path: str) -> list[UserFileEntry]:
    """List the direct children of a user directory in the bucket.

    Objects equal to ``path`` itself and ``.keep`` placeholder objects are
    skipped. Directories sort before files; within each group entries are
    ordered by lower-cased name.

    Args:
        path: User-facing directory path.

    Returns:
        The directory's entries, sorted as described above.

    Raises:
        UserFileError: Propagated unchanged if raised while building entries.
        Exception: Any other client failure is re-raised as the error built
            by ``_minio_storage_error("list directory", exc)``.
    """
    prefix = self._object_prefix(path)
    try:
        objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False)
    except Exception as exc:
        raise _minio_storage_error("list directory", exc) from exc
    entries: list[UserFileEntry] = []
    # The iteration is guarded as well as the list_objects call above.
    try:
        for obj in objects:
            object_name = str(obj.object_name or "")
            user_path = self._user_path(object_name)
            if not user_path or user_path == path or user_path.endswith("/.keep"):
                continue
            trimmed = user_path.rstrip("/")
            name = PurePosixPath(trimmed).name
            is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/")
            entries.append(
                UserFileEntry(
                    name=name,
                    path=trimmed,
                    type="directory" if is_dir else "file",
                    size=None if is_dir else getattr(obj, "size", None),
                    content_type=None if is_dir else "application/octet-stream",
                    modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None,
                )
            )
    except UserFileError:
        # Domain errors are already meaningful to callers; do not re-wrap.
        raise
    except Exception as exc:
        raise _minio_storage_error("list directory", exc) from exc
    return sorted(entries, key=lambda item: (item.type != "directory", item.name.lower()))
|
||||
|
||||
async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
|
||||
@ -421,7 +433,9 @@ class MinIOUserFileStorage:
|
||||
response.close()
|
||||
response.release_conn()
|
||||
except Exception as exc:
|
||||
raise UserFileNotFoundError("File not found") from exc
|
||||
if _minio_error_code(exc) in {"NoSuchKey", "NoSuchObject"}:
|
||||
raise UserFileNotFoundError("File not found") from exc
|
||||
raise _minio_storage_error("read file", exc) from exc
|
||||
return UserFileContent(
|
||||
name=PurePosixPath(path).name,
|
||||
path=path,
|
||||
@ -433,13 +447,16 @@ class MinIOUserFileStorage:
|
||||
|
||||
async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
|
||||
object_name = self._object_name(path)
|
||||
result = self.client.put_object(
|
||||
self.config.bucket,
|
||||
object_name,
|
||||
BytesIO(content),
|
||||
length=len(content),
|
||||
content_type=content_type,
|
||||
)
|
||||
try:
|
||||
self.client.put_object(
|
||||
self.config.bucket,
|
||||
object_name,
|
||||
BytesIO(content),
|
||||
length=len(content),
|
||||
content_type=content_type,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise _minio_storage_error("write file", exc) from exc
|
||||
return UserFileEntry(
|
||||
name=PurePosixPath(path).name,
|
||||
path=path,
|
||||
@ -475,6 +492,8 @@ class MinIOUserFileStorage:
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise _minio_storage_error("write file", exc) from exc
|
||||
return UserFileEntry(
|
||||
name=PurePosixPath(path).name,
|
||||
path=path,
|
||||
@ -490,23 +509,30 @@ class MinIOUserFileStorage:
|
||||
try:
|
||||
self.client.remove_object(self.config.bucket, object_name)
|
||||
removed = True
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
if _minio_error_code(exc) != "NoSuchKey":
|
||||
raise _minio_storage_error("delete path", exc) from exc
|
||||
prefix = f"{object_name.rstrip('/')}/"
|
||||
for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True):
|
||||
self.client.remove_object(self.config.bucket, str(obj.object_name))
|
||||
removed = True
|
||||
try:
|
||||
for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True):
|
||||
self.client.remove_object(self.config.bucket, str(obj.object_name))
|
||||
removed = True
|
||||
except Exception as exc:
|
||||
raise _minio_storage_error("delete path", exc) from exc
|
||||
return removed
|
||||
|
||||
async def mkdir(self, path: str) -> UserFileEntry:
|
||||
object_name = f"{self._object_name(path).rstrip('/')}/.keep"
|
||||
self.client.put_object(
|
||||
self.config.bucket,
|
||||
object_name,
|
||||
BytesIO(b""),
|
||||
length=0,
|
||||
content_type="application/x-directory",
|
||||
)
|
||||
try:
|
||||
self.client.put_object(
|
||||
self.config.bucket,
|
||||
object_name,
|
||||
BytesIO(b""),
|
||||
length=0,
|
||||
content_type="application/x-directory",
|
||||
)
|
||||
except Exception as exc:
|
||||
raise _minio_storage_error("create directory", exc) from exc
|
||||
return UserFileEntry(
|
||||
name=PurePosixPath(path).name,
|
||||
path=path,
|
||||
@ -600,6 +626,18 @@ def _safe_scope(value: str | None) -> str:
|
||||
return cleaned or "interactive"
|
||||
|
||||
|
||||
def _minio_error_code(exc: Exception) -> str:
|
||||
return str(getattr(exc, "code", "") or "")
|
||||
|
||||
|
||||
def _minio_storage_error(operation: str, exc: Exception) -> UserFileStorageError:
    """Build the storage error reported for a failed backend *operation*.

    The message names the operation and, when the backend exception carries
    an error code, appends that code after a colon. The error is returned,
    not raised; callers raise it themselves.
    """
    error_code = _minio_error_code(exc)
    suffix = f": {error_code}" if error_code else ""
    return UserFileStorageError(f"User file storage {operation} failed{suffix}")
|
||||
|
||||
|
||||
class _LimitedReadStream:
|
||||
def __init__(self, stream: object, *, max_bytes: int | None = None) -> None:
|
||||
self.stream = stream
|
||||
|
||||
@ -83,6 +83,12 @@ class SkillAssembler:
|
||||
return SkillAssemblyResult()
|
||||
llm_interactions: list[dict[str, Any]] = []
|
||||
|
||||
if len(candidates) == 1:
|
||||
return SkillAssemblyResult(
|
||||
activated_skills=self._activate_skill_contexts([candidates[0]["name"]]),
|
||||
llm_interactions=llm_interactions,
|
||||
)
|
||||
|
||||
if len(candidates) <= self.max_detailed_candidates:
|
||||
shortlisted_names = [item["name"] for item in candidates]
|
||||
else:
|
||||
@ -115,6 +121,10 @@ class SkillAssembler:
|
||||
if not selected_names:
|
||||
return SkillAssemblyResult(llm_interactions=llm_interactions)
|
||||
|
||||
activated_skills = self._activate_skill_contexts(selected_names)
|
||||
return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions)
|
||||
|
||||
def _activate_skill_contexts(self, selected_names: list[str]) -> list[SkillContext]:
|
||||
activated_skills: list[SkillContext] = []
|
||||
for name in selected_names:
|
||||
record = self.loader.get_skill_record(name)
|
||||
@ -130,10 +140,11 @@ class SkillAssembler:
|
||||
content_hash=record.content_hash or "" if record is not None else "",
|
||||
activation_reason="llm_selected",
|
||||
tool_hints=list(record.tool_hints) if record is not None else [],
|
||||
team_template=getattr(record, "team_template", None) if record is not None else None,
|
||||
team_template_warnings=list(getattr(record, "team_template_warnings", [])) if record is not None else [],
|
||||
)
|
||||
)
|
||||
|
||||
return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions)
|
||||
return activated_skills
|
||||
|
||||
async def _select_skill_names(
|
||||
self,
|
||||
|
||||
@ -28,6 +28,7 @@ from .utils import (
|
||||
check_requirements,
|
||||
escape_xml,
|
||||
extract_required_tool_names,
|
||||
extract_skill_team_template,
|
||||
get_missing_requirements,
|
||||
parse_frontmatter,
|
||||
parse_skill_metadata_blob,
|
||||
@ -49,6 +50,8 @@ class SkillRecord:
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
frontmatter: dict[str, Any] = field(default_factory=dict)
|
||||
description: str = ""
|
||||
team_template: dict[str, Any] | None = None
|
||||
team_template_warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillsLoader:
|
||||
@ -113,6 +116,7 @@ class SkillsLoader:
|
||||
continue
|
||||
normalized_frontmatter = dict(frontmatter)
|
||||
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
|
||||
template_result = extract_skill_team_template(body)
|
||||
record = SkillRecord(
|
||||
name=name,
|
||||
path=skill_file,
|
||||
@ -127,6 +131,8 @@ class SkillsLoader:
|
||||
),
|
||||
frontmatter=normalized_frontmatter,
|
||||
description=str(frontmatter.get("description") or summarize_body(body) or name),
|
||||
team_template=template_result.template,
|
||||
team_template_warnings=template_result.warnings,
|
||||
)
|
||||
if filter_unavailable and not self._record_available(record):
|
||||
continue
|
||||
@ -146,6 +152,7 @@ class SkillsLoader:
|
||||
else:
|
||||
path = self.workspace_skills / name / "versions" / loaded.version.version / "SKILL.md"
|
||||
_frontmatter, body = parse_frontmatter(loaded.content)
|
||||
template_result = extract_skill_team_template(body)
|
||||
record = SkillRecord(
|
||||
name=name,
|
||||
path=path,
|
||||
@ -160,6 +167,8 @@ class SkillsLoader:
|
||||
),
|
||||
frontmatter=dict(loaded.version.frontmatter),
|
||||
description=str(loaded.version.frontmatter.get("description") or loaded.version.summary or name),
|
||||
team_template=template_result.template,
|
||||
team_template_warnings=template_result.warnings,
|
||||
)
|
||||
if filter_unavailable and not self._record_available(record):
|
||||
continue
|
||||
|
||||
@ -17,6 +17,7 @@ import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@ -84,6 +85,27 @@ def strip_frontmatter(content: str) -> str:
|
||||
return body
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SkillTeamTemplateParseResult:
    """Result of extracting a team template from a skill body."""

    # Parsed template object; stays None when no usable template was found.
    template: dict[str, Any] | None = None
    # Messages describing why a template was rejected; empty by default.
    warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def extract_skill_team_template(body: str) -> SkillTeamTemplateParseResult:
    """Extract the ``beaver-team-template`` fenced JSON block from a skill body.

    Returns an empty result when the body has no such block. Returns a result
    carrying a single warning (and no template) when there is more than one
    block, when the JSON does not parse, or when the parsed value is not an
    object whose ``nodes`` entry (if present) is a list.
    """
    fenced_blocks = re.findall(r"```beaver-team-template\s*\n(.*?)\n```", body, re.DOTALL)
    block_count = len(fenced_blocks)
    if block_count == 0:
        return SkillTeamTemplateParseResult()
    if block_count > 1:
        return SkillTeamTemplateParseResult(warnings=["skill defines multiple team templates"])

    (raw_json,) = fenced_blocks
    try:
        parsed = json.loads(raw_json)
    except json.JSONDecodeError:
        return SkillTeamTemplateParseResult(warnings=["team template JSON is invalid"])

    # A missing "nodes" key is tolerated; a present non-list value is not.
    well_formed = isinstance(parsed, dict) and isinstance(parsed.get("nodes", []), list)
    if well_formed:
        return SkillTeamTemplateParseResult(template=parsed)
    return SkillTeamTemplateParseResult(warnings=["team template must be an object with a nodes list"])
|
||||
|
||||
|
||||
def extract_required_tool_names(body: str) -> list[str]:
|
||||
"""从 canonical skill 正文的 `## Required Tools` 段落提取工具名。
|
||||
|
||||
|
||||
@ -284,6 +284,9 @@ def _build_replay_case_reports(
|
||||
"side_effects": [*baseline.get("side_effects", []), *candidate_arm.get("side_effects", [])],
|
||||
"validator_notes": list(surrogate.get("notes") or []),
|
||||
}
|
||||
historical_accepted_score = _historical_accepted_score(case)
|
||||
if historical_accepted_score is not None:
|
||||
case_report["historical_accepted_score"] = historical_accepted_score
|
||||
return case_report, {
|
||||
"run_id": case["run_id"],
|
||||
"session_id": case.get("session_id") or "",
|
||||
@ -293,6 +296,7 @@ def _build_replay_case_reports(
|
||||
"baseline_score": baseline_score,
|
||||
"candidate_score": candidate_score,
|
||||
"delta": round(candidate_score - baseline_score, 4),
|
||||
**({"historical_accepted_score": historical_accepted_score} if historical_accepted_score is not None else {}),
|
||||
}
|
||||
|
||||
|
||||
@ -658,8 +662,11 @@ def _ability_score(*, case: dict[str, Any], arm: dict[str, Any], arm_name: str)
|
||||
if validator is not None:
|
||||
return _ability_from_validator(validator, arm)
|
||||
if not case.get("synthetic"):
|
||||
score = _bounded_score(case.get("accepted_score"), default=0.75) if arm_name == "baseline" else _ability_from_output(arm)["final_score"]
|
||||
return _ability_breakdown(score=score, source="user_feedback" if arm_name == "baseline" else "llm_judge")
|
||||
result = _ability_from_output(arm, source="output_heuristic")
|
||||
historical_accepted_score = _historical_accepted_score(case)
|
||||
if historical_accepted_score is not None:
|
||||
result["historical_accepted_score"] = historical_accepted_score
|
||||
return result
|
||||
return _ability_breakdown(score=0.0, source="unscored", notes=["Synthetic cases require a validator."])
|
||||
|
||||
|
||||
@ -697,6 +704,12 @@ def _ability_from_output(arm: dict[str, Any], *, source: str = "llm_judge", note
|
||||
return _ability_breakdown(score=score, source=source, notes=notes)
|
||||
|
||||
|
||||
def _historical_accepted_score(case: dict[str, Any]) -> float | None:
|
||||
if case.get("synthetic") or isinstance(case.get("validator"), dict) or "accepted_score" not in case:
|
||||
return None
|
||||
return _bounded_score(case.get("accepted_score"), default=0.75)
|
||||
|
||||
|
||||
def _ability_breakdown(*, score: float, source: str, notes: list[str] | None = None) -> dict[str, Any]:
|
||||
bounded = _bounded_score(score, default=0.0)
|
||||
return {
|
||||
|
||||
695
app-instance/backend/beaver/tasks/attempt_orchestrator.py
Normal file
695
app-instance/backend/beaver/tasks/attempt_orchestrator.py
Normal file
@ -0,0 +1,695 @@
|
||||
"""Task attempt orchestration for Beaver Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from time import perf_counter
|
||||
from typing import Any, Callable
|
||||
|
||||
from beaver.coordinator.models import ExecutionNode, TeamRunResult
|
||||
from beaver.engine import AgentRunResult
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
|
||||
|
||||
from .evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
|
||||
from .models import TaskRecord
|
||||
from .planner import TaskExecutionPlan
|
||||
|
||||
|
||||
class TaskAttemptOrchestrator:
|
||||
"""Own the execution order inside one Task attempt."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
loaded: Any,
|
||||
create_loop: Callable[[], Any],
|
||||
make_provider_bundle_for_task: Callable[[Any, dict[str, Any]], Any],
|
||||
) -> None:
|
||||
self.loaded = loaded
|
||||
self.create_loop = create_loop
|
||||
self.make_provider_bundle_for_task = make_provider_bundle_for_task
|
||||
|
||||
async def run(
    self,
    *,
    message: str,
    runner: Any,
    kwargs: dict[str, Any],
    task: TaskRecord,
) -> AgentRunResult:
    """Execute one attempt of *task* and return the main run's result.

    Steps, in order: register the attempt with the task service, pre-select
    skills, ask the planner for an execution plan, optionally run the team
    graph, run the main agent via *runner* with the accumulated context, then
    record evidence and observation events in the session.

    Args:
        message: The user message driving this attempt.
        runner: Awaitable callable invoked as ``runner(message, **attempt_kwargs)``.
        kwargs: Run options; copied before being modified. ``session_id`` is
            required when the plan is a team plan.
        task: The task record this attempt belongs to.

    Returns:
        The runner's result, annotated with ``task_id``, ``task_status`` and a
        cleared ``validation_result``.
    """
    task_service = self._require_loaded(self.loaded, "task_service")
    task_execution_planner = self._require_loaded(self.loaded, "task_execution_planner")
    session_manager = self._require_loaded(self.loaded, "session_manager")

    base_execution_context = kwargs.get("execution_context")
    prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale")
    output_language_instruction = self._output_language_instruction(prompt_locale)
    provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs)
    # Work on a copy so the caller's dict is not mutated by the pops/assignments below.
    kwargs = dict(kwargs)
    team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
    kwargs["provider_bundle"] = provider_bundle

    # Attempts are numbered from 1, continuing from the last index stored on the task.
    attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
    task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
    pre_skill_context = self._build_skill_selection_context(
        task=task,
        user_message=message,
        attempt_index=attempt_index,
    )
    preselected_skills, pre_skill_latency_ms = await self._assemble_task_attempt_skills(
        task_description=pre_skill_context,
        provider_bundle=provider_bundle,
        thinking_enabled=kwargs.get("thinking_enabled"),
        include_skill_assembly=bool(kwargs.get("include_skill_assembly", True)),
        pinned_skill_contexts=kwargs.get("pinned_skill_contexts"),
    )
    if pre_skill_latency_ms:
        # Fold the skill pre-selection time into any latency figures the caller passed in.
        kwargs["pre_run_latency_ms"] = self._merge_latency_ms(
            kwargs.get("pre_run_latency_ms"),
            {"pre_skill_assembly_ms": pre_skill_latency_ms},
        )
    plan = await task_execution_planner.plan(
        task=task,
        user_message=message,
        attempt_index=attempt_index,
        provider_bundle=provider_bundle,
        skill_summaries=self._skill_summaries_for_planner(preselected_skills),
        tool_hints=self._tool_hints_for_skills(preselected_skills),
        activated_skills=preselected_skills,
    )
    self._append_task_observation(
        session_manager,
        task.session_id,
        event_type="task_execution_planned",
        payload={
            "task_id": task.task_id,
            "attempt_index": attempt_index,
            **plan.to_event_payload(),
        },
    )
    team_summaries: list[str] = []
    team_execution_context = ""
    team_result: TeamRunResult | None = None
    if plan.is_team:
        # A caller-supplied factory wins; otherwise one is derived from the run kwargs.
        team_result, team_error = await self._run_team_for_task(
            plan,
            task=task,
            parent_session_id=kwargs["session_id"],
            provider_bundle_factory=team_provider_bundle_factory
            or self._build_team_provider_bundle_factory(kwargs),
        )
        if team_result is not None:
            team_summaries = [self._team_summary_for_validation(team_result)]
            # Team-only evidence packet (no main run yet), rendered into the synthesis context.
            team_packet = TaskEvidencePacket(
                task_id=task.task_id,
                attempt_index=attempt_index,
                main_run=None,
                team_runs=self._team_run_evidence(team_result),
                team_node_results=list(team_result.node_results),
                final_output="",
            )
            team_execution_context = self._join_context(
                self._team_execution_context(plan, team_result),
                "Rendered team evidence:\n" + render_task_evidence(team_packet),
            )
            self._append_task_observation(
                session_manager,
                task.session_id,
                event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
                payload={
                    "task_id": task.task_id,
                    "attempt_index": attempt_index,
                    "plan_mode": plan.mode,
                    "strategy": plan.graph.strategy if plan.graph else None,
                    "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                    "team_run_ids": team_result.run_ids,
                    "team_success": team_result.success,
                    "node_results": self._team_node_results_for_event(plan, team_result),
                    "reason": plan.reason,
                    "error": None if team_result.success else "one or more team nodes failed",
                },
            )
        else:
            # The team run produced no result at all; record the error and keep going
            # so the main run can still report on the failure.
            team_summaries = [f"Team execution failed: {team_error}"]
            team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
            self._append_task_observation(
                session_manager,
                task.session_id,
                event_type="task_team_run_failed",
                payload={
                    "task_id": task.task_id,
                    "attempt_index": attempt_index,
                    "plan_mode": plan.mode,
                    "strategy": plan.graph.strategy if plan.graph else None,
                    "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                    "team_run_ids": [],
                    "team_success": False,
                    "reason": plan.reason,
                    "error": team_error,
                },
            )

    outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome(
        plan,
        team_result,
        prompt_locale=prompt_locale,
    )
    if plan.is_team:
        # Put the outcome summary ahead of the detailed team context.
        team_execution_context = self._join_context(outcome_context, team_execution_context)

    attempt_kwargs = dict(kwargs)
    attempt_kwargs.update(
        {
            "task_id": task.task_id,
            "task_mode": True,
            "attempt_index": attempt_index,
            "allow_candidate_generation": False,
            # Skills were already chosen above, so the main run pins them and skips assembly.
            "pinned_skill_contexts": preselected_skills,
            "include_skill_assembly": False,
        }
    )
    attempt_kwargs["execution_context"] = self._join_context(
        base_execution_context,
        output_language_instruction,
        team_execution_context,
    )
    if plan.is_team and team_execution_context:
        # After a team run the main run gets no tools and no tool iterations.
        attempt_kwargs["include_tools"] = False
        attempt_kwargs["max_tool_iterations"] = 0
    attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
        task=task,
        user_message=message,
        attempt_index=attempt_index,
        plan=plan,
        team_summaries=team_summaries,
    )

    result = await runner(message, **attempt_kwargs)
    if outcome_metadata["task_outcome"] == "incomplete":
        # Make sure the final text carries an incompleteness notice.
        result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix)
    self._append_task_observation(
        session_manager,
        task.session_id,
        event_type="task_synthesis_completed",
        payload={
            "task_id": task.task_id,
            "attempt_index": attempt_index,
            "main_run_id": result.run_id,
            "plan_mode": plan.mode,
            "strategy": plan.graph.strategy if plan.graph else None,
            **outcome_metadata,
        },
    )
    # append_run returns the task record used for the status fields below.
    task = task_service.append_run(
        task.task_id,
        result.run_id,
        skill_names=self._skill_names_for_run(result.run_id),
    )
    evidence_packet = self._build_task_evidence_packet(
        session_manager=session_manager,
        task=task,
        attempt_index=attempt_index,
        result=result,
        team_result=team_result,
    )
    evidence_text = render_task_evidence(evidence_packet)
    # Only summary figures about the evidence are stored in the event, not the rendered text.
    evidence_debug = {
        "evidence_run_ids": [
            item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
        ],
        "evidence_session_ids": [
            item.session_id
            for item in [evidence_packet.main_run, *evidence_packet.team_runs]
            if item is not None
        ],
        "tool_result_count": sum(
            len(item.tool_results)
            for item in [evidence_packet.main_run, *evidence_packet.team_runs]
            if item is not None
        ),
        "evidence_length": len(evidence_text),
    }
    session_manager.update_latest_assistant_event_payload(
        result.session_id,
        result.run_id,
        {
            "task_id": task.task_id,
            "task_status": task.status,
            "evidence_status": "recorded",
        },
    )
    session_manager.append_message(
        result.session_id,
        run_id=result.run_id,
        role="system",
        event_type="task_evidence_recorded",
        event_payload={
            "task_id": task.task_id,
            "attempt_index": attempt_index,
            "evidence_debug": evidence_debug,
        },
        content=None,
        context_visible=False,
    )
    result.task_id = task.task_id
    result.task_status = task.status
    # No validation is performed in this method; the field is explicitly cleared.
    result.validation_result = None
    return result
|
||||
|
||||
async def _run_team_for_task(
|
||||
self,
|
||||
plan: TaskExecutionPlan,
|
||||
*,
|
||||
task: TaskRecord,
|
||||
parent_session_id: str,
|
||||
provider_bundle_factory: Any,
|
||||
) -> tuple[TeamRunResult | None, str | None]:
|
||||
if plan.graph is None:
|
||||
return None, "team plan did not include an execution graph"
|
||||
try:
|
||||
from beaver.services.team_service import TeamService
|
||||
|
||||
result = await TeamService(self.create_loop()).run_team(
|
||||
plan.graph,
|
||||
parent_task_id=task.task_id,
|
||||
parent_session_id=parent_session_id,
|
||||
parent_run_id=None,
|
||||
provider_bundle_factory=provider_bundle_factory,
|
||||
allow_candidate_generation=False,
|
||||
)
|
||||
return result, None
|
||||
except Exception as exc:
|
||||
return None, str(exc)
|
||||
|
||||
async def _assemble_task_attempt_skills(
|
||||
self,
|
||||
*,
|
||||
task_description: str,
|
||||
provider_bundle: Any,
|
||||
thinking_enabled: bool | None,
|
||||
include_skill_assembly: bool,
|
||||
pinned_skill_contexts: Any,
|
||||
) -> tuple[list[SkillContext], float]:
|
||||
started = perf_counter()
|
||||
selected = self._coerce_skill_contexts(pinned_skill_contexts)
|
||||
if include_skill_assembly:
|
||||
skill_assembler = self._require_loaded(self.loaded, "skill_assembler")
|
||||
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
|
||||
assembled = await skill_assembler.assemble(
|
||||
task_description=task_description,
|
||||
provider=provider_bundle.auxiliary_provider or provider_bundle.main_provider,
|
||||
model=getattr(runtime, "model", None),
|
||||
embedding_runtime=getattr(provider_bundle, "embedding_runtime", None),
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
selected = self._merge_skill_contexts(
|
||||
selected,
|
||||
list(getattr(assembled, "activated_skills", []) or []),
|
||||
)
|
||||
return selected, (perf_counter() - started) * 1000
|
||||
|
||||
@staticmethod
|
||||
def _coerce_skill_contexts(value: Any) -> list[SkillContext]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
return [item for item in value if isinstance(item, SkillContext)]
|
||||
|
||||
@staticmethod
|
||||
def _merge_skill_contexts(left: list[SkillContext], right: list[SkillContext]) -> list[SkillContext]:
|
||||
merged: list[SkillContext] = []
|
||||
seen: set[str] = set()
|
||||
for skill in [*left, *right]:
|
||||
if skill.name in seen:
|
||||
continue
|
||||
seen.add(skill.name)
|
||||
merged.append(skill)
|
||||
return merged
|
||||
|
||||
@staticmethod
|
||||
def _skill_summaries_for_planner(skills: list[SkillContext]) -> list[str]:
|
||||
summaries: list[str] = []
|
||||
for skill in skills:
|
||||
content = " ".join((skill.content or "").split())
|
||||
if len(content) > 240:
|
||||
content = content[:237].rstrip() + "..."
|
||||
summaries.append(f"{skill.name}: {content}" if content else skill.name)
|
||||
return summaries
|
||||
|
||||
@staticmethod
|
||||
def _tool_hints_for_skills(skills: list[SkillContext]) -> list[str]:
|
||||
result: list[str] = []
|
||||
for skill in skills:
|
||||
for hint in skill.tool_hints:
|
||||
if hint and hint not in result:
|
||||
result.append(hint)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _require_loaded(loaded: Any, field_name: str) -> Any:
|
||||
value = getattr(loaded, field_name)
|
||||
if value is None:
|
||||
raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
|
||||
return value
|
||||
|
||||
@staticmethod
|
||||
def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]:
|
||||
merged: dict[str, float] = {}
|
||||
if isinstance(current, dict):
|
||||
for key, value in current.items():
|
||||
if isinstance(value, (int, float)):
|
||||
merged[str(key)] = float(value)
|
||||
for key, value in updates.items():
|
||||
merged[key] = merged.get(key, 0.0) + float(value)
|
||||
return merged
|
||||
|
||||
@staticmethod
def _output_language_instruction(prompt_locale: str | None) -> str:
    """Return the output-language instruction for the normalized *prompt_locale*.

    ``en`` and ``zh-Hant`` have their own wording; every other normalized
    locale gets the Simplified Chinese instruction.
    """
    locale = normalize_main_agent_prompt_locale(prompt_locale)
    if locale == "en":
        instruction = (
            "Output language: English. Use English for user-facing task titles, summaries, plans, "
            "and final answers unless the user explicitly requests another language."
        )
    elif locale == "zh-Hant":
        instruction = (
            "輸出語言:繁體中文。除非使用者明確要求其他語言,所有面向使用者的任務標題、摘要、"
            "計劃與最終回答都使用繁體中文。"
        )
    else:
        instruction = (
            "输出语言:简体中文。除非用户明确要求其他语言,所有面向用户的任务标题、摘要、"
            "计划与最终回答都使用简体中文。"
        )
    return instruction
|
||||
|
||||
def _skill_names_for_run(self, run_id: str) -> list[str]:
|
||||
store = getattr(self.loaded, "run_memory_store", None)
|
||||
if store is None:
|
||||
return []
|
||||
for record in store.list_runs():
|
||||
if record.run_id == run_id:
|
||||
return [receipt.skill_name for receipt in record.activated_skills]
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _build_skill_selection_context(
|
||||
*,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
plan: TaskExecutionPlan | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
) -> str:
|
||||
phase = f"attempt_{attempt_index}"
|
||||
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
|
||||
phase = f"revision_attempt_{attempt_index}"
|
||||
elif plan is not None and plan.is_team:
|
||||
phase = f"team_synthesis_attempt_{attempt_index}"
|
||||
|
||||
sections = [
|
||||
f"Task goal:\n{task.goal or task.description}",
|
||||
f"Task description:\n{task.description}",
|
||||
f"Current user request:\n{user_message}",
|
||||
f"Execution phase:\n{phase}",
|
||||
f"Task status:\n{task.status}",
|
||||
]
|
||||
if task.constraints:
|
||||
sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints))
|
||||
if task.skill_names:
|
||||
sections.append(
|
||||
"Previously activated skills (reuse bias, not pinned):\n"
|
||||
+ "\n".join(f"- {item}" for item in task.skill_names)
|
||||
)
|
||||
else:
|
||||
sections.append("Previously activated skills:\nNone")
|
||||
if task.feedback:
|
||||
history_lines = []
|
||||
for item in task.feedback[-5:]:
|
||||
kind = item.get("acceptance_type") or item.get("feedback_type")
|
||||
comment = item.get("comment") or ""
|
||||
run_id = item.get("run_id") or ""
|
||||
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
|
||||
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
|
||||
if plan is not None:
|
||||
plan_lines = [
|
||||
f"mode: {plan.mode}",
|
||||
f"reason: {plan.reason}",
|
||||
]
|
||||
if plan.final_synthesis_instruction:
|
||||
plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
|
||||
if plan.graph is not None:
|
||||
plan_lines.append(f"strategy: {plan.graph.strategy}")
|
||||
plan_lines.append(
|
||||
"nodes:\n"
|
||||
+ "\n".join(
|
||||
f"- {node.node_id}: {node.task}"
|
||||
for node in plan.graph.nodes
|
||||
)
|
||||
)
|
||||
sections.append("Execution plan:\n" + "\n".join(plan_lines))
|
||||
if team_summaries:
|
||||
sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
|
||||
sections.append(
|
||||
"Skill selection instruction:\n"
|
||||
"Prefer reusing previously activated skills when they still match the Task. "
|
||||
"Select new skills only if the current request, revision, or execution plan needs a different capability. "
|
||||
"If no published skill matches, return [] and let the run continue without skills."
|
||||
)
|
||||
return "\n\n".join(section for section in sections if section.strip())
|
||||
|
||||
@staticmethod
|
||||
def _append_task_observation(
|
||||
session_manager: Any,
|
||||
session_id: str,
|
||||
*,
|
||||
event_type: str,
|
||||
payload: dict[str, Any],
|
||||
) -> None:
|
||||
session_manager.append_message(
|
||||
session_id,
|
||||
role="system",
|
||||
event_type=event_type,
|
||||
event_payload=payload,
|
||||
content=payload.get("reason") or payload.get("error"),
|
||||
context_visible=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _join_context(*parts: str | None) -> str:
|
||||
return "\n\n".join(part.strip() for part in parts if part and part.strip())
|
||||
|
||||
@staticmethod
|
||||
def _team_summary_for_validation(result: TeamRunResult) -> str:
|
||||
lines = [
|
||||
f"success={result.success}",
|
||||
f"task_id={result.task_id or ''}",
|
||||
"summary:",
|
||||
result.summary,
|
||||
"nodes:",
|
||||
]
|
||||
for node in result.node_results:
|
||||
lines.append(
|
||||
f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
|
||||
f"error={node.error or ''} output={node.output_text[:500]}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
|
||||
nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
|
||||
payloads: list[dict[str, Any]] = []
|
||||
for item in result.node_results:
|
||||
payload = item.to_dict()
|
||||
node = nodes.get(item.node_id)
|
||||
if node is not None:
|
||||
payload["selected_skill_names"] = list(node.inherited_pinned_skills)
|
||||
payload["ephemeral_skill_names"] = [
|
||||
skill.name for skill in node.inherited_pinned_skill_contexts
|
||||
]
|
||||
payload["skill_query"] = node.agent.metadata.get("skill_query")
|
||||
payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
|
||||
payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
|
||||
payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
|
||||
payloads.append(payload)
|
||||
return payloads
|
||||
|
||||
@staticmethod
|
||||
def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
|
||||
if result is None:
|
||||
return []
|
||||
return [node.evidence for node in result.node_results if node.evidence is not None]
|
||||
|
||||
@staticmethod
|
||||
def _team_synthesis_outcome(
|
||||
plan: TaskExecutionPlan,
|
||||
result: TeamRunResult | None,
|
||||
*,
|
||||
prompt_locale: str | None = None,
|
||||
) -> tuple[str, str, dict[str, Any]]:
|
||||
if not plan.is_team or plan.graph is None:
|
||||
metadata = {
|
||||
"task_outcome": "single",
|
||||
"incomplete_node_ids": [],
|
||||
"node_statuses": {},
|
||||
"evidence_gaps": {},
|
||||
}
|
||||
return "Task outcome: single", "", metadata
|
||||
|
||||
result_by_node = {
|
||||
item.node_id: item
|
||||
for item in (result.node_results if result is not None else [])
|
||||
}
|
||||
node_statuses: dict[str, str] = {}
|
||||
evidence_gaps: dict[str, list[str]] = {}
|
||||
incomplete_node_ids: list[str] = []
|
||||
detail_lines: list[str] = []
|
||||
successful_lines: list[str] = []
|
||||
for node in plan.graph.nodes:
|
||||
node_result = result_by_node.get(node.node_id)
|
||||
status = node_result.completion_status if node_result is not None else "not_run"
|
||||
node_statuses[node.node_id] = status
|
||||
gaps = list(node_result.evidence_gaps) if node_result is not None else []
|
||||
if gaps:
|
||||
evidence_gaps[node.node_id] = gaps
|
||||
if node.required_for_completion and status != "succeeded":
|
||||
incomplete_node_ids.append(node.node_id)
|
||||
detail_lines.append(
|
||||
f"- {node.node_id}: status={status}, "
|
||||
f"finish_reason={node_result.finish_reason if node_result is not None else 'not_run'}, "
|
||||
f"error={(node_result.error or '') if node_result is not None else 'node did not run'}, "
|
||||
f"evidence_gaps={gaps}"
|
||||
)
|
||||
elif node_result is not None and status == "succeeded":
|
||||
successful_lines.append(f"- {node.node_id}: {node_result.output_text[:1000]}")
|
||||
|
||||
task_outcome = "incomplete" if incomplete_node_ids else "complete"
|
||||
metadata = {
|
||||
"task_outcome": task_outcome,
|
||||
"incomplete_node_ids": incomplete_node_ids,
|
||||
"node_statuses": node_statuses,
|
||||
"evidence_gaps": evidence_gaps,
|
||||
}
|
||||
context_parts = [
|
||||
f"Task outcome: {task_outcome}",
|
||||
"Incomplete node IDs: " + (", ".join(incomplete_node_ids) or "none"),
|
||||
]
|
||||
if detail_lines:
|
||||
context_parts.append("Incomplete required node details:\n" + "\n".join(detail_lines))
|
||||
if successful_lines:
|
||||
context_parts.append("Available successful node evidence:\n" + "\n".join(successful_lines))
|
||||
if task_outcome == "incomplete":
|
||||
context_parts.append(
|
||||
"Synthesis requirement: produce a partial report from available evidence and explicitly state "
|
||||
"that the task is incomplete, partially completed, or missing required evidence."
|
||||
)
|
||||
prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete_node_ids else ""
|
||||
return "\n\n".join(context_parts), prefix, metadata
|
||||
|
||||
@staticmethod
def _incomplete_prefix(prompt_locale: str | None) -> str:
    """Return the localized "task incomplete" banner for the given locale.

    The locale is first passed through ``normalize_main_agent_prompt_locale``.
    ``"en"`` and ``"zh-Hant"`` have dedicated banners; every other normalized
    value receives the Simplified Chinese banner.
    """
    english_banner = "Task incomplete: some required steps failed or lack required evidence. The report below uses available results only.\n\n"
    traditional_banner = "任務未完成:部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n"
    simplified_banner = "任务未完成:部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n"

    resolved = normalize_main_agent_prompt_locale(prompt_locale)
    if resolved == "en":
        banner = english_banner
    elif resolved == "zh-Hant":
        banner = traditional_banner
    else:
        banner = simplified_banner
    return banner
|
||||
|
||||
@staticmethod
|
||||
def _apply_incomplete_prefix(output_text: str, prefix: str) -> str:
|
||||
normalized = output_text.lower()
|
||||
notices = (
|
||||
"任务未完成",
|
||||
"任務未完成",
|
||||
"部分完成",
|
||||
"缺少证据",
|
||||
"缺少證據",
|
||||
"task incomplete",
|
||||
"incomplete task",
|
||||
"partially complete",
|
||||
"missing evidence",
|
||||
)
|
||||
if any(notice in normalized for notice in notices):
|
||||
return output_text
|
||||
return prefix + output_text.lstrip()
|
||||
|
||||
def _build_task_evidence_packet(
    self,
    *,
    session_manager: Any,
    task: TaskRecord,
    attempt_index: int,
    result: AgentRunResult,
    team_result: TeamRunResult | None,
) -> TaskEvidencePacket:
    """Assemble the evidence packet for one task attempt.

    The main run's evidence is built by ``EvidenceBuilder`` from the result's
    session id, run id, output text and finish reason. Team evidence comes
    from ``self._team_run_evidence``; team node results are copied into a new
    list, or left empty when no team ran.
    """
    builder = EvidenceBuilder(session_manager)
    main_run = builder.build_run_evidence(
        result.session_id,
        result.run_id,
        result.output_text,
        result.finish_reason,
    )
    team_runs = self._team_run_evidence(team_result)
    if team_result is None:
        node_results = []
    else:
        node_results = list(team_result.node_results)
    return TaskEvidencePacket(
        task_id=task.task_id,
        attempt_index=attempt_index,
        main_run=main_run,
        team_runs=team_runs,
        team_node_results=node_results,
        final_output=result.output_text,
    )
|
||||
|
||||
@staticmethod
|
||||
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
|
||||
node_lines = [
|
||||
(
|
||||
f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
|
||||
f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
|
||||
)
|
||||
for node in result.node_results
|
||||
]
|
||||
return "\n\n".join(
|
||||
item
|
||||
for item in [
|
||||
"Task team execution result:",
|
||||
f"Planner reason: {plan.reason}",
|
||||
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
|
||||
f"Team success: {result.success}",
|
||||
f"Team summary:\n{result.summary}",
|
||||
"Node results:\n" + "\n\n".join(node_lines),
|
||||
(
|
||||
"Final synthesis instruction:\n" + plan.final_synthesis_instruction
|
||||
if plan.final_synthesis_instruction
|
||||
else None
|
||||
),
|
||||
(
|
||||
"Use successful team outputs as internal evidence. If one or more nodes failed, "
|
||||
"do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
|
||||
"with available evidence and clearly state any missing or uncertain data."
|
||||
),
|
||||
]
|
||||
if item
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
|
||||
return "\n\n".join(
|
||||
[
|
||||
"Task team execution failed before final synthesis.",
|
||||
f"Planner reason: {plan.reason}",
|
||||
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
|
||||
f"Error: {error}",
|
||||
(
|
||||
"Proceed as the main agent. Do not blindly repeat failed tool calls; "
|
||||
"produce a user-visible fallback answer with available evidence and clearly "
|
||||
"state any missing or uncertain data."
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any:
|
||||
def factory(node: ExecutionNode) -> Any:
|
||||
node_kwargs = dict(kwargs)
|
||||
node_kwargs.pop("provider_bundle", None)
|
||||
if node.agent.model:
|
||||
node_kwargs["model"] = node.agent.model
|
||||
if node.agent.provider_name:
|
||||
node_kwargs["provider_name"] = node.agent.provider_name
|
||||
return self.make_provider_bundle_for_task(self.loaded, node_kwargs)
|
||||
|
||||
return factory
|
||||
@ -2,6 +2,8 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
@ -126,6 +128,37 @@ class EvidenceBuilder:
|
||||
)
|
||||
|
||||
|
||||
def evaluate_node_evidence(
    evidence: RunEvidence,
    required_evidence: list[str],
    output_text: str,
) -> list[str]:
    """Evaluate v1 coarse-grained node evidence requirements.

    Only tool results whose payload has ``success`` set to exactly ``True``
    count as evidence. Supported requirements are ``tool_result`` (at least
    one successful tool result), ``url`` (a successful tool result containing
    a URL) and ``output`` (non-blank ``output_text``). Blank requirements are
    ignored; any other requirement is reported as unsupported.

    Returns the list of gap messages, without duplicates, in first-seen order.
    """
    gaps: list[str] = []
    successful_tools = []
    for tool_result in evidence.tool_results:
        if tool_result.event_payload.get("success") is True:
            successful_tools.append(tool_result)

    for raw_requirement in required_evidence:
        requirement = str(raw_requirement).strip()
        if not requirement:
            continue
        if requirement == "tool_result":
            satisfied = bool(successful_tools)
        elif requirement == "url":
            satisfied = any(_tool_evidence_contains_url(tool) for tool in successful_tools)
        elif requirement == "output":
            satisfied = bool(output_text.strip())
        else:
            _append_unique(gaps, f"unsupported evidence requirement: {requirement}")
            continue
        if not satisfied:
            _append_unique(gaps, f"missing required evidence: {requirement}")
    return gaps
|
||||
|
||||
|
||||
def render_task_evidence(packet: TaskEvidencePacket) -> str:
|
||||
sections = [
|
||||
f"Task evidence packet: task_id={packet.task_id} attempt={packet.attempt_index}",
|
||||
@ -181,3 +214,20 @@ def _render_tool_evidence(item: ToolEvidence) -> str:
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
return str(value) if value is not None else None
|
||||
|
||||
|
||||
_URL_RE = re.compile(r"https?://[^\s<>'\"]+", re.IGNORECASE)
|
||||
|
||||
|
||||
def _tool_evidence_contains_url(item: ToolEvidence) -> bool:
|
||||
values = [
|
||||
item.url or "",
|
||||
item.content,
|
||||
json.dumps(item.event_payload, ensure_ascii=False, default=str),
|
||||
]
|
||||
return any(_URL_RE.search(value) is not None for value in values)
|
||||
|
||||
|
||||
def _append_unique(values: list[str], value: str) -> None:
|
||||
if value not in values:
|
||||
values.append(value)
|
||||
|
||||
@ -4,11 +4,14 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.tools.registry import ToolRegistry
|
||||
|
||||
from .models import TaskRecord
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
@ -17,6 +20,24 @@ from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
TaskExecutionMode = Literal["single", "team"]
|
||||
|
||||
|
||||
# Interim policy: planner-requested tools are screened by name until a real
# approval flow for high-risk tools exists. The denylist is defined once here
# so planner behavior cannot drift between call sites.
HIGH_RISK_PLANNER_TOOL_NAMES = frozenset(
    (
        "delete_file",
        "execute_command",
        "external_send",
        "send_email",
        "terminal",
        "write_file",
    )
)
|
||||
|
||||
|
||||
def _agent_team_enabled() -> bool:
|
||||
return os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1").strip().lower() not in {"0", "false", "no", "off"}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskExecutionPlan:
|
||||
mode: TaskExecutionMode
|
||||
@ -25,14 +46,26 @@ class TaskExecutionPlan:
|
||||
final_synthesis_instruction: str = ""
|
||||
fallback_error: str | None = None
|
||||
skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
|
||||
planner_adaptation: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
def is_team(self) -> bool:
    """True only for a ``"team"`` plan that actually carries a graph."""
    if self.mode != "team":
        return False
    return self.graph is not None
|
||||
|
||||
@classmethod
|
||||
def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
|
||||
return cls(mode="single", reason=reason, fallback_error=fallback_error)
|
||||
def single(
    cls,
    reason: str,
    *,
    fallback_error: str | None = None,
    planner_adaptation: dict[str, Any] | None = None,
) -> "TaskExecutionPlan":
    """Build a single-agent plan.

    ``planner_adaptation`` is shallow-copied so the plan does not share the
    caller's dict; a missing or empty value becomes a fresh empty dict.
    """
    adaptation_copy = dict(planner_adaptation or {})
    return cls(
        mode="single",
        reason=reason,
        fallback_error=fallback_error,
        planner_adaptation=adaptation_copy,
    )
|
||||
|
||||
def to_event_payload(self) -> dict[str, Any]:
|
||||
strategy = self.graph.strategy if self.graph is not None else None
|
||||
@ -57,6 +90,7 @@ class TaskExecutionPlan:
|
||||
if item.ephemeral_guidance_id
|
||||
],
|
||||
"skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
|
||||
"planner_adaptation": dict(self.planner_adaptation),
|
||||
"fallback_error": self.fallback_error,
|
||||
}
|
||||
|
||||
@ -65,10 +99,34 @@ class TaskExecutionPlanner:
|
||||
"""Plan whether a Task attempt should run through a team first."""
|
||||
|
||||
_MAX_NODES = 6
|
||||
_MAX_DEPTH = 4
|
||||
_SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}
|
||||
_ALLOWED_NODE_FIELDS = {
|
||||
"node_id",
|
||||
"task",
|
||||
"use_skill",
|
||||
"skill_query",
|
||||
"depends_on",
|
||||
"input_contract",
|
||||
"output_contract",
|
||||
"requested_tools",
|
||||
"required_evidence",
|
||||
"evidence_contract",
|
||||
"validation_rules",
|
||||
"required_for_completion",
|
||||
"block_downstream_on_partial",
|
||||
"max_tool_iterations",
|
||||
"constraints",
|
||||
}
|
||||
|
||||
def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
task_skill_resolver: TaskSkillResolver | None = None,
|
||||
tool_registry: ToolRegistry | None = None,
|
||||
) -> None:
|
||||
self.task_skill_resolver = task_skill_resolver
|
||||
self.tool_registry = tool_registry
|
||||
|
||||
async def plan(
|
||||
self,
|
||||
@ -78,7 +136,15 @@ class TaskExecutionPlanner:
|
||||
attempt_index: int,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
timeout_seconds: float = 30.0,
|
||||
skill_summaries: list[str] | None = None,
|
||||
tool_hints: list[str] | None = None,
|
||||
activated_skills: list[SkillContext] | None = None,
|
||||
) -> TaskExecutionPlan:
|
||||
if not _agent_team_enabled():
|
||||
return TaskExecutionPlan.single("planner_disabled_by_environment")
|
||||
if not self._needs_team_planning(task=task, user_message=user_message):
|
||||
return TaskExecutionPlan.single("planner_skipped_simple_task")
|
||||
|
||||
provider = None
|
||||
model = None
|
||||
if provider_bundle is not None:
|
||||
@ -87,6 +153,7 @@ class TaskExecutionPlanner:
|
||||
model = getattr(runtime, "model", None)
|
||||
if provider is None:
|
||||
return TaskExecutionPlan.single("planner_provider_unavailable")
|
||||
selected_template, base_adaptation = self._select_team_template(activated_skills or [])
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
provider.chat(
|
||||
@ -104,6 +171,10 @@ class TaskExecutionPlanner:
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
attempt_index=attempt_index,
|
||||
skill_summaries=skill_summaries or [],
|
||||
tool_hints=tool_hints or [],
|
||||
activated_skills=activated_skills or [],
|
||||
selected_template=selected_template,
|
||||
),
|
||||
},
|
||||
],
|
||||
@ -114,7 +185,40 @@ class TaskExecutionPlanner:
|
||||
),
|
||||
timeout=timeout_seconds,
|
||||
)
|
||||
plan = self.from_json(response.content or "")
|
||||
try:
|
||||
plan = self._from_json_or_raise(response.content or "")
|
||||
except Exception as first_error:
|
||||
repair_response = await asyncio.wait_for(
|
||||
provider.chat(
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
"Repair the invalid planner JSON using the task-only schema from the original "
|
||||
f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}"
|
||||
),
|
||||
},
|
||||
],
|
||||
tools=None,
|
||||
model=model,
|
||||
max_tokens=4096,
|
||||
temperature=0.0,
|
||||
),
|
||||
timeout=timeout_seconds,
|
||||
)
|
||||
try:
|
||||
plan = self._from_json_or_raise(repair_response.content or "")
|
||||
except Exception as repair_error:
|
||||
return TaskExecutionPlan.single(
|
||||
"planner_fallback_single",
|
||||
fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}",
|
||||
planner_adaptation=base_adaptation,
|
||||
)
|
||||
self._merge_adaptation(plan, base_adaptation)
|
||||
return await self._resolve_plan(
|
||||
plan,
|
||||
task=task,
|
||||
@ -152,30 +256,90 @@ class TaskExecutionPlanner:
|
||||
graph.validate()
|
||||
plan.graph = graph
|
||||
plan.skill_resolution_report = reports
|
||||
self._merge_skill_resolution_adaptation(plan, reports)
|
||||
return plan
|
||||
except Exception as exc:
|
||||
return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")
|
||||
|
||||
@staticmethod
|
||||
def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool:
|
||||
text = " ".join(
|
||||
part
|
||||
for part in (
|
||||
task.goal,
|
||||
task.description,
|
||||
user_message,
|
||||
)
|
||||
if part
|
||||
).lower()
|
||||
if not text.strip():
|
||||
return False
|
||||
|
||||
complex_markers = (
|
||||
"agent team",
|
||||
"sub-agent",
|
||||
"multi-agent",
|
||||
"parallel",
|
||||
"dag",
|
||||
"workflow",
|
||||
"review",
|
||||
"research",
|
||||
"compare",
|
||||
"comparison",
|
||||
"architecture",
|
||||
"refactor",
|
||||
"multi-file",
|
||||
"end-to-end",
|
||||
"并行",
|
||||
"团队",
|
||||
"多智能体",
|
||||
"子代理",
|
||||
"工作流",
|
||||
"评审",
|
||||
"审查",
|
||||
"调研",
|
||||
"研究",
|
||||
"对比",
|
||||
"架构",
|
||||
"重构",
|
||||
"多文件",
|
||||
"端到端",
|
||||
)
|
||||
return any(marker in text for marker in complex_markers)
|
||||
|
||||
def from_json(self, text: str) -> TaskExecutionPlan:
    """Parse planner output into a plan, never raising.

    Parsing and validation are delegated to ``_from_json_or_raise`` so both
    entry points share one implementation (including planner adaptation
    handling). Any failure is converted into a single-agent fallback plan
    whose ``fallback_error`` carries the error text.
    """
    try:
        return self._from_json_or_raise(text)
    except Exception as exc:
        # Deliberate best-effort boundary: an unusable planner response must
        # degrade to single-agent execution instead of failing the attempt.
        return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))
|
||||
|
||||
def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
|
||||
def _from_json_or_raise(self, text: str) -> TaskExecutionPlan:
    """Parse planner output into a plan, letting validation errors propagate.

    A missing or empty ``mode`` counts as ``"single"``. Any mode other than
    ``"team"`` yields a single-agent plan. For ``"team"`` the graph is built
    from the payload and validated against ``_MAX_DEPTH`` before the plan is
    returned. The adaptation dict extracted from the payload is attached in
    both cases.
    """
    payload = self._parse_json_object(text)
    requested_mode = str(payload.get("mode") or "single").strip().lower()
    stated_reason = str(payload.get("reason") or "")
    planner_adaptation = self._adaptation_from_payload(payload)

    if requested_mode == "team":
        team_graph = self._graph_from_payload(payload, adaptation=planner_adaptation)
        team_graph.validate(max_depth=self._MAX_DEPTH)
        synthesis = str(payload.get("final_synthesis_instruction") or "")
        return TaskExecutionPlan(
            mode="team",
            reason=stated_reason or "planner_selected_team",
            graph=team_graph,
            final_synthesis_instruction=synthesis,
            planner_adaptation=planner_adaptation,
        )

    return TaskExecutionPlan.single(
        stated_reason or "planner_selected_single",
        planner_adaptation=planner_adaptation,
    )
|
||||
|
||||
def _graph_from_payload(
|
||||
self,
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
adaptation: dict[str, Any],
|
||||
) -> ExecutionGraph:
|
||||
strategy = str(payload.get("strategy") or "sequence").strip().lower()
|
||||
if strategy not in self._SUPPORTED_STRATEGIES:
|
||||
raise ValueError(f"Unsupported team strategy: {strategy}")
|
||||
@ -189,16 +353,27 @@ class TaskExecutionPlanner:
|
||||
for index, item in enumerate(raw_nodes, start=1):
|
||||
if not isinstance(item, dict):
|
||||
raise ValueError("Each team node must be an object")
|
||||
agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
|
||||
skill_query = str(item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or "").strip()
|
||||
requested_capabilities = _string_list(
|
||||
item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
|
||||
)
|
||||
requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
|
||||
node_id = str(item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}").strip()
|
||||
unsupported = sorted(set(item) - self._ALLOWED_NODE_FIELDS)
|
||||
if unsupported:
|
||||
raise ValueError(f"Unsupported team node field(s): {', '.join(unsupported)}")
|
||||
node_id = str(item.get("node_id") or f"node_{index}").strip()
|
||||
task = str(item.get("task") or "").strip()
|
||||
if not node_id or not task:
|
||||
raise ValueError("Each team node requires node_id/id and task")
|
||||
raise ValueError("Each team node requires node_id and task")
|
||||
allowed_tool_names = self._resolve_requested_tools(
|
||||
item.get("requested_tools"),
|
||||
warnings=adaptation["warnings"],
|
||||
)
|
||||
use_skill = _optional_str(item.get("use_skill"))
|
||||
skill_query = _optional_str(item.get("skill_query")) or task
|
||||
if use_skill is not None or "skill_query" in item:
|
||||
adaptation.setdefault("node_skill_bindings", []).append(
|
||||
{
|
||||
"node_id": node_id,
|
||||
"use_skill": use_skill,
|
||||
"skill_query": skill_query,
|
||||
}
|
||||
)
|
||||
nodes.append(
|
||||
ExecutionNode(
|
||||
node_id=node_id,
|
||||
@ -208,30 +383,147 @@ class TaskExecutionPlanner:
|
||||
role="",
|
||||
system_prompt="",
|
||||
metadata={
|
||||
"use_skill": use_skill,
|
||||
"skill_query": skill_query,
|
||||
"required_capabilities": requested_capabilities,
|
||||
"requested_tags": requested_tags,
|
||||
"required_capabilities": [],
|
||||
"requested_tags": [],
|
||||
"sub_agent_kind": "generic_skill_worker",
|
||||
},
|
||||
),
|
||||
depends_on=[str(dep) for dep in item.get("depends_on") or []],
|
||||
inherited_pinned_skills=[str(name) for name in item.get("pinned_skills") or []],
|
||||
constraints=[str(value) for value in item.get("constraints") or []],
|
||||
expected_output=str(item.get("expected_output") or "") or None,
|
||||
input_contract=_dict_value(item.get("input_contract")),
|
||||
output_contract=_dict_value(item.get("output_contract")),
|
||||
allowed_tool_names=allowed_tool_names,
|
||||
required_evidence=_string_list(item.get("required_evidence")),
|
||||
evidence_contract=_dict_value(item.get("evidence_contract")),
|
||||
validation_rules=_string_list(item.get("validation_rules")),
|
||||
required_for_completion=bool(item.get("required_for_completion", True)),
|
||||
block_downstream_on_partial=bool(item.get("block_downstream_on_partial", False)),
|
||||
max_tool_iterations=_optional_int(item.get("max_tool_iterations")),
|
||||
)
|
||||
)
|
||||
return ExecutionGraph(strategy=strategy, nodes=nodes) # type: ignore[arg-type]
|
||||
|
||||
def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None:
    """Screen the tools a planner node asked for.

    ``None`` is returned unchanged (the node made no request). Otherwise each
    requested name is dropped with a warning when its lower-cased form is in
    ``HIGH_RISK_PLANNER_TOOL_NAMES``, or when there is no tool registry or
    the registry does not know the name. Warnings are appended to
    ``warnings`` in place without duplicates; surviving names are returned
    in request order.
    """
    if value is None:
        return None
    accepted: list[str] = []
    for tool_name in _string_list(value):
        if tool_name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES:
            _append_unique(warnings, f"requires_high_risk_review: {tool_name}")
        elif self.tool_registry is None or self.tool_registry.get(tool_name) is None:
            _append_unique(warnings, f"unknown tool removed: {tool_name}")
        else:
            accepted.append(tool_name)
    return accepted
|
||||
|
||||
@staticmethod
def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Extract the planner's ``adaptation`` object as a fresh dict.

    A non-dict ``adaptation`` value is replaced by an empty dict. The
    ``warnings`` entry is always present afterwards, normalized through
    ``_string_list``.
    """
    raw_adaptation = payload.get("adaptation")
    if isinstance(raw_adaptation, dict):
        adaptation = dict(raw_adaptation)
    else:
        adaptation = {}
    adaptation["warnings"] = _string_list(adaptation.get("warnings"))
    return adaptation
|
||||
|
||||
@staticmethod
def _select_team_template(
    activated_skills: list[SkillContext],
) -> tuple[SkillContext | None, dict[str, Any]]:
    """Pick the primary team template among the activated skills.

    A skill qualifies when its ``team_template`` is a dict whose ``nodes``
    entry is a list. The first qualifying skill is selected; later ones are
    reported as ignored. Template warnings from every activated skill are
    collected, prefixed with the skill name.

    Returns the selected skill (or ``None``) and the base adaptation dict
    describing the selection; ``template_used`` starts as ``False``.
    """
    with_template = []
    for skill in activated_skills:
        template = skill.team_template
        if isinstance(template, dict) and isinstance(template.get("nodes"), list):
            with_template.append(skill)

    chosen = with_template[0] if with_template else None

    collected: list[str] = []
    for skill in activated_skills:
        for note in skill.team_template_warnings:
            _append_unique(collected, f"{skill.name}: {note}")

    if chosen:
        chosen_name = chosen.name
        why = "first activated skill with a valid team template"
    else:
        chosen_name = None
        why = "no activated skill has a valid team template"

    adaptation = {
        "template_used": False,
        "selected_template": chosen_name,
        "selection_reason": why,
        "ignored_templates": [skill.name for skill in with_template[1:]],
        "warnings": collected,
    }
    return chosen, adaptation
|
||||
|
||||
@staticmethod
def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None:
    """Replace the plan's adaptation with a merge of ``base`` and planner data.

    Template selection fields come from ``base``; ``template_used`` comes
    from the planner's own adaptation. Warnings from both are combined, base
    first, without duplicates. ``node_skill_bindings`` is carried over only
    when the planner adaptation holds a list, keeping its dict entries as
    copies.
    """
    planner_payload = dict(plan.planner_adaptation)
    base_warnings = base.get("warnings", [])
    planner_warnings = planner_payload.get("warnings", [])

    combined: list[str] = []
    for entry in (*base_warnings, *planner_warnings):
        _append_unique(combined, str(entry))

    result = {
        "template_used": bool(planner_payload.get("template_used", False)),
        "selected_template": base.get("selected_template"),
        "selection_reason": base.get("selection_reason"),
        "ignored_templates": list(base.get("ignored_templates", [])),
        "warnings": combined,
    }
    if isinstance(planner_payload.get("node_skill_bindings"), list):
        result["node_skill_bindings"] = [
            dict(binding)
            for binding in planner_payload["node_skill_bindings"]
            if isinstance(binding, dict)
        ]
    plan.planner_adaptation = result
|
||||
|
||||
@staticmethod
def _merge_skill_resolution_adaptation(
    plan: TaskExecutionPlan,
    reports: list[SkillResolutionReport],
) -> None:
    """Fold skill-resolution results into the plan's adaptation, in place.

    Each report's warnings are appended to the adaptation's ``warnings`` list
    (created if missing). When a report requested a skill by name but the
    exact binding was not used, the matching node binding, if any, gets a
    ``fallback_reason`` entry.
    """
    adaptation = plan.planner_adaptation
    warning_sink = adaptation.setdefault("warnings", [])

    # Index the dict bindings by node id; a later duplicate overrides an earlier one.
    bindings_by_node = {}
    for entry in adaptation.get("node_skill_bindings") or []:
        if isinstance(entry, dict):
            bindings_by_node[str(entry.get("node_id"))] = entry

    for report in reports:
        for note in report.warnings:
            _append_unique(warning_sink, note)
        node_binding = bindings_by_node.get(report.node_id)
        if node_binding is None:
            continue
        if report.requested_skill_name and not report.exact_binding_used:
            node_binding["fallback_reason"] = f"use_skill unresolved; {report.reason}"
|
||||
|
||||
@staticmethod
|
||||
def _prompt(
|
||||
*,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
skill_summaries: list[str] | None = None,
|
||||
tool_hints: list[str] | None = None,
|
||||
activated_skills: list[SkillContext] | None = None,
|
||||
selected_template: SkillContext | None = None,
|
||||
) -> str:
|
||||
history_note = ""
|
||||
if task.feedback:
|
||||
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
|
||||
skill_note = ""
|
||||
if skill_summaries:
|
||||
skill_note = "\nActivated skill summaries:\n" + "\n".join(f"- {item}" for item in skill_summaries)
|
||||
guidance_note = ""
|
||||
if activated_skills:
|
||||
guidance_note = "\nActivated Skill guidance:\n" + "\n".join(
|
||||
f"[{skill.name}]\n{skill.content}" for skill in activated_skills
|
||||
)
|
||||
template_note = ""
|
||||
if selected_template is not None:
|
||||
template_note = "\nPrimary Skill team template:\n" + json.dumps(
|
||||
{
|
||||
"skill_name": selected_template.name,
|
||||
"skill_version": selected_template.version,
|
||||
"template": selected_template.team_template,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
indent=2,
|
||||
)
|
||||
tool_note = ""
|
||||
if tool_hints:
|
||||
tool_note = "\nActivated skill tool hints:\n" + "\n".join(f"- {item}" for item in tool_hints)
|
||||
return (
|
||||
"Decide execution mode for this internal Task attempt.\n"
|
||||
"Use mode=team only when independent research, review, implementation slices, or staged checks "
|
||||
@ -241,13 +533,24 @@ class TaskExecutionPlanner:
|
||||
' "mode": "single" | "team",\n'
|
||||
' "reason": "short reason",\n'
|
||||
' "strategy": "sequence" | "parallel" | "dag",\n'
|
||||
' "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
|
||||
'"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
|
||||
' "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", '
|
||||
'"skill_query": "optional dynamic skill query", "depends_on": [], '
|
||||
'"input_contract": {}, "output_contract": {}, "requested_tools": [], '
|
||||
'"required_evidence": [], "evidence_contract": {}, "validation_rules": [], '
|
||||
'"required_for_completion": true, "block_downstream_on_partial": false, '
|
||||
'"max_tool_iterations": 3, "constraints": []}],\n'
|
||||
' "adaptation": {"template_used": true, "warnings": []},\n'
|
||||
' "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
|
||||
"}\n\n"
|
||||
"Node definitions are task-only. Never output agent or role fields. Use at most one primary "
|
||||
"Skill template; treat all other activated Skills as guidance.\n\n"
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Attempt index: {attempt_index}\n"
|
||||
f"{skill_note}"
|
||||
f"{guidance_note}"
|
||||
f"{template_note}"
|
||||
f"{tool_note}"
|
||||
f"{history_note}"
|
||||
)
|
||||
|
||||
@ -275,6 +578,26 @@ def _optional_str(value: Any) -> str | None:
|
||||
return text or None
|
||||
|
||||
|
||||
def _optional_int(value: Any) -> int | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
if isinstance(value, bool):
|
||||
raise ValueError("max_tool_iterations must be an integer")
|
||||
result = int(value)
|
||||
if result < 0:
|
||||
raise ValueError("max_tool_iterations must be non-negative")
|
||||
return result
|
||||
|
||||
|
||||
def _dict_value(value: Any) -> dict[str, Any]:
|
||||
return dict(value) if isinstance(value, dict) else {}
|
||||
|
||||
|
||||
def _append_unique(values: list[str], value: str) -> None:
|
||||
if value and value not in values:
|
||||
values.append(value)
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
if isinstance(value, str):
|
||||
|
||||
@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from .models import MainAgentDecision, TaskRecord
|
||||
@ -24,6 +25,15 @@ class MainAgentRouter:
|
||||
thinking_enabled: bool | None = None,
|
||||
timeout_seconds: float = 8.0,
|
||||
) -> MainAgentDecision:
|
||||
if active_task is None and _is_obvious_simple_chat(message):
|
||||
return MainAgentDecision(mode="simple", reason="obvious_simple_chat", action="simple_chat")
|
||||
if active_task is None and _is_obvious_task_request(message):
|
||||
return MainAgentDecision(
|
||||
mode="task",
|
||||
reason="obvious_task",
|
||||
starts_new_task=True,
|
||||
action="create_task",
|
||||
)
|
||||
if provider is None:
|
||||
return self._apply_active_task_boundary(
|
||||
self._fallback(active_task=active_task, reason="router_provider_unavailable"),
|
||||
@ -246,6 +256,64 @@ def _clean_short_title(value: Any) -> str | None:
|
||||
return title[:40] or None
|
||||
|
||||
|
||||
def _is_obvious_simple_chat(message: str) -> bool:
    """Detect messages that can skip routing and be answered as plain chat.

    The message is compacted, lower-cased and stripped of surrounding
    punctuation. It is never simple chat when it is empty, contains a URL or
    path, or looks like a fresh task request. Otherwise it qualifies when it
    is a known short greeting or thanks, or when it is at most 1200
    characters long and starts with a translate / polish / summarize style
    prefix.
    """
    normalized = _compact_text(message).lower().strip("!!??。.,,~~")
    if not normalized:
        return False
    if _has_url_or_path(normalized):
        return False
    if _looks_like_fresh_task_request(normalized):
        return False

    short_phrases = {
        "hi",
        "hello",
        "hey",
        "thanks",
        "thankyou",
        "thankyou!",
        "谢谢",
        "谢了",
        "多谢",
        "你好",
        "您好",
        "嗨",
        "在吗",
        "早上好",
        "下午好",
        "晚上好",
        "辛苦了",
    }
    if len(normalized) <= 24 and normalized in short_phrases:
        return True

    rewrite_prefixes = (
        "翻译",
        "translate",
        "润色",
        "改写",
        "校对",
        "总结下面",
        "总结这段",
        "摘要下面",
        "summarize this",
    )
    if len(normalized) > 1200:
        return False
    return normalized.startswith(rewrite_prefixes)
|
||||
|
||||
|
||||
def _is_obvious_task_request(message: str) -> bool:
    """Detect messages that should start a new task without asking the router.

    Empty messages and explicit follow-ups to an existing task are rejected.
    Otherwise a URL or path is enough on its own; failing that, the decision
    is left to ``_looks_like_fresh_task_request``.
    """
    compacted = _compact_text(message)
    if not compacted or _looks_like_explicit_task_followup(compacted):
        return False
    return _has_url_or_path(compacted) or _looks_like_fresh_task_request(compacted)
|
||||
|
||||
|
||||
def _has_url_or_path(text: str) -> bool:
|
||||
return bool(
|
||||
re.search(r"https?://|www\.", text)
|
||||
or re.search(r"(^|[\s'\"`])(?:[./~]|[a-zA-Z]:[\\/])[^\s'\"`]+", text)
|
||||
)
|
||||
|
||||
|
||||
def _looks_like_explicit_task_followup(message: str) -> bool:
|
||||
text = _compact_text(message)
|
||||
if not text:
|
||||
@ -307,6 +375,16 @@ def _looks_like_fresh_task_request(message: str) -> bool:
|
||||
"看看最新",
|
||||
"最新",
|
||||
"今天",
|
||||
"昨天",
|
||||
"昨日",
|
||||
"昨晚",
|
||||
"刚刚",
|
||||
"最近",
|
||||
"近期",
|
||||
"本届",
|
||||
"本场",
|
||||
"这场",
|
||||
"上一场",
|
||||
"明天",
|
||||
"上传",
|
||||
"下载",
|
||||
@ -324,6 +402,12 @@ def _looks_like_fresh_task_request(message: str) -> bool:
|
||||
"look up",
|
||||
"latest",
|
||||
"today",
|
||||
"yesterday",
|
||||
"last night",
|
||||
"recent",
|
||||
"recently",
|
||||
"this match",
|
||||
"this game",
|
||||
"tomorrow",
|
||||
"upload",
|
||||
"download",
|
||||
|
||||
@ -7,9 +7,11 @@ from dataclasses import dataclass, field, replace
|
||||
from typing import Any
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import EphemeralGuidanceSynthesizer
|
||||
from beaver.tasks.models import TaskRecord
|
||||
@ -24,6 +26,9 @@ class SkillResolutionReport:
|
||||
ephemeral_guidance_id: str | None = None
|
||||
ephemeral_guidance_name: str | None = None
|
||||
ephemeral_used: bool = False
|
||||
requested_skill_name: str | None = None
|
||||
exact_binding_used: bool = False
|
||||
warnings: list[str] = field(default_factory=list)
|
||||
reason: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
@ -35,6 +40,9 @@ class SkillResolutionReport:
|
||||
"ephemeral_guidance_id": self.ephemeral_guidance_id,
|
||||
"ephemeral_guidance_name": self.ephemeral_guidance_name,
|
||||
"ephemeral_used": self.ephemeral_used,
|
||||
"requested_skill_name": self.requested_skill_name,
|
||||
"exact_binding_used": self.exact_binding_used,
|
||||
"warnings": list(self.warnings),
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
@ -87,12 +95,45 @@ class TaskSkillResolver:
|
||||
attempt_index: int,
|
||||
provider_bundle: ProviderBundle,
|
||||
) -> tuple[ExecutionNode, SkillResolutionReport]:
|
||||
use_skill = str(node.agent.metadata.get("use_skill") or "").strip()
|
||||
skill_query = str(node.agent.metadata.get("skill_query") or node.task or node.node_id).strip()
|
||||
warnings: list[str] = []
|
||||
required_capabilities = [
|
||||
str(item).strip()
|
||||
for item in node.agent.metadata.get("required_capabilities", [])
|
||||
if str(item).strip()
|
||||
]
|
||||
if use_skill:
|
||||
exact_context = self._load_exact_skill_context(use_skill)
|
||||
if exact_context is not None:
|
||||
resolved = self._generic_node(
|
||||
node,
|
||||
pinned_skill_names=_merge_names(node.inherited_pinned_skills, [use_skill]),
|
||||
pinned_skill_contexts=_merge_skill_contexts(
|
||||
node.inherited_pinned_skill_contexts,
|
||||
[exact_context],
|
||||
),
|
||||
metadata={
|
||||
**node.agent.metadata,
|
||||
"use_skill": use_skill,
|
||||
"skill_query": skill_query,
|
||||
"required_capabilities": required_capabilities,
|
||||
"selected_skill_names": [use_skill],
|
||||
"ephemeral_skill_names": [],
|
||||
"exact_binding_used": True,
|
||||
},
|
||||
)
|
||||
return resolved, SkillResolutionReport(
|
||||
node_id=node.node_id,
|
||||
skill_query=skill_query,
|
||||
required_capabilities=required_capabilities,
|
||||
selected_skill_names=[use_skill],
|
||||
requested_skill_name=use_skill,
|
||||
exact_binding_used=True,
|
||||
reason="exact use_skill binding",
|
||||
)
|
||||
warnings.append(f"use_skill unresolved: {use_skill}")
|
||||
|
||||
if self._is_summary_only_node(node, skill_query=skill_query, required_capabilities=required_capabilities):
|
||||
resolved = self._generic_node(
|
||||
node,
|
||||
@ -104,6 +145,7 @@ class TaskSkillResolver:
|
||||
"required_capabilities": required_capabilities,
|
||||
"selected_skill_names": [],
|
||||
"ephemeral_skill_names": [],
|
||||
"exact_binding_used": False,
|
||||
"summary_uses_dependency_outputs_only": True,
|
||||
},
|
||||
)
|
||||
@ -113,6 +155,9 @@ class TaskSkillResolver:
|
||||
required_capabilities=required_capabilities,
|
||||
selected_skill_names=[],
|
||||
ephemeral_used=False,
|
||||
requested_skill_name=use_skill or None,
|
||||
exact_binding_used=False,
|
||||
warnings=warnings,
|
||||
reason="summary node uses dependency outputs directly",
|
||||
)
|
||||
|
||||
@ -141,6 +186,7 @@ class TaskSkillResolver:
|
||||
"required_capabilities": required_capabilities,
|
||||
"selected_skill_names": selected,
|
||||
"ephemeral_skill_names": [],
|
||||
"exact_binding_used": False,
|
||||
},
|
||||
)
|
||||
return resolved, SkillResolutionReport(
|
||||
@ -149,6 +195,9 @@ class TaskSkillResolver:
|
||||
required_capabilities=required_capabilities,
|
||||
selected_skill_names=selected,
|
||||
ephemeral_used=False,
|
||||
requested_skill_name=use_skill or None,
|
||||
exact_binding_used=False,
|
||||
warnings=warnings,
|
||||
reason="matched published skill",
|
||||
)
|
||||
|
||||
@ -174,6 +223,7 @@ class TaskSkillResolver:
|
||||
"ephemeral_guidance_id": missing.guidance_id,
|
||||
"ephemeral_guidance_name": missing.guidance_name,
|
||||
"ephemeral_skill_names": [missing.skill_context.name],
|
||||
"exact_binding_used": False,
|
||||
},
|
||||
)
|
||||
return resolved, SkillResolutionReport(
|
||||
@ -183,9 +233,27 @@ class TaskSkillResolver:
|
||||
ephemeral_guidance_id=missing.guidance_id,
|
||||
ephemeral_guidance_name=missing.guidance_name,
|
||||
ephemeral_used=True,
|
||||
requested_skill_name=use_skill or None,
|
||||
exact_binding_used=False,
|
||||
warnings=warnings,
|
||||
reason="generated ephemeral guidance for missing sub-agent capability",
|
||||
)
|
||||
|
||||
def _load_exact_skill_context(self, name: str) -> SkillContext | None:
|
||||
record = self.skills_loader.get_skill_record(name)
|
||||
raw_content = self.skills_loader.load_published_skill(name)
|
||||
content = strip_frontmatter(raw_content).strip() if raw_content else ""
|
||||
if record is None or not content:
|
||||
return None
|
||||
return SkillContext(
|
||||
name=name,
|
||||
content=content,
|
||||
version=record.version,
|
||||
content_hash=record.content_hash or "",
|
||||
activation_reason="explicit_node_binding",
|
||||
tool_hints=list(record.tool_hints),
|
||||
)
|
||||
|
||||
async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
|
||||
candidates = self.skills_loader.build_selection_candidates()
|
||||
if not candidates:
|
||||
@ -336,3 +404,14 @@ def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
|
||||
if name and name not in result:
|
||||
result.append(name)
|
||||
return result
|
||||
|
||||
|
||||
def _merge_skill_contexts(parent: list[SkillContext], selected: list[SkillContext]) -> list[SkillContext]:
|
||||
result: list[SkillContext] = []
|
||||
seen: set[str] = set()
|
||||
for context in [*parent, *selected]:
|
||||
if context.name in seen:
|
||||
continue
|
||||
seen.add(context.name)
|
||||
result.append(context)
|
||||
return result
|
||||
|
||||
@ -5,10 +5,11 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
from html import unescape
|
||||
from html.parser import HTMLParser
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
from urllib.parse import quote_plus, urlparse
|
||||
from urllib.parse import quote_plus, urljoin, urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
@ -24,6 +25,10 @@ def _strip_html(value: str) -> str:
|
||||
return re.sub(r"\s+", " ", text).strip()
|
||||
|
||||
|
||||
def _compact_text(value: str) -> str:
|
||||
return re.sub(r"\s+", " ", unescape(value)).strip()
|
||||
|
||||
|
||||
def _safe_url(url: str) -> str:
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
|
||||
@ -31,6 +36,77 @@ def _safe_url(url: str) -> str:
|
||||
return url
|
||||
|
||||
|
||||
class _HtmlMetadataParser(HTMLParser):
|
||||
def __init__(self, base_url: str) -> None:
|
||||
super().__init__(convert_charrefs=True)
|
||||
self.base_url = base_url
|
||||
self.title = ""
|
||||
self.links: list[dict[str, str]] = []
|
||||
self._in_title = False
|
||||
self._current_href: str | None = None
|
||||
self._current_text: list[str] = []
|
||||
self._skip_depth = 0
|
||||
self._seen_urls: set[str] = set()
|
||||
|
||||
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
||||
lowered = tag.lower()
|
||||
if lowered in {"script", "style"}:
|
||||
self._skip_depth += 1
|
||||
return
|
||||
if self._skip_depth:
|
||||
return
|
||||
if lowered == "title":
|
||||
self._in_title = True
|
||||
return
|
||||
if lowered == "a":
|
||||
href = dict(attrs).get("href")
|
||||
if href:
|
||||
self._current_href = urljoin(self.base_url, href)
|
||||
self._current_text = []
|
||||
|
||||
def handle_endtag(self, tag: str) -> None:
|
||||
lowered = tag.lower()
|
||||
if lowered in {"script", "style"} and self._skip_depth:
|
||||
self._skip_depth -= 1
|
||||
return
|
||||
if self._skip_depth:
|
||||
return
|
||||
if lowered == "title":
|
||||
self._in_title = False
|
||||
self.title = _compact_text(self.title)
|
||||
return
|
||||
if lowered == "a" and self._current_href:
|
||||
parsed = urlparse(self._current_href)
|
||||
if parsed.scheme in {"http", "https"} and self._current_href not in self._seen_urls:
|
||||
text = _compact_text(" ".join(self._current_text))
|
||||
self.links.append({"text": text, "url": self._current_href})
|
||||
self._seen_urls.add(self._current_href)
|
||||
self._current_href = None
|
||||
self._current_text = []
|
||||
|
||||
def handle_data(self, data: str) -> None:
|
||||
if self._skip_depth:
|
||||
return
|
||||
if self._in_title:
|
||||
self.title += data
|
||||
if self._current_href:
|
||||
self._current_text.append(data)
|
||||
|
||||
|
||||
def _extract_html_metadata(
    html: str,
    base_url: str,
    *,
    max_links: int = 80,
    max_pdf_links: int = 30,
) -> dict[str, Any]:
    """Extract the title, links and PDF links from an HTML document.

    Args:
        html: Raw HTML markup.
        base_url: URL used to resolve relative hrefs.
        max_links: Maximum number of entries returned under ``"links"``.
        max_pdf_links: Maximum number of entries returned under
            ``"pdf_links"``.

    Returns:
        A dict with ``"title"`` (str), ``"links"`` and ``"pdf_links"`` (lists
        of ``{"text", "url"}`` dicts in document order).
    """
    parser = _HtmlMetadataParser(base_url)
    parser.feed(html)
    # Flush anything the parser is still buffering at end of input.
    parser.close()

    # Pick PDFs from every collected link, not from the truncated list:
    # on link-heavy pages the documents often come after the first
    # ``max_links`` navigation anchors and would otherwise be lost.
    pdf_links = [
        link
        for link in parser.links
        if urlparse(link["url"]).path.lower().endswith(".pdf")
    ][:max_pdf_links]
    return {
        "title": parser.title,
        "links": parser.links[:max_links],
        "pdf_links": pdf_links,
    }
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class WebFetchTool:
|
||||
name: str = "web_fetch"
|
||||
@ -61,13 +137,20 @@ class WebFetchTool:
|
||||
response.raise_for_status()
|
||||
content_type = response.headers.get("content-type", "")
|
||||
raw = response.text
|
||||
text = _strip_html(raw) if "html" in content_type.lower() else raw
|
||||
is_html = "html" in content_type.lower()
|
||||
text = _strip_html(raw) if is_html else raw
|
||||
metadata = _extract_html_metadata(raw, str(response.url)) if is_html else {
|
||||
"title": "",
|
||||
"links": [],
|
||||
"pdf_links": [],
|
||||
}
|
||||
truncated = len(text) > limit
|
||||
return _json_result(
|
||||
True,
|
||||
url=str(response.url),
|
||||
status_code=response.status_code,
|
||||
content_type=content_type,
|
||||
**metadata,
|
||||
content=text[:limit],
|
||||
truncated=truncated,
|
||||
)
|
||||
@ -97,6 +180,15 @@ class WebSearchTool:
|
||||
if not str(query).strip():
|
||||
raise ValueError("query is required")
|
||||
bounded = max(1, min(int(limit or 5), 10))
|
||||
errors: list[str] = []
|
||||
try:
|
||||
ddgs_results = await asyncio.to_thread(_search_ddgs, query, bounded)
|
||||
except Exception as exc:
|
||||
ddgs_results = []
|
||||
errors.append(str(exc))
|
||||
if ddgs_results:
|
||||
return _json_result(True, **_search_result_payload(query, "ddgs", ddgs_results))
|
||||
|
||||
headers = {"User-Agent": "Mozilla/5.0 Beaver/1.0"}
|
||||
timeout = httpx.Timeout(connect=5, read=8, write=5, pool=5)
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, trust_env=True) as client:
|
||||
@ -118,7 +210,6 @@ class WebSearchTool:
|
||||
)
|
||||
),
|
||||
]
|
||||
errors: list[str] = []
|
||||
try:
|
||||
for completed in asyncio.as_completed(tasks):
|
||||
try:
|
||||
@ -127,7 +218,7 @@ class WebSearchTool:
|
||||
errors.append(str(exc))
|
||||
continue
|
||||
if results:
|
||||
return _json_result(True, query=query, engine=engine, results=results)
|
||||
return _json_result(True, **_search_result_payload(query, engine, results))
|
||||
detail = "; ".join(error for error in errors if error) or "no search results"
|
||||
return _json_result(False, query=query, error=detail)
|
||||
finally:
|
||||
@ -182,6 +273,62 @@ def _parse_bing_results(html: str, limit: int) -> list[dict[str, str]]:
|
||||
return results
|
||||
|
||||
|
||||
def _search_ddgs(query: str, limit: int) -> list[dict[str, str]]:
    """Run a text search through the ``ddgs`` package.

    The package is imported lazily inside the function. Each returned row is
    normalized to ``{"title", "url", "snippet"}``; the URL is read from
    ``href`` (falling back to ``url``) and the snippet from ``body`` (falling
    back to ``snippet``). Rows lacking a title or URL are dropped, and
    collection stops once ``limit`` entries have been gathered.
    """
    from ddgs import DDGS  # type: ignore[import-not-found]

    collected: list[dict[str, str]] = []
    for row in DDGS().text(query, max_results=limit) or []:
        title = _compact_text(str(row.get("title") or ""))
        link = str(row.get("href") or row.get("url") or "").strip()
        snippet = _compact_text(str(row.get("body") or row.get("snippet") or ""))
        if not (title and link):
            continue
        collected.append({"title": title, "url": link, "snippet": snippet})
        if len(collected) >= limit:
            break
    return collected
|
||||
|
||||
|
||||
def _search_result_payload(query: str, engine: str, results: list[dict[str, str]]) -> dict[str, Any]:
    """Assemble the keyword payload for a successful search response.

    The payload always carries ``query``, ``engine``, ``quality`` and
    ``results``; ``low_relevance_reason`` is appended only when the quality
    assessment supplies a reason.
    """
    quality, reason = _assess_search_quality(query, results)
    optional = {"low_relevance_reason": reason} if reason else {}
    return {
        "query": query,
        "engine": engine,
        "quality": quality,
        "results": results,
        **optional,
    }
|
||||
|
||||
|
||||
def _search_terms(value: str) -> set[str]:
|
||||
return {
|
||||
term
|
||||
for term in re.findall(r"[a-z0-9]+", value.lower())
|
||||
if len(term) > 2
|
||||
}
|
||||
|
||||
|
||||
def _assess_search_quality(query: str, results: list[dict[str, str]]) -> tuple[str, str | None]:
    """Rate how well search results match the query by term overlap.

    Args:
        query: The search query as typed.
        results: Result dicts; ``title``, ``snippet`` and ``url`` are read
            when present.

    Returns:
        ``("high", None)`` when the query yields no terms, or when at least
        one result shares enough terms with it (two, or one if the query has
        a single term). Otherwise ``("low", <reason>)``.
    """
    terms = _search_terms(query)
    if not terms:
        # Nothing to compare against, so the results cannot be judged
        # irrelevant.
        return "high", None

    required_overlap = min(2, len(terms))
    for result in results:
        url = result.get("url", "")
        try:
            parsed = urlparse(url)
            url_text = f"{parsed.netloc} {parsed.path}"
        except ValueError:
            # A malformed result URL must not abort an advisory check;
            # fall back to matching against the raw string.
            url_text = url
        haystack = " ".join(
            [
                result.get("title", ""),
                result.get("snippet", ""),
                url_text,
            ]
        )
        if len(terms & _search_terms(haystack)) >= required_overlap:
            return "high", None
    return "low", "results do not overlap enough with query terms"
|
||||
|
||||
|
||||
def _parse_duckduckgo_results(html: str, limit: int) -> list[dict[str, str]]:
|
||||
results: list[dict[str, str]] = []
|
||||
pattern = re.compile(
|
||||
|
||||
@ -37,6 +37,14 @@ class ToolExecutor:
|
||||
) -> ToolResult:
|
||||
"""按工具名执行一次调用。"""
|
||||
|
||||
allowed = context.metadata.get("allowed_tool_names") if context is not None else None
|
||||
if isinstance(allowed, list) and tool_name not in allowed:
|
||||
return ToolResult(
|
||||
success=False,
|
||||
content=f"Tool {tool_name} is not allowed for this node.",
|
||||
tool_name=tool_name,
|
||||
error="tool_not_allowed",
|
||||
)
|
||||
tool = self.registry.get(tool_name)
|
||||
if tool is None:
|
||||
return ToolResult(
|
||||
|
||||
Reference in New Issue
Block a user