feat(tasks): add skill-templated task graph execution

This commit is contained in:
2026-06-23 10:22:58 +08:00
parent 6843d89b2c
commit 53b13e8eac
53 changed files with 4773 additions and 756 deletions

View File

@ -48,6 +48,8 @@ class SkillContext:
content_hash: str = ""
activation_reason: str = "selected"
tool_hints: list[str] = field(default_factory=list)
team_template: dict[str, Any] | None = None
team_template_warnings: list[str] = field(default_factory=list)
@dataclass(slots=True)

View File

@ -317,7 +317,10 @@ class EngineLoader:
draft_service=draft_service,
)
task_service = self._task_service or TaskService(workspace / "tasks")
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(
task_skill_resolver=task_skill_resolver,
tool_registry=tool_registry,
)
mcp_manager = MCPConnectionManager(
self.config.tools.mcp_servers,
authz_config=self.config.authz,

View File

@ -8,6 +8,7 @@ import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from time import perf_counter
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
@ -81,6 +82,49 @@ class _DirectRunRequest:
future: asyncio.Future[AgentRunResult]
@dataclass(slots=True)
class _WebSearchLoopGuard:
low_quality_limit: int = 3
_low_quality_count: int = 0
_last_query: str = ""
def observe_result(self, tool_name: str, content: str) -> dict[str, str] | None:
if tool_name != "web_search":
self._reset()
return None
try:
payload = json.loads(content)
except Exception:
self._reset()
return None
query = str(payload.get("query") or self._last_query or "").strip()
is_low_quality = payload.get("success") is False or payload.get("quality") == "low"
if not is_low_quality:
self._reset()
self._last_query = query
return None
self._low_quality_count += 1
self._last_query = query
if self._low_quality_count < self.low_quality_limit:
return None
query_text = f" for query '{query}'" if query else ""
return {
"finish_reason": "web_search_low_quality_budget",
"message": (
"Web search returned low-quality or failed results repeatedly"
f"{query_text}. Stop retrying query variants; use confirmed sources already found, "
"state uncertainty clearly, and mark missing fields as N/A."
),
}
def _reset(self) -> None:
self._low_quality_count = 0
self._last_query = ""
class AgentLoop:
"""Single execution kernel shared by root agents and delegated agents."""
@ -240,6 +284,7 @@ class AgentLoop:
thinking_enabled: bool | None = None,
include_skill_assembly: bool = True,
include_tools: bool = True,
allowed_tool_names: list[str] | None = None,
max_tool_iterations: int | None = None,
provider_bundle: ProviderBundle | None = None,
parent_session_id: str | None = None,
@ -252,6 +297,7 @@ class AgentLoop:
allow_candidate_generation: bool = False,
intent_agent_decision: dict[str, Any] | None = None,
channel_identity: ChannelIdentity | None = None,
pre_run_latency_ms: dict[str, float] | None = None,
) -> AgentRunResult:
"""跑通最小 direct run 主链。
@ -292,6 +338,7 @@ class AgentLoop:
thinking_enabled=thinking_enabled,
include_skill_assembly=include_skill_assembly,
include_tools=include_tools,
allowed_tool_names=allowed_tool_names,
max_tool_iterations=max_tool_iterations,
provider_bundle=provider_bundle,
parent_session_id=parent_session_id,
@ -304,6 +351,7 @@ class AgentLoop:
allow_candidate_generation=allow_candidate_generation,
intent_agent_decision=intent_agent_decision,
channel_identity=channel_identity,
pre_run_latency_ms=pre_run_latency_ms,
)
async def _process_direct_impl(
@ -332,6 +380,7 @@ class AgentLoop:
thinking_enabled: bool | None = None,
include_skill_assembly: bool = True,
include_tools: bool = True,
allowed_tool_names: list[str] | None = None,
max_tool_iterations: int | None = None,
provider_bundle: ProviderBundle | None = None,
parent_session_id: str | None = None,
@ -344,6 +393,7 @@ class AgentLoop:
allow_candidate_generation: bool = False,
intent_agent_decision: dict[str, Any] | None = None,
channel_identity: ChannelIdentity | None = None,
pre_run_latency_ms: dict[str, float] | None = None,
) -> AgentRunResult:
"""真正执行一轮 direct run 的内部实现。
@ -353,8 +403,25 @@ class AgentLoop:
- 这样才能保证 run 模式下外部不能绕过队列直接执行
"""
run_perf_started = perf_counter()
latency_ms = self._initial_latency_ms(pre_run_latency_ms)
def add_latency(key: str, started_at: float) -> None:
latency_ms[key] = latency_ms.get(key, 0.0) + (perf_counter() - started_at) * 1000
loaded = self.boot()
session_manager = self._require_loaded("session_manager")
def session_write(callable_obj: Any, *args: Any, **kwargs: Any) -> Any:
started_at = perf_counter()
try:
return callable_obj(*args, **kwargs)
finally:
add_latency("session_write_ms", started_at)
def append_message(session_id_value: str, **kwargs: Any) -> int:
return session_write(session_manager.append_message, session_id_value, **kwargs)
memory_service = self._require_loaded("memory_service")
context_builder = self._require_loaded("context_builder")
tool_registry = self._require_loaded("tool_registry")
@ -365,9 +432,13 @@ class AgentLoop:
skill_assembler = self._require_loaded("skill_assembler")
skill_learning_service = self._require_loaded("skill_learning_service")
mcp_manager = getattr(loaded, "mcp_manager", None)
if mcp_manager is not None:
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
loaded.tools = [spec.name for spec in tool_registry.list_specs()]
if include_tools and mcp_manager is not None:
started_at = perf_counter()
try:
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
loaded.tools = [spec.name for spec in tool_registry.list_specs()]
finally:
add_latency("mcp_ms", started_at)
config = loaded.config
configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name)
@ -393,13 +464,15 @@ class AgentLoop:
memory_snapshot = memory_service.capture_snapshot_for_run()
if parent_session_id:
session_manager.ensure_session(
session_write(
session_manager.ensure_session,
parent_session_id,
source="unknown",
model=resolved_model,
user_id=user_id,
)
session_manager.ensure_session(
session_write(
session_manager.ensure_session,
resolved_session_id,
source=source,
model=resolved_model,
@ -407,7 +480,7 @@ class AgentLoop:
user_id=user_id,
parent_session_id=parent_session_id,
)
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -435,7 +508,7 @@ class AgentLoop:
user_id=user_id,
)
if intent_agent_decision:
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -480,35 +553,39 @@ class AgentLoop:
*(pinned_skill_contexts or []),
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
]
if not include_skill_assembly:
activated_skills = self._merge_skill_contexts(pinned_skills, [])
else:
skill_query = skill_selection_context or task
assembled_skills = await skill_assembler.assemble(
task_description=skill_query,
provider=skill_selector_provider,
model=skill_selector_model,
embedding_runtime=bundle.embedding_runtime,
thinking_enabled=thinking_enabled,
)
for interaction in getattr(assembled_skills, "llm_interactions", []) or []:
session_manager.append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
event_type="skill_assembler_llm_interaction_snapshotted",
event_payload=interaction,
content=json.dumps(interaction, ensure_ascii=False, default=str),
context_visible=False,
source=source,
title=title,
started_at = perf_counter()
try:
if not include_skill_assembly:
activated_skills = self._merge_skill_contexts(pinned_skills, [])
else:
skill_query = skill_selection_context or task
assembled_skills = await skill_assembler.assemble(
task_description=skill_query,
provider=skill_selector_provider,
model=skill_selector_model,
user_id=user_id,
embedding_runtime=bundle.embedding_runtime,
thinking_enabled=thinking_enabled,
)
activated_skills = self._merge_skill_contexts(
pinned_skills,
assembled_skills.activated_skills,
)
for interaction in getattr(assembled_skills, "llm_interactions", []) or []:
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
event_type="skill_assembler_llm_interaction_snapshotted",
event_payload=interaction,
content=json.dumps(interaction, ensure_ascii=False, default=str),
context_visible=False,
source=source,
title=title,
model=skill_selector_model,
user_id=user_id,
)
activated_skills = self._merge_skill_contexts(
pinned_skills,
assembled_skills.activated_skills,
)
finally:
add_latency("skill_assembly_ms", started_at)
skill_activation_messages = context_builder.build_skill_activation_messages(
activated_skills
)
@ -527,7 +604,7 @@ class AgentLoop:
]
if skill_activation_messages or activated_receipts:
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -544,19 +621,26 @@ class AgentLoop:
user_id=user_id,
)
if not include_tools:
selected_tool_specs = []
else:
selected_tool_specs = await tool_assembler.assemble(
task_description=task,
registry=tool_registry,
skills_loader=skills_loader,
activated_skills=activated_skills,
embedding_runtime=bundle.embedding_runtime,
top_k=10,
)
started_at = perf_counter()
try:
if not include_tools:
selected_tool_specs = []
else:
selected_tool_specs = await tool_assembler.assemble(
task_description=task,
registry=tool_registry,
skills_loader=skills_loader,
activated_skills=activated_skills,
embedding_runtime=bundle.embedding_runtime,
top_k=10,
)
if allowed_tool_names is not None:
allowed = set(allowed_tool_names)
selected_tool_specs = [spec for spec in selected_tool_specs if spec.name in allowed]
finally:
add_latency("tool_assembly_ms", started_at)
tool_schemas = tool_registry.export_selected_provider_schemas(selected_tool_specs)
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -573,37 +657,41 @@ class AgentLoop:
user_id=user_id,
)
build_input = ContextBuildInput(
base_system_prompt=self.profile.system_prompt,
prompt_locale=prompt_locale,
history=session_manager.get_history(
resolved_session_id,
max_messages=max(1, self.profile.max_context_messages),
),
current_user_input=task,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
session_context=SessionContext(
session_id=resolved_session_id,
source=source,
model=resolved_model,
user_id=user_id,
channel=channel_identity.channel_id if channel_identity else None,
channel_kind=channel_identity.kind if channel_identity else None,
account_id=channel_identity.account_id if channel_identity else None,
peer_id=channel_identity.peer_id if channel_identity else None,
peer_type=channel_identity.peer_type if channel_identity else None,
chat_id=channel_identity.peer_id if channel_identity else None,
thread_id=channel_identity.thread_id if channel_identity else None,
parent_session_id=parent_session_id,
),
runtime_context=self._current_runtime_context(),
execution_context=execution_context,
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
)
context_result = context_builder.build_messages(build_input)
started_at = perf_counter()
try:
build_input = ContextBuildInput(
base_system_prompt=self.profile.system_prompt,
prompt_locale=prompt_locale,
history=session_manager.get_history(
resolved_session_id,
max_messages=max(1, self.profile.max_context_messages),
),
current_user_input=task,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
session_context=SessionContext(
session_id=resolved_session_id,
source=source,
model=resolved_model,
user_id=user_id,
channel=channel_identity.channel_id if channel_identity else None,
channel_kind=channel_identity.kind if channel_identity else None,
account_id=channel_identity.account_id if channel_identity else None,
peer_id=channel_identity.peer_id if channel_identity else None,
peer_type=channel_identity.peer_type if channel_identity else None,
chat_id=channel_identity.peer_id if channel_identity else None,
thread_id=channel_identity.thread_id if channel_identity else None,
parent_session_id=parent_session_id,
),
runtime_context=self._current_runtime_context(),
execution_context=execution_context,
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
)
context_result = context_builder.build_messages(build_input)
finally:
add_latency("context_build_ms", started_at)
if skill_selection_context:
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -621,8 +709,8 @@ class AgentLoop:
model=resolved_model,
user_id=user_id,
)
session_manager.update_system_prompt(resolved_session_id, context_result.system_prompt)
session_manager.append_message(
session_write(session_manager.update_system_prompt, resolved_session_id, context_result.system_prompt)
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -639,7 +727,7 @@ class AgentLoop:
model=resolved_model,
user_id=user_id,
)
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="user",
@ -676,6 +764,9 @@ class AgentLoop:
"session_id": resolved_session_id,
"task_id": task_id,
"run_id": resolved_run_id,
"allowed_tool_names": (
None if allowed_tool_names is None else list(allowed_tool_names)
),
},
)
@ -683,6 +774,7 @@ class AgentLoop:
final_finish_reason = "stop"
final_provider_name = bundle.main_runtime.provider_name
final_model = bundle.main_runtime.model
web_search_loop_guard = _WebSearchLoopGuard()
while True:
chat_kwargs: dict[str, Any] = {
@ -713,7 +805,7 @@ class AgentLoop:
"temperature": resolved_temperature,
"thinking_enabled": thinking_enabled,
}
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -726,14 +818,18 @@ class AgentLoop:
model=final_model,
user_id=user_id,
)
response = await provider.chat(**chat_kwargs)
started_at = perf_counter()
try:
response = await provider.chat(**chat_kwargs)
finally:
add_latency("llm_ms", started_at)
final_provider_name = response.provider_name or final_provider_name
final_model = response.model or final_model
final_usage = self._merge_usage(final_usage, response.usage or {})
self._record_usage(session_manager, resolved_session_id, response.usage or {})
session_write(self._record_usage, session_manager, resolved_session_id, response.usage or {})
assistant_tool_calls = self._serialize_tool_calls(response.tool_calls)
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="assistant",
@ -764,17 +860,21 @@ class AgentLoop:
break
if iterations >= resolved_max_tool_iterations:
finalized = await self._finalize_after_tool_limit(
provider=provider,
messages=messages,
model=final_model,
max_tokens=resolved_max_tokens,
temperature=resolved_temperature,
thinking_enabled=thinking_enabled,
)
started_at = perf_counter()
try:
finalized = await self._finalize_after_tool_limit(
provider=provider,
messages=messages,
model=final_model,
max_tokens=resolved_max_tokens,
temperature=resolved_temperature,
thinking_enabled=thinking_enabled,
)
finally:
add_latency("llm_ms", started_at)
final_text = finalized or RAW_TOOL_CALL_FALLBACK
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="assistant",
@ -800,9 +900,26 @@ class AgentLoop:
reasoning_content=response.reasoning_content,
)
iterations += 1
for tool_call in response.tool_calls:
result = await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
session_manager.append_message(
started_at = perf_counter()
try:
if self._can_run_tool_calls_concurrently(response.tool_calls, tool_registry):
tool_results = await asyncio.gather(
*(
effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
for tool_call in response.tool_calls
)
)
else:
tool_results = []
for tool_call in response.tool_calls:
tool_results.append(
await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
)
finally:
add_latency("tool_ms", started_at)
web_guard_decision: dict[str, str] | None = None
for tool_call, result in zip(response.tool_calls, tool_results, strict=True):
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="tool",
@ -825,8 +942,30 @@ class AgentLoop:
tool_name=result.tool_name,
result=result.content,
)
if web_guard_decision is None:
web_guard_decision = web_search_loop_guard.observe_result(result.tool_name, result.content)
if web_guard_decision is not None:
final_text = web_guard_decision["message"]
final_finish_reason = web_guard_decision["finish_reason"]
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=final_text,
finish_reason=final_finish_reason,
source=source,
title=title,
model=final_model,
user_id=user_id,
)
context_builder.add_assistant_message(messages, content=final_text)
break
session_manager.append_message(
final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started)
final_usage_with_latency = self._usage_with_latency(final_usage, final_latency_ms)
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
@ -837,6 +976,7 @@ class AgentLoop:
"task_id": task_id,
"task_mode": task_mode,
"attempt_index": attempt_index,
"latency_ms": final_latency_ms,
},
content=final_text,
finish_reason=final_finish_reason,
@ -869,12 +1009,12 @@ class AgentLoop:
tool_iterations=iterations,
provider_name=final_provider_name,
model=final_model,
usage=final_usage,
usage=final_usage_with_latency,
task_id=task_id,
)
except Exception as exc:
if not user_message_recorded:
session_manager.append_message(
append_message(
resolved_session_id,
run_id=resolved_run_id,
role="user",
@ -885,6 +1025,7 @@ class AgentLoop:
model=resolved_model,
user_id=user_id,
)
final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started)
result = self._build_error_result(
session_manager=session_manager,
session_id=resolved_session_id,
@ -896,8 +1037,9 @@ class AgentLoop:
message=f"Run failed before completion: {exc}",
tool_iterations=iterations,
provider_name=final_provider_name,
usage=final_usage,
usage=self._usage_with_latency(final_usage, final_latency_ms),
task_id=task_id,
latency_ms=final_latency_ms,
)
self._record_run_receipts(
skill_learning_service=skill_learning_service,
@ -1032,6 +1174,80 @@ class AgentLoop:
)
return payload
@staticmethod
def _can_run_tool_calls_concurrently(tool_calls: list[Any], tool_registry: Any) -> bool:
    """Return True when a batch of tool calls qualifies for concurrent execution.

    A batch qualifies only if it holds at least two calls and every call is
    classified as read-only by ``AgentLoop._is_read_only_tool_call``.
    """
    if len(tool_calls) < 2:
        # Nothing to parallelize with zero or one call.
        return False
    for call in tool_calls:
        if not AgentLoop._is_read_only_tool_call(call, tool_registry):
            return False
    return True
@staticmethod
def _is_read_only_tool_call(tool_call: Any, tool_registry: Any) -> bool:
    """Classify a single tool call as read-only or not.

    The call is read-only when its tool is found in ``tool_registry`` and
    either the tool spec's metadata has ``read_only`` set to ``True``, or the
    metadata carries no truthy ``mutates``/``sensitive`` flag and both the
    tool name and its toolset appear in the built-in allow-lists below.
    Unnamed calls, a missing registry, and unknown tools are not read-only.
    """
    call_name = AgentLoop._tool_call_name(tool_call)
    if not call_name or tool_registry is None:
        return False

    registered = tool_registry.get(call_name)
    if registered is None:
        return False

    spec = getattr(registered, "spec", None)
    toolset_key = str(getattr(spec, "toolset", "") or "").lower()
    flags = getattr(spec, "metadata", {}) or {}

    # An explicit read_only=True wins over every other signal.
    if flags.get("read_only") is True:
        return True
    if flags.get("mutates") or flags.get("sensitive"):
        return False

    known_read_only_names = {
        "list_directory",
        "read_file",
        "search_files",
        "session_search",
        "skills_list",
        "skill_view",
        "user_files_list",
        "user_files_read",
        "web_fetch",
        "web_search",
    }
    known_read_only_toolsets = {"filesystem", "session", "skills", "user_files", "web"}
    if call_name not in known_read_only_names:
        return False
    return toolset_key in known_read_only_toolsets
@staticmethod
def _tool_call_name(tool_call: Any) -> str:
    """Extract the tool name from a tool call, or ``""`` when there is none.

    For dict calls the name is read from the nested ``"function"`` dict when
    that value is a dict, otherwise from the top-level ``"name"`` key. For
    any other object the ``name`` attribute is used.
    """
    if isinstance(tool_call, dict):
        nested = tool_call.get("function")
        holder = nested if isinstance(nested, dict) else tool_call
        return str(holder.get("name") or "")
    return str(getattr(tool_call, "name", "") or "")
@staticmethod
def _initial_latency_ms(pre_run_latency_ms: dict[str, float] | None) -> dict[str, float]:
latency = {
"router_ms": 0.0,
"mcp_ms": 0.0,
"skill_assembly_ms": 0.0,
"tool_assembly_ms": 0.0,
"context_build_ms": 0.0,
"llm_ms": 0.0,
"tool_ms": 0.0,
"session_write_ms": 0.0,
"total_ms": 0.0,
}
if pre_run_latency_ms:
for key, value in pre_run_latency_ms.items():
if isinstance(value, (int, float)):
latency[str(key)] = latency.get(str(key), 0.0) + float(value)
return latency
@staticmethod
def _final_latency_ms(latency_ms: dict[str, float], run_perf_started: float) -> dict[str, float]:
    """Return a finalized copy of the latency table without mutating the input.

    The time elapsed since ``run_perf_started`` (a ``perf_counter`` reading)
    is added to ``total_ms`` in milliseconds. Every value is then clamped to
    be non-negative and rounded to three decimals.
    """
    elapsed_ms = (perf_counter() - run_perf_started) * 1000
    merged = {**latency_ms, "total_ms": latency_ms.get("total_ms", 0.0) + elapsed_ms}
    rounded: dict[str, float] = {}
    for bucket, amount in merged.items():
        rounded[bucket] = round(max(0.0, float(amount)), 3)
    return rounded
@staticmethod
def _usage_with_latency(usage: dict[str, Any], latency_ms: dict[str, float]) -> dict[str, Any]:
    """Return a copy of ``usage`` with a ``"latency_ms"`` entry attached.

    Both the returned dict and the embedded latency dict are fresh shallow
    copies, so neither argument is mutated or shared with the result.
    """
    return {**usage, "latency_ms": dict(latency_ms)}
@staticmethod
def _record_usage(session_manager: Any, session_id: str, usage: dict[str, Any]) -> None:
"""把 provider usage 映射到 session usage 字段。
@ -1079,6 +1295,7 @@ class AgentLoop:
provider_name: str | None,
usage: dict[str, Any],
task_id: str | None = None,
latency_ms: dict[str, float] | None = None,
) -> AgentRunResult:
"""把主链中的未处理异常收口成可追踪的 assistant error turn。"""
@ -1104,6 +1321,7 @@ class AgentLoop:
"tool_iterations": tool_iterations,
"provider_name": provider_name,
"task_id": task_id,
"latency_ms": latency_ms or {},
},
content=message,
finish_reason="error",