feat(tasks): add skill-templated task graph execution

2026-06-23 10:22:58 +08:00
parent 6843d89b2c
commit 53b13e8eac
53 changed files with 4773 additions and 756 deletions
--- a/app-instance/backend/beaver/coordinator/execution/scheduler.py
+++ b/app-instance/backend/beaver/coordinator/execution/scheduler.py
@ -84,11 +84,21 @@ class TeamGraphScheduler:
        **kwargs,
    ) -> list[NodeRunResult]:
        results: list[NodeRunResult] = []
+        nodes_by_id = {node.node_id: node for node in nodes}
        for node in nodes:
-            if any(not item.success for item in results):
-                results.append(self._blocked(node, results))
+            blocking = [
+                item
+                for item in results
+                if self._blocks_downstream(item, nodes_by_id[item.node_id])
+            ]
+            if blocking:
+                results.append(self._blocked(node, blocking))
                continue
-            dependency_outputs = {item.node_id: item.output_text for item in results if item.success}
+            dependency_outputs = {
+                item.node_id: item.output_text
+                for item in results
+                if item.completion_status in {"succeeded", "partial"}
+            }
            results.append(await self._run_node(node, dependency_outputs=dependency_outputs, **kwargs))
        return results

@ -116,6 +126,7 @@ class TeamGraphScheduler:
        **kwargs,
    ) -> list[NodeRunResult]:
        pending = {node.node_id: node for node in nodes}
+        nodes_by_id = {node.node_id: node for node in nodes}
        completed: dict[str, NodeRunResult] = {}
        ordered: list[NodeRunResult] = []

@ -123,18 +134,28 @@ class TeamGraphScheduler:
            blocked_ids = {
                node_id
                for node_id, node in pending.items()
-                if any(dep in completed and not completed[dep].success for dep in node.depends_on)
+                if any(
+                    dep in completed
+                    and self._blocks_downstream(completed[dep], nodes_by_id[dep])
+                    for dep in node.depends_on
+                )
            }
            for node_id in sorted(blocked_ids):
                node = pending.pop(node_id)
                result = self._blocked(node, list(completed.values()))
                completed[node_id] = result
                ordered.append(result)
+            if blocked_ids:
+                continue

            ready = [
                node
                for node in pending.values()
-                if all(dep in completed and completed[dep].success for dep in node.depends_on)
+                if all(
+                    dep in completed
+                    and not self._blocks_downstream(completed[dep], nodes_by_id[dep])
+                    for dep in node.depends_on
+                )
            ]
            if not ready:
                if pending:
@ -196,6 +217,17 @@ class TeamGraphScheduler:
                expected_output=node.expected_output,
                node_id=node.node_id,
                dependency_outputs=dict(dependency_outputs),
+                input_contract=dict(node.input_contract),
+                output_contract=dict(node.output_contract),
+                allowed_tool_names=(
+                    None if node.allowed_tool_names is None else list(node.allowed_tool_names)
+                ),
+                required_evidence=list(node.required_evidence),
+                evidence_contract=dict(node.evidence_contract),
+                validation_rules=list(node.validation_rules),
+                required_for_completion=node.required_for_completion,
+                block_downstream_on_partial=node.block_downstream_on_partial,
+                max_tool_iterations=node.max_tool_iterations,
            )
            node_provider_bundle = provider_bundle_factory(node) if provider_bundle_factory is not None else provider_bundle
            return await self.runner.run(
@ -213,8 +245,17 @@ class TeamGraphScheduler:
                output_text="",
                finish_reason="error",
                error=str(exc),
+                completion_status="failed",
            )

+    @staticmethod
+    def _blocks_downstream(result: NodeRunResult, node: ExecutionNode) -> bool:
+        """Return whether ``result`` (produced by ``node``) must block dependent nodes."""
+        status = result.completion_status
+        if status == "partial":
+            return node.block_downstream_on_partial  # partial runs block only on opt-in
+        return status in {"failed", "blocked"} or not result.success
+
    @staticmethod
    def _merge_pinned(parent: list[str], local: list[str]) -> list[str]:
        result: list[str] = []
@ -245,6 +286,7 @@ class TeamGraphScheduler:
            output_text="",
            finish_reason="blocked",
            error=f"Blocked by failed dependency: {detail}",
+            completion_status="blocked",
        )

    @staticmethod
--- a/app-instance/backend/beaver/coordinator/local.py
+++ b/app-instance/backend/beaver/coordinator/local.py
@ -6,7 +6,7 @@ from uuid import uuid4

 from beaver.engine import AgentLoop
 from beaver.engine.providers import ProviderBundle
-from beaver.tasks.evidence import EvidenceBuilder
+from beaver.tasks.evidence import EvidenceBuilder, evaluate_node_evidence

 from .models import DelegationEnvelope, NodeRunResult

@ -54,6 +54,8 @@ class LocalAgentRunner:
            task_mode=bool(envelope.parent_task_id),
            pinned_skill_names=envelope.inherited_pinned_skills,
            pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
+            allowed_tool_names=envelope.allowed_tool_names,
+            max_tool_iterations=envelope.max_tool_iterations,
            allow_candidate_generation=allow_candidate_generation,
        )
        loaded = target_loop.boot()
@ -63,7 +65,23 @@ class LocalAgentRunner:
            result.output_text,
            result.finish_reason,
        )
-        success = result.finish_reason == "stop"
+        evidence_gaps = evaluate_node_evidence(
+            evidence,
+            envelope.required_evidence,
+            result.output_text,
+        )
+        run_succeeded = result.finish_reason == "stop"
+        if not run_succeeded:
+            completion_status = "failed"
+        elif evidence_gaps:
+            completion_status = "partial"
+        else:
+            completion_status = "succeeded"
+        success = completion_status == "succeeded"
+        if completion_status == "partial":
+            error = "; ".join(evidence_gaps)
+        else:
+            error = None if success else (result.output_text or result.finish_reason)
        return NodeRunResult(
            node_id=envelope.node_id or envelope.agent.name,
            success=success,
@ -71,8 +89,10 @@ class LocalAgentRunner:
            run_id=result.run_id,
            session_id=result.session_id,
            finish_reason=result.finish_reason,
-            error=None if success else (result.output_text or result.finish_reason),
+            error=error,
            evidence=evidence,
+            completion_status=completion_status,
+            evidence_gaps=evidence_gaps,
        )

    @staticmethod
--- a/app-instance/backend/beaver/coordinator/models.py
+++ b/app-instance/backend/beaver/coordinator/models.py
@ -51,6 +51,15 @@ class DelegationEnvelope:
    expected_output: str | None = None
    node_id: str | None = None
    dependency_outputs: dict[str, str] = field(default_factory=dict)
+    input_contract: dict[str, Any] = field(default_factory=dict)
+    output_contract: dict[str, Any] = field(default_factory=dict)
+    allowed_tool_names: list[str] | None = None
+    required_evidence: list[str] = field(default_factory=list)
+    evidence_contract: dict[str, Any] = field(default_factory=dict)
+    validation_rules: list[str] = field(default_factory=list)
+    required_for_completion: bool = True
+    block_downstream_on_partial: bool = False
+    max_tool_iterations: int | None = None


@dataclass(slots=True)
@ -65,6 +74,15 @@ class ExecutionNode:
    inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list)
    constraints: list[str] = field(default_factory=list)
    expected_output: str | None = None
+    input_contract: dict[str, Any] = field(default_factory=dict)
+    output_contract: dict[str, Any] = field(default_factory=dict)
+    allowed_tool_names: list[str] | None = None
+    required_evidence: list[str] = field(default_factory=list)
+    evidence_contract: dict[str, Any] = field(default_factory=dict)
+    validation_rules: list[str] = field(default_factory=list)
+    required_for_completion: bool = True
+    block_downstream_on_partial: bool = False
+    max_tool_iterations: int | None = None


@dataclass(slots=True)
@ -74,7 +92,7 @@ class ExecutionGraph:
    strategy: TeamStrategy
    nodes: list[ExecutionNode]

-    def validate(self) -> None:
+    def validate(self, *, max_depth: int | None = None) -> None:
        if self.strategy not in {"sequence", "parallel", "dag"}:
            raise NotImplementedError(f"Team strategy {self.strategy!r} is reserved but not implemented in v1")
        if not self.nodes:
@ -91,19 +109,25 @@ class ExecutionGraph:
        visited: set[str] = set()
        deps = {node.node_id: list(node.depends_on) for node in self.nodes}

-        def visit(node_id: str) -> None:
+        def visit(node_id: str) -> int:
            if node_id in visited:
-                return
+                return depths[node_id]
            if node_id in visiting:
                raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies involving {node_id!r}")
            visiting.add(node_id)
+            depth = 1
            for dep in deps[node_id]:
-                visit(dep)
+                depth = max(depth, visit(dep) + 1)
            visiting.remove(node_id)
            visited.add(node_id)
+            depths[node_id] = depth
+            return depth

+        depths: dict[str, int] = {}
        for node_id in node_ids:
-            visit(node_id)
+            depth = visit(node_id)
+            if max_depth is not None and depth > max_depth:
+                raise ValueError(f"ExecutionGraph exceeds max depth {max_depth}")


@dataclass(slots=True)
@ -118,6 +142,8 @@ class NodeRunResult:
    finish_reason: str = "stop"
    error: str | None = None
    evidence: "RunEvidence | None" = None
+    completion_status: str = "succeeded"
+    evidence_gaps: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        return {
@ -129,6 +155,8 @@ class NodeRunResult:
            "finish_reason": self.finish_reason,
            "error": self.error,
            "evidence": self.evidence.to_dict() if self.evidence is not None else None,
+            "completion_status": self.completion_status,
+            "evidence_gaps": list(self.evidence_gaps),
        }


--- a/app-instance/backend/beaver/engine/context/builder.py
+++ b/app-instance/backend/beaver/engine/context/builder.py
@ -48,6 +48,8 @@ class SkillContext:
    content_hash: str = ""
    activation_reason: str = "selected"
    tool_hints: list[str] = field(default_factory=list)
+    team_template: dict[str, Any] | None = None
+    team_template_warnings: list[str] = field(default_factory=list)


@dataclass(slots=True)
--- a/app-instance/backend/beaver/engine/loader.py
+++ b/app-instance/backend/beaver/engine/loader.py
@ -317,7 +317,10 @@ class EngineLoader:
            draft_service=draft_service,
        )
        task_service = self._task_service or TaskService(workspace / "tasks")
-        task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
+        task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(
+            task_skill_resolver=task_skill_resolver,
+            tool_registry=tool_registry,
+        )
        mcp_manager = MCPConnectionManager(
            self.config.tools.mcp_servers,
            authz_config=self.config.authz,
--- a/app-instance/backend/beaver/engine/loop.py
+++ b/app-instance/backend/beaver/engine/loop.py
@ -8,6 +8,7 @@ import os
 import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
+from time import perf_counter
 from typing import Any
 from uuid import uuid4
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
@ -81,6 +82,49 @@ class _DirectRunRequest:
    future: asyncio.Future[AgentRunResult]


+@dataclass(slots=True)
+class _WebSearchLoopGuard:
+    """Ends a run once ``web_search`` keeps returning failed or low-quality results."""
+
+    low_quality_limit: int = 3  # consecutive bad results tolerated before stopping
+    _low_quality_count: int = 0  # current streak of failed/low-quality results
+    _last_query: str = ""  # fallback query text when a payload omits "query"
+
+    def observe_result(self, tool_name: str, content: str) -> dict[str, str] | None:
+        """Track one tool result; return a stop directive dict at the limit, else None.
+
+        Other tools and content that is not a JSON object reset the streak.
+        """
+        if tool_name != "web_search":
+            self._reset()
+            return None
+        try:
+            payload = json.loads(content)
+        except (TypeError, ValueError, RecursionError):  # JSONDecodeError is a ValueError
+            payload = None
+        if not isinstance(payload, dict):  # JSON lists/scalars/null have no ``.get``
+            self._reset()
+            return None
+        query = str(payload.get("query") or self._last_query or "").strip()
+        is_low_quality = payload.get("success") is False or payload.get("quality") == "low"
+        # A usable result clears the streak; the query is remembered either way.
+        self._low_quality_count = self._low_quality_count + 1 if is_low_quality else 0
+        self._last_query = query
+        if not is_low_quality or self._low_quality_count < self.low_quality_limit:
+            return None
+        query_text = f" for query '{query}'" if query else ""
+        message = (
+            "Web search returned low-quality or failed results repeatedly"
+            f"{query_text}. Stop retrying query variants; use confirmed sources already found, "
+            "state uncertainty clearly, and mark missing fields as N/A."
+        )
+        return {"finish_reason": "web_search_low_quality_budget", "message": message}
+
+    def _reset(self) -> None:
+        self._low_quality_count = 0
+        self._last_query = ""
+
+
 class AgentLoop:
    """Single execution kernel shared by root agents and delegated agents."""

@ -240,6 +284,7 @@ class AgentLoop:
        thinking_enabled: bool | None = None,
        include_skill_assembly: bool = True,
        include_tools: bool = True,
+        allowed_tool_names: list[str] | None = None,
        max_tool_iterations: int | None = None,
        provider_bundle: ProviderBundle | None = None,
        parent_session_id: str | None = None,
@ -252,6 +297,7 @@ class AgentLoop:
        allow_candidate_generation: bool = False,
        intent_agent_decision: dict[str, Any] | None = None,
        channel_identity: ChannelIdentity | None = None,
+        pre_run_latency_ms: dict[str, float] | None = None,
    ) -> AgentRunResult:
        """跑通最小 direct run 主链。

@ -292,6 +338,7 @@ class AgentLoop:
            thinking_enabled=thinking_enabled,
            include_skill_assembly=include_skill_assembly,
            include_tools=include_tools,
+            allowed_tool_names=allowed_tool_names,
            max_tool_iterations=max_tool_iterations,
            provider_bundle=provider_bundle,
            parent_session_id=parent_session_id,
@ -304,6 +351,7 @@ class AgentLoop:
            allow_candidate_generation=allow_candidate_generation,
            intent_agent_decision=intent_agent_decision,
            channel_identity=channel_identity,
+            pre_run_latency_ms=pre_run_latency_ms,
        )

    async def _process_direct_impl(
@ -332,6 +380,7 @@ class AgentLoop:
        thinking_enabled: bool | None = None,
        include_skill_assembly: bool = True,
        include_tools: bool = True,
+        allowed_tool_names: list[str] | None = None,
        max_tool_iterations: int | None = None,
        provider_bundle: ProviderBundle | None = None,
        parent_session_id: str | None = None,
@ -344,6 +393,7 @@ class AgentLoop:
        allow_candidate_generation: bool = False,
        intent_agent_decision: dict[str, Any] | None = None,
        channel_identity: ChannelIdentity | None = None,
+        pre_run_latency_ms: dict[str, float] | None = None,
    ) -> AgentRunResult:
        """真正执行一轮 direct run 的内部实现。

@ -353,8 +403,25 @@ class AgentLoop:
        - 这样才能保证 run 模式下外部不能绕过队列直接执行
        """

+        run_perf_started = perf_counter()
+        latency_ms = self._initial_latency_ms(pre_run_latency_ms)
+
+        def add_latency(key: str, started_at: float) -> None:
+            latency_ms[key] = latency_ms.get(key, 0.0) + (perf_counter() - started_at) * 1000
+
        loaded = self.boot()
        session_manager = self._require_loaded("session_manager")
+
+        def session_write(callable_obj: Any, *args: Any, **kwargs: Any) -> Any:
+            started_at = perf_counter()
+            try:
+                return callable_obj(*args, **kwargs)
+            finally:
+                add_latency("session_write_ms", started_at)
+
+        def append_message(session_id_value: str, **kwargs: Any) -> int:
+            return session_write(session_manager.append_message, session_id_value, **kwargs)
+
        memory_service = self._require_loaded("memory_service")
        context_builder = self._require_loaded("context_builder")
        tool_registry = self._require_loaded("tool_registry")
@ -365,9 +432,13 @@ class AgentLoop:
        skill_assembler = self._require_loaded("skill_assembler")
        skill_learning_service = self._require_loaded("skill_learning_service")
        mcp_manager = getattr(loaded, "mcp_manager", None)
-        if mcp_manager is not None:
-            loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
-            loaded.tools = [spec.name for spec in tool_registry.list_specs()]
+        if include_tools and mcp_manager is not None:
+            started_at = perf_counter()
+            try:
+                loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
+                loaded.tools = [spec.name for spec in tool_registry.list_specs()]
+            finally:
+                add_latency("mcp_ms", started_at)

        config = loaded.config
        configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name)
@ -393,13 +464,15 @@ class AgentLoop:
        memory_snapshot = memory_service.capture_snapshot_for_run()

        if parent_session_id:
-            session_manager.ensure_session(
+            session_write(
+                session_manager.ensure_session,
                parent_session_id,
                source="unknown",
                model=resolved_model,
                user_id=user_id,
            )
-        session_manager.ensure_session(
+        session_write(
+            session_manager.ensure_session,
            resolved_session_id,
            source=source,
            model=resolved_model,
@ -407,7 +480,7 @@ class AgentLoop:
            user_id=user_id,
            parent_session_id=parent_session_id,
        )
-        session_manager.append_message(
+        append_message(
            resolved_session_id,
            run_id=resolved_run_id,
            role="system",
@ -435,7 +508,7 @@ class AgentLoop:
            user_id=user_id,
        )
        if intent_agent_decision:
-            session_manager.append_message(
+            append_message(
                resolved_session_id,
                run_id=resolved_run_id,
                role="system",
@ -480,35 +553,39 @@ class AgentLoop:
                *(pinned_skill_contexts or []),
                *self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
            ]
-            if not include_skill_assembly:
-                activated_skills = self._merge_skill_contexts(pinned_skills, [])
-            else:
-                skill_query = skill_selection_context or task
-                assembled_skills = await skill_assembler.assemble(
-                    task_description=skill_query,
-                    provider=skill_selector_provider,
-                    model=skill_selector_model,
-                    embedding_runtime=bundle.embedding_runtime,
-                    thinking_enabled=thinking_enabled,
-                )
-                for interaction in getattr(assembled_skills, "llm_interactions", []) or []:
-                    session_manager.append_message(
-                        resolved_session_id,
-                        run_id=resolved_run_id,
-                        role="system",
-                        event_type="skill_assembler_llm_interaction_snapshotted",
-                        event_payload=interaction,
-                        content=json.dumps(interaction, ensure_ascii=False, default=str),
-                        context_visible=False,
-                        source=source,
-                        title=title,
+            started_at = perf_counter()
+            try:
+                if not include_skill_assembly:
+                    activated_skills = self._merge_skill_contexts(pinned_skills, [])
+                else:
+                    skill_query = skill_selection_context or task
+                    assembled_skills = await skill_assembler.assemble(
+                        task_description=skill_query,
+                        provider=skill_selector_provider,
                        model=skill_selector_model,
-                        user_id=user_id,
+                        embedding_runtime=bundle.embedding_runtime,
+                        thinking_enabled=thinking_enabled,
                    )
-                activated_skills = self._merge_skill_contexts(
-                    pinned_skills,
-                    assembled_skills.activated_skills,
-                )
+                    for interaction in getattr(assembled_skills, "llm_interactions", []) or []:
+                        append_message(
+                            resolved_session_id,
+                            run_id=resolved_run_id,
+                            role="system",
+                            event_type="skill_assembler_llm_interaction_snapshotted",
+                            event_payload=interaction,
+                            content=json.dumps(interaction, ensure_ascii=False, default=str),
+                            context_visible=False,
+                            source=source,
+                            title=title,
+                            model=skill_selector_model,
+                            user_id=user_id,
+                        )
+                    activated_skills = self._merge_skill_contexts(
+                        pinned_skills,
+                        assembled_skills.activated_skills,
+                    )
+            finally:
+                add_latency("skill_assembly_ms", started_at)
            skill_activation_messages = context_builder.build_skill_activation_messages(
                activated_skills
            )
@ -527,7 +604,7 @@ class AgentLoop:
            ]

            if skill_activation_messages or activated_receipts:
-                session_manager.append_message(
+                append_message(
                    resolved_session_id,
                    run_id=resolved_run_id,
                    role="system",
@ -544,19 +621,26 @@ class AgentLoop:
                    user_id=user_id,
                )

-            if not include_tools:
-                selected_tool_specs = []
-            else:
-                selected_tool_specs = await tool_assembler.assemble(
-                    task_description=task,
-                    registry=tool_registry,
-                    skills_loader=skills_loader,
-                    activated_skills=activated_skills,
-                    embedding_runtime=bundle.embedding_runtime,
-                    top_k=10,
-                )
+            started_at = perf_counter()
+            try:
+                if not include_tools:
+                    selected_tool_specs = []
+                else:
+                    selected_tool_specs = await tool_assembler.assemble(
+                        task_description=task,
+                        registry=tool_registry,
+                        skills_loader=skills_loader,
+                        activated_skills=activated_skills,
+                        embedding_runtime=bundle.embedding_runtime,
+                        top_k=10,
+                    )
+                    if allowed_tool_names is not None:
+                        allowed = set(allowed_tool_names)
+                        selected_tool_specs = [spec for spec in selected_tool_specs if spec.name in allowed]
+            finally:
+                add_latency("tool_assembly_ms", started_at)
            tool_schemas = tool_registry.export_selected_provider_schemas(selected_tool_specs)
-            session_manager.append_message(
+            append_message(
                resolved_session_id,
                run_id=resolved_run_id,
                role="system",
@ -573,37 +657,41 @@ class AgentLoop:
                user_id=user_id,
            )

-            build_input = ContextBuildInput(
-                base_system_prompt=self.profile.system_prompt,
-                prompt_locale=prompt_locale,
-                history=session_manager.get_history(
-                    resolved_session_id,
-                    max_messages=max(1, self.profile.max_context_messages),
-                ),
-                current_user_input=task,
-                memory_snapshot=memory_snapshot,
-                activated_skills=activated_skills,
-                session_context=SessionContext(
-                    session_id=resolved_session_id,
-                    source=source,
-                    model=resolved_model,
-                    user_id=user_id,
-                    channel=channel_identity.channel_id if channel_identity else None,
-                    channel_kind=channel_identity.kind if channel_identity else None,
-                    account_id=channel_identity.account_id if channel_identity else None,
-                    peer_id=channel_identity.peer_id if channel_identity else None,
-                    peer_type=channel_identity.peer_type if channel_identity else None,
-                    chat_id=channel_identity.peer_id if channel_identity else None,
-                    thread_id=channel_identity.thread_id if channel_identity else None,
-                    parent_session_id=parent_session_id,
-                ),
-                runtime_context=self._current_runtime_context(),
-                execution_context=execution_context,
-                extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
-            )
-            context_result = context_builder.build_messages(build_input)
+            started_at = perf_counter()
+            try:
+                build_input = ContextBuildInput(
+                    base_system_prompt=self.profile.system_prompt,
+                    prompt_locale=prompt_locale,
+                    history=session_manager.get_history(
+                        resolved_session_id,
+                        max_messages=max(1, self.profile.max_context_messages),
+                    ),
+                    current_user_input=task,
+                    memory_snapshot=memory_snapshot,
+                    activated_skills=activated_skills,
+                    session_context=SessionContext(
+                        session_id=resolved_session_id,
+                        source=source,
+                        model=resolved_model,
+                        user_id=user_id,
+                        channel=channel_identity.channel_id if channel_identity else None,
+                        channel_kind=channel_identity.kind if channel_identity else None,
+                        account_id=channel_identity.account_id if channel_identity else None,
+                        peer_id=channel_identity.peer_id if channel_identity else None,
+                        peer_type=channel_identity.peer_type if channel_identity else None,
+                        chat_id=channel_identity.peer_id if channel_identity else None,
+                        thread_id=channel_identity.thread_id if channel_identity else None,
+                        parent_session_id=parent_session_id,
+                    ),
+                    runtime_context=self._current_runtime_context(),
+                    execution_context=execution_context,
+                    extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
+                )
+                context_result = context_builder.build_messages(build_input)
+            finally:
+                add_latency("context_build_ms", started_at)
            if skill_selection_context:
-                session_manager.append_message(
+                append_message(
                    resolved_session_id,
                    run_id=resolved_run_id,
                    role="system",
@ -621,8 +709,8 @@ class AgentLoop:
                    model=resolved_model,
                    user_id=user_id,
                )
-            session_manager.update_system_prompt(resolved_session_id, context_result.system_prompt)
-            session_manager.append_message(
+            session_write(session_manager.update_system_prompt, resolved_session_id, context_result.system_prompt)
+            append_message(
                resolved_session_id,
                run_id=resolved_run_id,
                role="system",
@ -639,7 +727,7 @@ class AgentLoop:
                model=resolved_model,
                user_id=user_id,
            )
-            session_manager.append_message(
+            append_message(
                resolved_session_id,
                run_id=resolved_run_id,
                role="user",
@ -676,6 +764,9 @@ class AgentLoop:
                    "session_id": resolved_session_id,
                    "task_id": task_id,
                    "run_id": resolved_run_id,
+                    "allowed_tool_names": (
+                        None if allowed_tool_names is None else list(allowed_tool_names)
+                    ),
                },
            )

@ -683,6 +774,7 @@ class AgentLoop:
            final_finish_reason = "stop"
            final_provider_name = bundle.main_runtime.provider_name
            final_model = bundle.main_runtime.model
+            web_search_loop_guard = _WebSearchLoopGuard()

            while True:
                chat_kwargs: dict[str, Any] = {
@ -713,7 +805,7 @@ class AgentLoop:
                    "temperature": resolved_temperature,
                    "thinking_enabled": thinking_enabled,
                }
-                session_manager.append_message(
+                append_message(
                    resolved_session_id,
                    run_id=resolved_run_id,
                    role="system",
@ -726,14 +818,18 @@ class AgentLoop:
                    model=final_model,
                    user_id=user_id,
                )
-                response = await provider.chat(**chat_kwargs)
+                started_at = perf_counter()
+                try:
+                    response = await provider.chat(**chat_kwargs)
+                finally:
+                    add_latency("llm_ms", started_at)
                final_provider_name = response.provider_name or final_provider_name
                final_model = response.model or final_model
                final_usage = self._merge_usage(final_usage, response.usage or {})
-                self._record_usage(session_manager, resolved_session_id, response.usage or {})
+                session_write(self._record_usage, session_manager, resolved_session_id, response.usage or {})

                assistant_tool_calls = self._serialize_tool_calls(response.tool_calls)
-                session_manager.append_message(
+                append_message(
                    resolved_session_id,
                    run_id=resolved_run_id,
                    role="assistant",
@ -764,17 +860,21 @@ class AgentLoop:
                    break

                if iterations >= resolved_max_tool_iterations:
-                    finalized = await self._finalize_after_tool_limit(
-                        provider=provider,
-                        messages=messages,
-                        model=final_model,
-                        max_tokens=resolved_max_tokens,
-                        temperature=resolved_temperature,
-                        thinking_enabled=thinking_enabled,
-                    )
+                    started_at = perf_counter()
+                    try:
+                        finalized = await self._finalize_after_tool_limit(
+                            provider=provider,
+                            messages=messages,
+                            model=final_model,
+                            max_tokens=resolved_max_tokens,
+                            temperature=resolved_temperature,
+                            thinking_enabled=thinking_enabled,
+                        )
+                    finally:
+                        add_latency("llm_ms", started_at)
                    final_text = finalized or RAW_TOOL_CALL_FALLBACK
                    final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
-                    session_manager.append_message(
+                    append_message(
                        resolved_session_id,
                        run_id=resolved_run_id,
                        role="assistant",
@ -800,9 +900,26 @@ class AgentLoop:
                    reasoning_content=response.reasoning_content,
                )
                iterations += 1
-                for tool_call in response.tool_calls:
-                    result = await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
-                    session_manager.append_message(
+                started_at = perf_counter()
+                try:
+                    if self._can_run_tool_calls_concurrently(response.tool_calls, tool_registry):
+                        tool_results = await asyncio.gather(
+                            *(
+                                effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
+                                for tool_call in response.tool_calls
+                            )
+                        )
+                    else:
+                        tool_results = []
+                        for tool_call in response.tool_calls:
+                            tool_results.append(
+                                await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
+                            )
+                finally:
+                    add_latency("tool_ms", started_at)
+                web_guard_decision: dict[str, str] | None = None
+                for tool_call, result in zip(response.tool_calls, tool_results, strict=True):
+                    append_message(
                        resolved_session_id,
                        run_id=resolved_run_id,
                        role="tool",
@ -825,8 +942,30 @@ class AgentLoop:
                        tool_name=result.tool_name,
                        result=result.content,
                    )
+                    if web_guard_decision is None:
+                        web_guard_decision = web_search_loop_guard.observe_result(result.tool_name, result.content)
+                if web_guard_decision is not None:
+                    final_text = web_guard_decision["message"]
+                    final_finish_reason = web_guard_decision["finish_reason"]
+                    append_message(
+                        resolved_session_id,
+                        run_id=resolved_run_id,
+                        role="assistant",
+                        event_type="assistant_message_added",
+                        event_payload={"task_id": task_id} if task_id else None,
+                        content=final_text,
+                        finish_reason=final_finish_reason,
+                        source=source,
+                        title=title,
+                        model=final_model,
+                        user_id=user_id,
+                    )
+                    context_builder.add_assistant_message(messages, content=final_text)
+                    break

-            session_manager.append_message(
+            final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started)
+            final_usage_with_latency = self._usage_with_latency(final_usage, final_latency_ms)
+            append_message(
                resolved_session_id,
                run_id=resolved_run_id,
                role="system",
@ -837,6 +976,7 @@ class AgentLoop:
                    "task_id": task_id,
                    "task_mode": task_mode,
                    "attempt_index": attempt_index,
+                    "latency_ms": final_latency_ms,
                },
                content=final_text,
                finish_reason=final_finish_reason,
@ -869,12 +1009,12 @@ class AgentLoop:
                tool_iterations=iterations,
                provider_name=final_provider_name,
                model=final_model,
-                usage=final_usage,
+                usage=final_usage_with_latency,
                task_id=task_id,
            )
        except Exception as exc:
            if not user_message_recorded:
-                session_manager.append_message(
+                append_message(
                    resolved_session_id,
                    run_id=resolved_run_id,
                    role="user",
@ -885,6 +1025,7 @@ class AgentLoop:
                    model=resolved_model,
                    user_id=user_id,
                )
+            final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started)
            result = self._build_error_result(
                session_manager=session_manager,
                session_id=resolved_session_id,
@ -896,8 +1037,9 @@ class AgentLoop:
                message=f"Run failed before completion: {exc}",
                tool_iterations=iterations,
                provider_name=final_provider_name,
-                usage=final_usage,
+                usage=self._usage_with_latency(final_usage, final_latency_ms),
                task_id=task_id,
+                latency_ms=final_latency_ms,
            )
            self._record_run_receipts(
                skill_learning_service=skill_learning_service,
@ -1032,6 +1174,80 @@ class AgentLoop:
            )
        return payload

+    @staticmethod
+    def _can_run_tool_calls_concurrently(tool_calls: list[Any], tool_registry: Any) -> bool:
+        """Tell whether a batch of tool calls may be executed concurrently.
+
+        The batch qualifies only when it holds at least two calls and every
+        one of them is classified as read-only by ``_is_read_only_tool_call``.
+        """
+        # Zero or one call: there is nothing to overlap, keep the serial path.
+        if len(tool_calls) >= 2:
+            for call in tool_calls:
+                if not AgentLoop._is_read_only_tool_call(call, tool_registry):
+                    return False
+            return True
+        return False
+
+    @staticmethod
+    def _is_read_only_tool_call(tool_call: Any, tool_registry: Any) -> bool:
+        """Decide whether a tool call may run side by side with other calls.
+
+        Returns ``False`` when the call has no name, when no registry is given,
+        or when the registry does not know the tool. Otherwise the tool spec's
+        ``metadata`` is consulted:
+
+        * a truthy ``mutates`` or ``sensitive`` flag always rejects the call;
+        * ``read_only`` set to exactly ``True`` accepts it;
+        * without either, the call is accepted only if its name is on the
+          built-in allow-list and its (lower-cased) toolset is a known one.
+        """
+        name = AgentLoop._tool_call_name(tool_call)
+        if not name:
+            return False
+        tool = tool_registry.get(name) if tool_registry is not None else None
+        if tool is None:
+            return False
+        spec = getattr(tool, "spec", None)
+        toolset = str(getattr(spec, "toolset", "") or "").lower()
+        metadata = getattr(spec, "metadata", {}) or {}
+        # Deny flags are evaluated first: when a spec carries contradictory
+        # metadata (read_only together with mutates/sensitive) the safe answer
+        # is to keep the call on the sequential path.
+        if metadata.get("mutates") or metadata.get("sensitive"):
+            return False
+        if metadata.get("read_only") is True:
+            return True
+        return name in {
+            "list_directory",
+            "read_file",
+            "search_files",
+            "session_search",
+            "skills_list",
+            "skill_view",
+            "user_files_list",
+            "user_files_read",
+            "web_fetch",
+            "web_search",
+        } and toolset in {"filesystem", "session", "skills", "user_files", "web"}
+
+    @staticmethod
+    def _tool_call_name(tool_call: Any) -> str:
+        """Extract the tool name from a tool call, or ``""`` if there is none.
+
+        Dict-shaped calls are read from ``["function"]["name"]`` when
+        ``function`` is itself a dict, otherwise from the top-level ``name``
+        key. Any other object is read through its ``name`` attribute.
+        """
+        if isinstance(tool_call, dict):
+            nested = tool_call.get("function")
+            # A nested function dict takes priority even when it has no name.
+            holder = nested if isinstance(nested, dict) else tool_call
+            raw_name = holder.get("name")
+        else:
+            raw_name = getattr(tool_call, "name", "")
+        return str(raw_name or "")
+
+    @staticmethod
+    def _initial_latency_ms(pre_run_latency_ms: dict[str, float] | None) -> dict[str, float]:
+        """Create the per-run latency table, seeded with pre-run measurements.
+
+        Every standard bucket starts at ``0.0``. Numeric entries of
+        ``pre_run_latency_ms`` are added on top (keys are stringified, unknown
+        keys create new buckets); non-numeric entries are ignored.
+        """
+        buckets: dict[str, float] = dict.fromkeys(
+            (
+                "router_ms",
+                "mcp_ms",
+                "skill_assembly_ms",
+                "tool_assembly_ms",
+                "context_build_ms",
+                "llm_ms",
+                "tool_ms",
+                "session_write_ms",
+                "total_ms",
+            ),
+            0.0,
+        )
+        for raw_key, amount in (pre_run_latency_ms or {}).items():
+            if not isinstance(amount, (int, float)):
+                continue
+            bucket = str(raw_key)
+            buckets[bucket] = buckets.get(bucket, 0.0) + float(amount)
+        return buckets
+
+    @staticmethod
+    def _final_latency_ms(latency_ms: dict[str, float], run_perf_started: float) -> dict[str, float]:
+        """Return a finalized copy of the latency table.
+
+        The time elapsed since ``run_perf_started`` (a ``perf_counter``
+        reading) is added to ``total_ms`` in milliseconds. Every bucket is
+        then clamped to be non-negative and rounded to three decimals. The
+        input mapping is not modified.
+        """
+        snapshot = dict(latency_ms)
+        previous_total = snapshot.get("total_ms", 0.0)
+        elapsed_ms = (perf_counter() - run_perf_started) * 1000
+        snapshot["total_ms"] = previous_total + elapsed_ms
+        rounded: dict[str, float] = {}
+        for bucket, amount in snapshot.items():
+            rounded[bucket] = round(max(0.0, float(amount)), 3)
+        return rounded
+
+    @staticmethod
+    def _usage_with_latency(usage: dict[str, Any], latency_ms: dict[str, float]) -> dict[str, Any]:
+        """Return a copy of ``usage`` with a ``latency_ms`` entry attached.
+
+        Both the usage mapping and the latency table are copied, so neither
+        argument is mutated and the result shares no dict with them.
+        """
+        combined: dict[str, Any] = {}
+        combined.update(usage)
+        combined["latency_ms"] = dict(latency_ms)
+        return combined
+
    @staticmethod
    def _record_usage(session_manager: Any, session_id: str, usage: dict[str, Any]) -> None:
        """把 provider usage 映射到 session usage 字段。
@ -1079,6 +1295,7 @@ class AgentLoop:
        provider_name: str | None,
        usage: dict[str, Any],
        task_id: str | None = None,
+        latency_ms: dict[str, float] | None = None,
    ) -> AgentRunResult:
        """把主链中的未处理异常收口成可追踪的 assistant error turn。"""

@ -1104,6 +1321,7 @@ class AgentLoop:
                "tool_iterations": tool_iterations,
                "provider_name": provider_name,
                "task_id": task_id,
+                "latency_ms": latency_ms or {},
            },
            content=message,
            finish_reason="error",
--- a/app-instance/backend/beaver/interfaces/web/app.py
+++ b/app-instance/backend/beaver/interfaces/web/app.py
@ -43,6 +43,7 @@ from beaver.services.user_files import (
    UserFileNotFoundError,
    UserFilePathError,
    UserFileSizeError,
+    UserFileStorageError,
    UserFileService,
 )
 from beaver.services.user_file_resolver import (
@ -644,6 +645,8 @@ def create_app(
            return HTTPException(status_code=400, detail=str(exc) or "Invalid path")
        if isinstance(exc, UserFileSizeError):
            return HTTPException(status_code=413, detail=str(exc) or "File too large")
+        if isinstance(exc, UserFileStorageError):
+            return HTTPException(status_code=503, detail=str(exc) or "User file storage is unavailable")
        if isinstance(exc, UserFileConfigurationError):
            return HTTPException(status_code=503, detail=str(exc) or "User file storage is not configured")
        return HTTPException(status_code=400, detail=str(exc) or "User file operation failed")
@ -1327,6 +1330,7 @@ def create_app(
                    "runs": runs,
                }
            )
+        sessions.sort(key=lambda item: item.get("updated_at") or item.get("created_at") or "", reverse=True)
        return {"sessions": sessions}

    @app.post("/api/sessions/{session_id:path}/archive")
@ -3166,6 +3170,11 @@ def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[
            title = getattr(started, "title", None)
        if title is None:
            title = source or "run"
+        latency_ms = None
+        if completed is not None and isinstance(completed.event_payload, dict):
+            raw_latency = completed.event_payload.get("latency_ms")
+            latency_ms = raw_latency if isinstance(raw_latency, dict) else None
+        sorted_records = sorted(records, key=lambda item: item.timestamp or 0, reverse=True)
        runs.append(
            {
                "run_id": run_id,
@ -3181,10 +3190,15 @@ def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[
                "started_at": _iso_from_timestamp(started.timestamp if started is not None else None),
                "ended_at": _iso_from_timestamp(completed.timestamp) if completed is not None else None,
                "finish_reason": completed.finish_reason if completed is not None else None,
-                "events": [_debug_event_to_dict(item) for item in records],
+                "latency_ms": latency_ms or {},
+                "events": [_debug_event_to_dict(item) for item in sorted_records],
            }
        )
-    return runs
+    return sorted(
+        runs,
+        key=lambda item: item.get("ended_at") or item.get("started_at") or "",
+        reverse=True,
+    )


 def _debug_event_to_dict(record: Any) -> dict[str, Any]:
--- a/app-instance/backend/beaver/services/agent_service.py
+++ b/app-instance/backend/beaver/services/agent_service.py
@ -14,24 +14,20 @@ from __future__ import annotations

 import asyncio
 from pathlib import Path
+from time import perf_counter
 from typing import Any
 from uuid import uuid4

-from beaver.coordinator.models import ExecutionNode, TeamRunResult
 from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader
 from beaver.engine.providers import make_provider_bundle
 from beaver.foundation.events import InboundMessage, OutboundMessage
 from beaver.foundation.models import CronJob, CronRunRecord
 from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
 from beaver.tasks import (
-    EvidenceBuilder,
    MainAgentRouter,
-    RunEvidence,
-    TaskEvidencePacket,
-    TaskExecutionPlan,
    TaskRecord,
-    render_task_evidence,
 )
+from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
 from beaver.tasks.service import normalize_acceptance_type


@ -594,15 +590,22 @@ class AgentService:
        router_provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        router_runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        active_task = task_service.get_latest_open_task(session_id)
-        decision = await self._main_agent_router.classify(
-            message,
-            active_task=active_task,
-            provider=router_provider,
-            model=getattr(router_runtime, "model", None),
-            recent_messages=session_manager.get_messages_as_conversation(session_id),
-            intent_skill=self._load_intent_agent_skill(loaded),
-            thinking_enabled=kwargs.get("thinking_enabled"),
-        )
+        router_started = perf_counter()
+        try:
+            decision = await self._main_agent_router.classify(
+                message,
+                active_task=active_task,
+                provider=router_provider,
+                model=getattr(router_runtime, "model", None),
+                recent_messages=session_manager.get_messages_as_conversation(session_id),
+                intent_skill=self._load_intent_agent_skill(loaded),
+                thinking_enabled=kwargs.get("thinking_enabled"),
+            )
+        finally:
+            kwargs["pre_run_latency_ms"] = self._merge_latency_ms(
+                kwargs.get("pre_run_latency_ms"),
+                {"router_ms": (perf_counter() - router_started) * 1000},
+            )
        kwargs["intent_agent_decision"] = self._intent_decision_payload(
            decision,
            active_task=active_task,
@ -751,216 +754,19 @@ class AgentService:
        task: TaskRecord,
    ) -> AgentRunResult:
        loaded = self.create_loop().boot()
-        task_service = self._require_loaded(loaded, "task_service")
-        task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
-        session_manager = self._require_loaded(loaded, "session_manager")
-
-        base_execution_context = kwargs.get("execution_context")
-        prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale")
-        output_language_instruction = self._output_language_instruction(prompt_locale)
-        provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
-        kwargs = dict(kwargs)
-        team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
-        kwargs["provider_bundle"] = provider_bundle
-
-        attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
-        task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
-        plan = await task_execution_planner.plan(
+        return await self._build_task_attempt_orchestrator(loaded).run(
+            message=message,
+            runner=runner,
+            kwargs=kwargs,
            task=task,
-            user_message=message,
-            attempt_index=attempt_index,
-            provider_bundle=provider_bundle,
-        )
-        self._append_task_observation(
-            session_manager,
-            task.session_id,
-            event_type="task_execution_planned",
-            payload={
-                "task_id": task.task_id,
-                "attempt_index": attempt_index,
-                **plan.to_event_payload(),
-            },
-        )
-        team_summaries: list[str] = []
-        team_execution_context = ""
-        team_result: TeamRunResult | None = None
-        if plan.is_team:
-            team_result, team_error = await self._run_team_for_task(
-                plan,
-                task=task,
-                parent_session_id=kwargs["session_id"],
-                provider_bundle_factory=team_provider_bundle_factory
-                or self._build_team_provider_bundle_factory(loaded, kwargs),
-            )
-            if team_result is not None:
-                team_summaries = [self._team_summary_for_validation(team_result)]
-                team_packet = TaskEvidencePacket(
-                    task_id=task.task_id,
-                    attempt_index=attempt_index,
-                    main_run=None,
-                    team_runs=self._team_run_evidence(team_result),
-                    team_node_results=list(team_result.node_results),
-                    final_output="",
-                )
-                team_execution_context = self._join_context(
-                    self._team_execution_context(plan, team_result),
-                    "Rendered team evidence:\n" + render_task_evidence(team_packet),
-                )
-                self._append_task_observation(
-                    session_manager,
-                    task.session_id,
-                    event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
-                    payload={
-                        "task_id": task.task_id,
-                        "attempt_index": attempt_index,
-                        "plan_mode": plan.mode,
-                        "strategy": plan.graph.strategy if plan.graph else None,
-                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
-                        "team_run_ids": team_result.run_ids,
-                        "team_success": team_result.success,
-                        "node_results": self._team_node_results_for_event(plan, team_result),
-                        "reason": plan.reason,
-                        "error": None if team_result.success else "one or more team nodes failed",
-                    },
-                )
-            else:
-                team_summaries = [f"Team execution failed: {team_error}"]
-                team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
-                self._append_task_observation(
-                    session_manager,
-                    task.session_id,
-                    event_type="task_team_run_failed",
-                    payload={
-                        "task_id": task.task_id,
-                        "attempt_index": attempt_index,
-                        "plan_mode": plan.mode,
-                        "strategy": plan.graph.strategy if plan.graph else None,
-                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
-                        "team_run_ids": [],
-                        "team_success": False,
-                        "reason": plan.reason,
-                        "error": team_error,
-                    },
-                )
-
-        attempt_kwargs = dict(kwargs)
-        attempt_kwargs.update(
-            {
-                "task_id": task.task_id,
-                "task_mode": True,
-                "attempt_index": attempt_index,
-                "allow_candidate_generation": False,
-            }
-        )
-        attempt_kwargs["execution_context"] = self._join_context(
-            base_execution_context,
-            output_language_instruction,
-            team_execution_context,
-        )
-        if plan.is_team and team_execution_context:
-            attempt_kwargs["include_tools"] = False
-            attempt_kwargs["max_tool_iterations"] = 0
-        attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
-            task=task,
-            user_message=message,
-            attempt_index=attempt_index,
-            plan=plan,
-            team_summaries=team_summaries,
        )

-        result = await runner(message, **attempt_kwargs)
-        self._append_task_observation(
-            session_manager,
-            task.session_id,
-            event_type="task_synthesis_completed",
-            payload={
-                "task_id": task.task_id,
-                "attempt_index": attempt_index,
-                "main_run_id": result.run_id,
-                "plan_mode": plan.mode,
-                "strategy": plan.graph.strategy if plan.graph else None,
-            },
+    def _build_task_attempt_orchestrator(self, loaded: Any) -> TaskAttemptOrchestrator:
+        """Build the ``TaskAttemptOrchestrator`` for an already booted loop.
+
+        The orchestrator is handed ``loaded`` plus two callables owned by this
+        service: ``create_loop`` and ``_make_provider_bundle_for_task``.
+        """
+        return TaskAttemptOrchestrator(
+            loaded=loaded,
+            create_loop=self.create_loop,
+            make_provider_bundle_for_task=self._make_provider_bundle_for_task,
         )
-        task = task_service.append_run(
-            task.task_id,
-            result.run_id,
-            skill_names=self._skill_names_for_run(loaded, result.run_id),
-        )
-        evidence_packet = self._build_task_evidence_packet(
-            session_manager=session_manager,
-            task=task,
-            attempt_index=attempt_index,
-            result=result,
-            team_result=team_result,
-        )
-        evidence_text = render_task_evidence(evidence_packet)
-        evidence_debug = {
-            "evidence_run_ids": [
-                item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
-            ],
-            "evidence_session_ids": [
-                item.session_id
-                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
-                if item is not None
-            ],
-            "tool_result_count": sum(
-                len(item.tool_results)
-                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
-                if item is not None
-            ),
-            "evidence_length": len(evidence_text),
-        }
-        session_manager.update_latest_assistant_event_payload(
-            result.session_id,
-            result.run_id,
-            {
-                "task_id": task.task_id,
-                "task_status": task.status,
-                "evidence_status": "recorded",
-            },
-        )
-        session_manager.append_message(
-            result.session_id,
-            run_id=result.run_id,
-            role="system",
-            event_type="task_evidence_recorded",
-            event_payload={
-                "task_id": task.task_id,
-                "attempt_index": attempt_index,
-                "evidence_debug": evidence_debug,
-            },
-            content=None,
-            context_visible=False,
-        )
-        result.task_id = task.task_id
-        result.task_status = task.status
-        result.validation_result = None
-        return result
-
-    async def _run_team_for_task(
-        self,
-        plan: TaskExecutionPlan,
-        *,
-        task: TaskRecord,
-        parent_session_id: str,
-        provider_bundle_factory: Any,
-    ) -> tuple[TeamRunResult | None, str | None]:
-        if plan.graph is None:
-            return None, "team plan did not include an execution graph"
-        try:
-            from beaver.services.team_service import TeamService
-
-            result = await TeamService(self.create_loop()).run_team(
-                plan.graph,
-                parent_task_id=task.task_id,
-                parent_session_id=parent_session_id,
-                parent_run_id=None,
-                provider_bundle_factory=provider_bundle_factory,
-                allow_candidate_generation=False,
-            )
-            return result, None
-        except Exception as exc:
-            return None, str(exc)

    @staticmethod
    def _require_loaded(loaded: Any, field_name: str) -> Any:
@ -992,32 +798,15 @@ class AgentService:
        }

    @staticmethod
-    def _output_language_instruction(prompt_locale: str | None) -> str:
-        locale = normalize_main_agent_prompt_locale(prompt_locale)
-        if locale == "en":
-            return (
-                "Output language: English. Use English for user-facing task titles, summaries, plans, "
-                "and final answers unless the user explicitly requests another language."
-            )
-        if locale == "zh-Hant":
-            return (
-                "輸出語言：繁體中文。除非使用者明確要求其他語言，所有面向使用者的任務標題、摘要、"
-                "計劃與最終回答都使用繁體中文。"
-            )
-        return (
-            "输出语言：简体中文。除非用户明确要求其他语言，所有面向用户的任务标题、摘要、"
-            "计划与最终回答都使用简体中文。"
-        )
-
-    @staticmethod
-    def _skill_names_for_run(loaded: Any, run_id: str) -> list[str]:
-        store = getattr(loaded, "run_memory_store", None)
-        if store is None:
-            return []
-        for record in store.list_runs():
-            if record.run_id == run_id:
-                return [receipt.skill_name for receipt in record.activated_skills]
-        return []
+    def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]:
+        """Combine existing latency measurements with new ones.
+
+        ``current`` contributes only when it is a dict, and only its numeric
+        values (keys are stringified). Each entry of ``updates`` is then added
+        to the matching bucket, creating the bucket when it does not exist.
+        A new dict is returned; ``current`` is left untouched.
+        """
+        totals: dict[str, float] = (
+            {
+                str(name): float(amount)
+                for name, amount in current.items()
+                if isinstance(amount, (int, float))
+            }
+            if isinstance(current, dict)
+            else {}
+        )
+        for name, delta in updates.items():
+            totals[name] = totals.get(name, 0.0) + float(delta)
+        return totals

    @staticmethod
    def _acceptance_score_for_learning(acceptance_type: str) -> float:
@ -1027,237 +816,6 @@ class AgentService:
            return 0.5
        return 0.0

-    @staticmethod
-    def _build_skill_selection_context(
-        *,
-        task: TaskRecord,
-        user_message: str,
-        attempt_index: int,
-        plan: TaskExecutionPlan | None = None,
-        team_summaries: list[str] | None = None,
-    ) -> str:
-        phase = f"attempt_{attempt_index}"
-        if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
-            phase = f"revision_attempt_{attempt_index}"
-        elif plan is not None and plan.is_team:
-            phase = f"team_synthesis_attempt_{attempt_index}"
-
-        sections = [
-            f"Task goal:\n{task.goal or task.description}",
-            f"Task description:\n{task.description}",
-            f"Current user request:\n{user_message}",
-            f"Execution phase:\n{phase}",
-            f"Task status:\n{task.status}",
-        ]
-        if task.constraints:
-            sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints))
-        if task.skill_names:
-            sections.append(
-                "Previously activated skills (reuse bias, not pinned):\n"
-                + "\n".join(f"- {item}" for item in task.skill_names)
-            )
-        else:
-            sections.append("Previously activated skills:\nNone")
-        if task.feedback:
-            history_lines = []
-            for item in task.feedback[-5:]:
-                kind = item.get("acceptance_type") or item.get("feedback_type")
-                comment = item.get("comment") or ""
-                run_id = item.get("run_id") or ""
-                history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
-            sections.append("Task acceptance history:\n" + "\n".join(history_lines))
-        if plan is not None:
-            plan_lines = [
-                f"mode: {plan.mode}",
-                f"reason: {plan.reason}",
-            ]
-            if plan.final_synthesis_instruction:
-                plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
-            if plan.graph is not None:
-                plan_lines.append(f"strategy: {plan.graph.strategy}")
-                plan_lines.append(
-                    "nodes:\n"
-                    + "\n".join(
-                        f"- {node.node_id}: {node.task}"
-                        for node in plan.graph.nodes
-                    )
-                )
-            sections.append("Execution plan:\n" + "\n".join(plan_lines))
-        if team_summaries:
-            sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
-        sections.append(
-            "Skill selection instruction:\n"
-            "Prefer reusing previously activated skills when they still match the Task. "
-            "Select new skills only if the current request, revision, or execution plan needs a different capability. "
-            "If no published skill matches, return [] and let the run continue without skills."
-        )
-        return "\n\n".join(section for section in sections if section.strip())
-
-    @staticmethod
-    def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str:
-        lines = []
-        for event in session_manager.get_run_event_records(session_id, run_id):
-            if event.context_visible and event.content:
-                lines.append(f"{event.role}: {event.content.strip()}")
-        return "\n".join(lines[:12])[:2400]
-
-    @staticmethod
-    def _tool_summaries(session_manager: Any, session_id: str, run_id: str) -> list[str]:
-        summaries = []
-        for event in session_manager.get_run_event_records(session_id, run_id):
-            if event.event_type != "tool_result_recorded":
-                continue
-            text = (event.content or "").strip()
-            if text:
-                summaries.append(f"{event.tool_name or 'tool'}: {text[:500]}")
-        return summaries[:12]
-
-    @staticmethod
-    def _append_task_observation(
-        session_manager: Any,
-        session_id: str,
-        *,
-        event_type: str,
-        payload: dict[str, Any],
-    ) -> None:
-        session_manager.append_message(
-            session_id,
-            role="system",
-            event_type=event_type,
-            event_payload=payload,
-            content=payload.get("reason") or payload.get("error"),
-            context_visible=False,
-        )
-
-    @staticmethod
-    def _join_context(*parts: str | None) -> str:
-        return "\n\n".join(part.strip() for part in parts if part and part.strip())
-
-    @staticmethod
-    def _team_summary_for_validation(result: TeamRunResult) -> str:
-        lines = [
-            f"success={result.success}",
-            f"task_id={result.task_id or ''}",
-            "summary:",
-            result.summary,
-            "nodes:",
-        ]
-        for node in result.node_results:
-            lines.append(
-                f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
-                f"error={node.error or ''} output={node.output_text[:500]}"
-            )
-        return "\n".join(lines)
-
-    @staticmethod
-    def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
-        nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
-        payloads: list[dict[str, Any]] = []
-        for item in result.node_results:
-            payload = item.to_dict()
-            node = nodes.get(item.node_id)
-            if node is not None:
-                payload["selected_skill_names"] = list(node.inherited_pinned_skills)
-                payload["ephemeral_skill_names"] = [
-                    skill.name for skill in node.inherited_pinned_skill_contexts
-                ]
-                payload["skill_query"] = node.agent.metadata.get("skill_query")
-                payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
-                payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
-                payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
-            payloads.append(payload)
-        return payloads
-
-    @staticmethod
-    def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
-        if result is None:
-            return []
-        return [node.evidence for node in result.node_results if node.evidence is not None]
-
-    def _build_task_evidence_packet(
-        self,
-        *,
-        session_manager: Any,
-        task: TaskRecord,
-        attempt_index: int,
-        result: AgentRunResult,
-        team_result: TeamRunResult | None,
-    ) -> TaskEvidencePacket:
-        main_run = EvidenceBuilder(session_manager).build_run_evidence(
-            result.session_id,
-            result.run_id,
-            result.output_text,
-            result.finish_reason,
-        )
-        return TaskEvidencePacket(
-            task_id=task.task_id,
-            attempt_index=attempt_index,
-            main_run=main_run,
-            team_runs=self._team_run_evidence(team_result),
-            team_node_results=list(team_result.node_results) if team_result is not None else [],
-            final_output=result.output_text,
-        )
-
-    @staticmethod
-    def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
-        node_lines = [
-            (
-                f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
-                f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
-            )
-            for node in result.node_results
-        ]
-        return "\n\n".join(
-            item
-            for item in [
-                "Task team execution result:",
-                f"Planner reason: {plan.reason}",
-                f"Strategy: {plan.graph.strategy if plan.graph else ''}",
-                f"Team success: {result.success}",
-                f"Team summary:\n{result.summary}",
-                "Node results:\n" + "\n\n".join(node_lines),
-                (
-                    "Final synthesis instruction:\n" + plan.final_synthesis_instruction
-                    if plan.final_synthesis_instruction
-                    else None
-                ),
-                (
-                    "Use successful team outputs as internal evidence. If one or more nodes failed, "
-                    "do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
-                    "with available evidence and clearly state any missing or uncertain data."
-                ),
-            ]
-            if item
-        )
-
-    @staticmethod
-    def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
-        return "\n\n".join(
-            [
-                "Task team execution failed before final synthesis.",
-                f"Planner reason: {plan.reason}",
-                f"Strategy: {plan.graph.strategy if plan.graph else ''}",
-                f"Error: {error}",
-                (
-                    "Proceed as the main agent. Do not blindly repeat failed tool calls; "
-                    "produce a user-visible fallback answer with available evidence and clearly "
-                    "state any missing or uncertain data."
-                ),
-            ]
-        )
-
-    def _build_team_provider_bundle_factory(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
-        def factory(node: ExecutionNode) -> Any:
-            node_kwargs = dict(kwargs)
-            node_kwargs.pop("provider_bundle", None)
-            if node.agent.model:
-                node_kwargs["model"] = node.agent.model
-            if node.agent.provider_name:
-                node_kwargs["provider_name"] = node.agent.provider_name
-            return self._make_provider_bundle_for_task(loaded, node_kwargs)
-
-        return factory
-
    def _make_provider_bundle_for_task(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
        config = loaded.config
        configured_provider = config.resolve_provider_target(
--- a/app-instance/backend/beaver/services/user_files.py
+++ b/app-instance/backend/beaver/services/user_files.py
@ -40,6 +40,10 @@ class UserFileSizeError(UserFileError):
    """Raised when a user file upload exceeds configured limits."""


+class UserFileStorageError(UserFileError):
+    """Raised when the backing user-file storage cannot complete an operation.
+
+    In this module it is built by ``_minio_storage_error``, whose message
+    names the failed operation and, when available, the storage error code.
+    """
+
+
@dataclass(frozen=True, slots=True)
 class AgentUserFilePolicy:
    task_id: str | None = None
@ -387,26 +391,34 @@ class MinIOUserFileStorage:

    async def list_dir(self, path: str) -> list[UserFileEntry]:
+        """List the direct children of ``path`` in the configured bucket.
+
+        Objects are listed non-recursively under the prefix derived from
+        ``path``. Entries whose user path is empty, equals ``path`` itself, or
+        ends with ``/.keep`` are skipped. Files are always reported with the
+        content type ``application/octet-stream``; directories carry no size
+        or content type.
+
+        Returns:
+            Entries sorted with directories first, then by case-insensitive name.
+
+        Raises:
+            UserFileStorageError: when listing or iterating the objects fails
+                with anything other than a ``UserFileError``.
+        """
        prefix = self._object_prefix(path)
-        objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False)
+        try:
+            objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False)
+        except Exception as exc:
+            raise _minio_storage_error("list directory", exc) from exc
        entries: list[UserFileEntry] = []
-        for obj in objects:
-            object_name = str(obj.object_name or "")
-            user_path = self._user_path(object_name)
-            if not user_path or user_path == path or user_path.endswith("/.keep"):
-                continue
-            trimmed = user_path.rstrip("/")
-            name = PurePosixPath(trimmed).name
-            is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/")
-            entries.append(
-                UserFileEntry(
-                    name=name,
-                    path=trimmed,
-                    type="directory" if is_dir else "file",
-                    size=None if is_dir else getattr(obj, "size", None),
-                    content_type=None if is_dir else "application/octet-stream",
-                    modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None,
+        # NOTE(review): presumably list_objects returns a lazy iterator, so a
+        # storage failure may only surface while iterating; hence the second
+        # try block around the loop. Confirm against the MinIO client.
+        try:
+            for obj in objects:
+                object_name = str(obj.object_name or "")
+                user_path = self._user_path(object_name)
+                if not user_path or user_path == path or user_path.endswith("/.keep"):
+                    continue
+                trimmed = user_path.rstrip("/")
+                name = PurePosixPath(trimmed).name
+                is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/")
+                entries.append(
+                    UserFileEntry(
+                        name=name,
+                        path=trimmed,
+                        type="directory" if is_dir else "file",
+                        size=None if is_dir else getattr(obj, "size", None),
+                        content_type=None if is_dir else "application/octet-stream",
+                        modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None,
+                    )
                )
-            )
+        except UserFileError:
+            # Already a user-file error: propagate unchanged instead of re-wrapping.
+            raise
+        except Exception as exc:
+            raise _minio_storage_error("list directory", exc) from exc
        return sorted(entries, key=lambda item: (item.type != "directory", item.name.lower()))

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
@ -421,7 +433,9 @@ class MinIOUserFileStorage:
            response.close()
            response.release_conn()
        except Exception as exc:
-            raise UserFileNotFoundError("File not found") from exc
+            if _minio_error_code(exc) in {"NoSuchKey", "NoSuchObject"}:
+                raise UserFileNotFoundError("File not found") from exc
+            raise _minio_storage_error("read file", exc) from exc
        return UserFileContent(
            name=PurePosixPath(path).name,
            path=path,
@ -433,13 +447,16 @@ class MinIOUserFileStorage:

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        object_name = self._object_name(path)
-        result = self.client.put_object(
-            self.config.bucket,
-            object_name,
-            BytesIO(content),
-            length=len(content),
-            content_type=content_type,
-        )
+        try:
+            self.client.put_object(
+                self.config.bucket,
+                object_name,
+                BytesIO(content),
+                length=len(content),
+                content_type=content_type,
+            )
+        except Exception as exc:
+            raise _minio_storage_error("write file", exc) from exc
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
@ -475,6 +492,8 @@ class MinIOUserFileStorage:
            except Exception:
                pass
            raise
+        except Exception as exc:
+            raise _minio_storage_error("write file", exc) from exc
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
@ -490,23 +509,30 @@ class MinIOUserFileStorage:
        try:
            self.client.remove_object(self.config.bucket, object_name)
            removed = True
-        except Exception:
-            pass
+        except Exception as exc:
+            if _minio_error_code(exc) != "NoSuchKey":
+                raise _minio_storage_error("delete path", exc) from exc
        prefix = f"{object_name.rstrip('/')}/"
-        for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True):
-            self.client.remove_object(self.config.bucket, str(obj.object_name))
-            removed = True
+        try:
+            for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True):
+                self.client.remove_object(self.config.bucket, str(obj.object_name))
+                removed = True
+        except Exception as exc:
+            raise _minio_storage_error("delete path", exc) from exc
        return removed

    async def mkdir(self, path: str) -> UserFileEntry:
        object_name = f"{self._object_name(path).rstrip('/')}/.keep"
-        self.client.put_object(
-            self.config.bucket,
-            object_name,
-            BytesIO(b""),
-            length=0,
-            content_type="application/x-directory",
-        )
+        try:
+            self.client.put_object(
+                self.config.bucket,
+                object_name,
+                BytesIO(b""),
+                length=0,
+                content_type="application/x-directory",
+            )
+        except Exception as exc:
+            raise _minio_storage_error("create directory", exc) from exc
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
@ -600,6 +626,18 @@ def _safe_scope(value: str | None) -> str:
    return cleaned or "interactive"


+def _minio_error_code(exc: Exception) -> str:
+    """Return the ``code`` attribute of ``exc`` as a string.
+
+    Yields an empty string when the attribute is missing or falsy.
+    """
+    return str(getattr(exc, "code", "") or "")
+
+
+def _minio_storage_error(operation: str, exc: Exception) -> UserFileStorageError:
+    """Build (not raise) a ``UserFileStorageError`` describing a failed operation.
+
+    The message is ``"User file storage <operation> failed"``, followed by
+    ``": <code>"`` when ``exc`` carries a non-empty error code. The text of
+    ``exc`` itself is not copied into the message.
+    """
+    code = _minio_error_code(exc)
+    message = f"User file storage {operation} failed"
+    if code:
+        message = f"{message}: {code}"
+    return UserFileStorageError(message)
+
+
 class _LimitedReadStream:
    def __init__(self, stream: object, *, max_bytes: int | None = None) -> None:
        self.stream = stream
--- a/app-instance/backend/beaver/skills/assembler/task_assembler.py
+++ b/app-instance/backend/beaver/skills/assembler/task_assembler.py
@ -83,6 +83,12 @@ class SkillAssembler:
            return SkillAssemblyResult()
        llm_interactions: list[dict[str, Any]] = []

+        if len(candidates) == 1:
+            return SkillAssemblyResult(
+                activated_skills=self._activate_skill_contexts([candidates[0]["name"]]),
+                llm_interactions=llm_interactions,
+            )
+
        if len(candidates) <= self.max_detailed_candidates:
            shortlisted_names = [item["name"] for item in candidates]
        else:
@ -115,6 +121,10 @@ class SkillAssembler:
        if not selected_names:
            return SkillAssemblyResult(llm_interactions=llm_interactions)

+        activated_skills = self._activate_skill_contexts(selected_names)
+        return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions)
+
+    def _activate_skill_contexts(self, selected_names: list[str]) -> list[SkillContext]:
        activated_skills: list[SkillContext] = []
        for name in selected_names:
            record = self.loader.get_skill_record(name)
@ -130,10 +140,11 @@ class SkillAssembler:
                    content_hash=record.content_hash or "" if record is not None else "",
                    activation_reason="llm_selected",
                    tool_hints=list(record.tool_hints) if record is not None else [],
+                    team_template=getattr(record, "team_template", None) if record is not None else None,
+                    team_template_warnings=list(getattr(record, "team_template_warnings", [])) if record is not None else [],
                )
            )
-
-        return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions)
+        return activated_skills

    async def _select_skill_names(
        self,
--- a/app-instance/backend/beaver/skills/catalog/loader.py
+++ b/app-instance/backend/beaver/skills/catalog/loader.py
@ -28,6 +28,7 @@ from .utils import (
    check_requirements,
    escape_xml,
    extract_required_tool_names,
+    extract_skill_team_template,
    get_missing_requirements,
    parse_frontmatter,
    parse_skill_metadata_blob,
@ -49,6 +50,8 @@ class SkillRecord:
    tool_hints: list[str] = field(default_factory=list)
    frontmatter: dict[str, Any] = field(default_factory=dict)
    description: str = ""
+    team_template: dict[str, Any] | None = None
+    team_template_warnings: list[str] = field(default_factory=list)


 class SkillsLoader:
@ -113,6 +116,7 @@ class SkillsLoader:
                    continue
                normalized_frontmatter = dict(frontmatter)
                meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
+                template_result = extract_skill_team_template(body)
                record = SkillRecord(
                    name=name,
                    path=skill_file,
@ -127,6 +131,8 @@ class SkillsLoader:
                    ),
                    frontmatter=normalized_frontmatter,
                    description=str(frontmatter.get("description") or summarize_body(body) or name),
+                    team_template=template_result.template,
+                    team_template_warnings=template_result.warnings,
                )
                if filter_unavailable and not self._record_available(record):
                    continue
@ -146,6 +152,7 @@ class SkillsLoader:
            else:
                path = self.workspace_skills / name / "versions" / loaded.version.version / "SKILL.md"
            _frontmatter, body = parse_frontmatter(loaded.content)
+            template_result = extract_skill_team_template(body)
            record = SkillRecord(
                name=name,
                path=path,
@ -160,6 +167,8 @@ class SkillsLoader:
                ),
                frontmatter=dict(loaded.version.frontmatter),
                description=str(loaded.version.frontmatter.get("description") or loaded.version.summary or name),
+                team_template=template_result.template,
+                team_template_warnings=template_result.warnings,
            )
            if filter_unavailable and not self._record_available(record):
                continue
--- a/app-instance/backend/beaver/skills/catalog/utils.py
+++ b/app-instance/backend/beaver/skills/catalog/utils.py
@ -17,6 +17,7 @@ import json
 import os
 import re
 import shutil
+from dataclasses import dataclass, field
 from typing import Any


@ -84,6 +85,27 @@ def strip_frontmatter(content: str) -> str:
    return body


+@dataclass(slots=True)
+class SkillTeamTemplateParseResult:
+    """Outcome of looking for a team template inside a skill body."""
+
+    # Parsed template object, or None when no usable template was found.
+    template: dict[str, Any] | None = None
+    # Human-readable reasons why a template block was rejected.
+    warnings: list[str] = field(default_factory=list)
+
+
+def extract_skill_team_template(body: str) -> SkillTeamTemplateParseResult:
+    """Extract the JSON team template from a ``beaver-team-template`` fenced block.
+
+    Outcomes:
+    - no such fenced block: an empty result (no template, no warnings);
+    - more than one block: no template, with a warning;
+    - block content is not valid JSON: no template, with a warning;
+    - parsed value is not an object, or its ``nodes`` entry is not a list:
+      no template, with a warning;
+    - otherwise: the parsed object as ``template``.
+    """
+    matches = re.findall(r"```beaver-team-template\s*\n(.*?)\n```", body, re.DOTALL)
+    if not matches:
+        return SkillTeamTemplateParseResult()
+    if len(matches) != 1:
+        return SkillTeamTemplateParseResult(warnings=["skill defines multiple team templates"])
+    try:
+        template = json.loads(matches[0])
+    except json.JSONDecodeError:
+        return SkillTeamTemplateParseResult(warnings=["team template JSON is invalid"])
+    # A missing "nodes" key defaults to [] here, so it is accepted; only a
+    # present, non-list value is rejected.
+    if not isinstance(template, dict) or not isinstance(template.get("nodes", []), list):
+        return SkillTeamTemplateParseResult(warnings=["team template must be an object with a nodes list"])
+    return SkillTeamTemplateParseResult(template=template)
+
+
 def extract_required_tool_names(body: str) -> list[str]:
    """从 canonical skill 正文的 `## Required Tools` 段落提取工具名。

--- a/app-instance/backend/beaver/skills/learning/eval.py
+++ b/app-instance/backend/beaver/skills/learning/eval.py
@ -284,6 +284,9 @@ def _build_replay_case_reports(
        "side_effects": [*baseline.get("side_effects", []), *candidate_arm.get("side_effects", [])],
        "validator_notes": list(surrogate.get("notes") or []),
    }
+    historical_accepted_score = _historical_accepted_score(case)
+    if historical_accepted_score is not None:
+        case_report["historical_accepted_score"] = historical_accepted_score
    return case_report, {
        "run_id": case["run_id"],
        "session_id": case.get("session_id") or "",
@ -293,6 +296,7 @@ def _build_replay_case_reports(
        "baseline_score": baseline_score,
        "candidate_score": candidate_score,
        "delta": round(candidate_score - baseline_score, 4),
+        **({"historical_accepted_score": historical_accepted_score} if historical_accepted_score is not None else {}),
    }


@ -658,8 +662,11 @@ def _ability_score(*, case: dict[str, Any], arm: dict[str, Any], arm_name: str)
    if validator is not None:
        return _ability_from_validator(validator, arm)
    if not case.get("synthetic"):
-        score = _bounded_score(case.get("accepted_score"), default=0.75) if arm_name == "baseline" else _ability_from_output(arm)["final_score"]
-        return _ability_breakdown(score=score, source="user_feedback" if arm_name == "baseline" else "llm_judge")
+        result = _ability_from_output(arm, source="output_heuristic")
+        historical_accepted_score = _historical_accepted_score(case)
+        if historical_accepted_score is not None:
+            result["historical_accepted_score"] = historical_accepted_score
+        return result
    return _ability_breakdown(score=0.0, source="unscored", notes=["Synthetic cases require a validator."])


@ -697,6 +704,12 @@ def _ability_from_output(arm: dict[str, Any], *, source: str = "llm_judge", note
    return _ability_breakdown(score=score, source=source, notes=notes)


+def _historical_accepted_score(case: dict[str, Any]) -> float | None:
+    """Return the case's recorded ``accepted_score``, or None when not applicable.
+
+    None is returned for synthetic cases, for cases whose ``validator`` is a
+    dict, and for cases without an ``accepted_score`` key. Otherwise the value
+    is passed through ``_bounded_score`` with a default of 0.75.
+    """
+    if case.get("synthetic") or isinstance(case.get("validator"), dict) or "accepted_score" not in case:
+        return None
+    # NOTE(review): presumably _bounded_score falls back to the default when the
+    # stored value is unusable (e.g. None) — confirm against its definition.
+    return _bounded_score(case.get("accepted_score"), default=0.75)
+
+
 def _ability_breakdown(*, score: float, source: str, notes: list[str] | None = None) -> dict[str, Any]:
    bounded = _bounded_score(score, default=0.0)
    return {
--- a/app-instance/backend/beaver/tasks/attempt_orchestrator.py
+++ b/app-instance/backend/beaver/tasks/attempt_orchestrator.py
@ -0,0 +1,695 @@
+"""Task attempt orchestration for Beaver Task mode."""
+
+from __future__ import annotations
+
+from time import perf_counter
+from typing import Any, Callable
+
+from beaver.coordinator.models import ExecutionNode, TeamRunResult
+from beaver.engine import AgentRunResult
+from beaver.engine.context import SkillContext
+from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
+
+from .evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
+from .models import TaskRecord
+from .planner import TaskExecutionPlan
+
+
+class TaskAttemptOrchestrator:
+    """Own the execution order inside one Task attempt."""
+
+    def __init__(
+        self,
+        *,
+        loaded: Any,
+        create_loop: Callable[[], Any],
+        make_provider_bundle_for_task: Callable[[Any, dict[str, Any]], Any],
+    ) -> None:
+        """Remember the collaborators needed to run a Task attempt.
+
+        Args:
+            loaded: Object whose attributes supply runtime dependencies by name.
+            create_loop: Zero-argument factory whose result is passed to the team service.
+            make_provider_bundle_for_task: Called as ``(loaded, kwargs)`` to build
+                a provider bundle.
+        """
+        self.make_provider_bundle_for_task = make_provider_bundle_for_task
+        self.create_loop = create_loop
+        self.loaded = loaded
+
+    async def run(
+        self,
+        *,
+        message: str,
+        runner: Any,
+        kwargs: dict[str, Any],
+        task: TaskRecord,
+    ) -> AgentRunResult:
+        """Execute one attempt of ``task`` and return the main run's result.
+
+        The attempt goes through these steps in order: register the run on the
+        task service, select skills, ask the planner for an execution plan,
+        optionally run the planned team graph, run ``runner`` for the final
+        answer, then record evidence and observation events on the session.
+
+        Args:
+            message: The current user message for this attempt.
+            runner: Awaited as ``runner(message, **attempt_kwargs)``; its return
+                value is mutated and returned.
+            kwargs: Caller options. Keys read here include ``execution_context``,
+                ``prompt_locale``, ``provider_bundle``,
+                ``team_provider_bundle_factory``, ``thinking_enabled``,
+                ``include_skill_assembly``, ``pinned_skill_contexts``,
+                ``pre_run_latency_ms`` and ``session_id``.
+            task: The task record being attempted.
+
+        Returns:
+            The runner's result with ``task_id``, ``task_status`` and
+            ``validation_result`` (set to ``None``) filled in.
+
+        Raises:
+            RuntimeError: If a required dependency on ``self.loaded`` is ``None``.
+            KeyError: If the plan is a team plan and ``kwargs`` has no ``session_id``.
+        """
+        task_service = self._require_loaded(self.loaded, "task_service")
+        task_execution_planner = self._require_loaded(self.loaded, "task_execution_planner")
+        session_manager = self._require_loaded(self.loaded, "session_manager")
+
+        base_execution_context = kwargs.get("execution_context")
+        prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale")
+        output_language_instruction = self._output_language_instruction(prompt_locale)
+        provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs)
+        # Work on a copy from here on so the pop/assignments below do not touch the caller's dict.
+        kwargs = dict(kwargs)
+        team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
+        kwargs["provider_bundle"] = provider_bundle
+
+        # Attempts are numbered from the last index stored in the task metadata.
+        attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
+        task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
+        # Skills are selected once, before planning, from a context that has no plan yet.
+        pre_skill_context = self._build_skill_selection_context(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+        )
+        preselected_skills, pre_skill_latency_ms = await self._assemble_task_attempt_skills(
+            task_description=pre_skill_context,
+            provider_bundle=provider_bundle,
+            thinking_enabled=kwargs.get("thinking_enabled"),
+            include_skill_assembly=bool(kwargs.get("include_skill_assembly", True)),
+            pinned_skill_contexts=kwargs.get("pinned_skill_contexts"),
+        )
+        # A measured latency of exactly 0 is falsy and therefore not recorded.
+        if pre_skill_latency_ms:
+            kwargs["pre_run_latency_ms"] = self._merge_latency_ms(
+                kwargs.get("pre_run_latency_ms"),
+                {"pre_skill_assembly_ms": pre_skill_latency_ms},
+            )
+        plan = await task_execution_planner.plan(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+            provider_bundle=provider_bundle,
+            skill_summaries=self._skill_summaries_for_planner(preselected_skills),
+            tool_hints=self._tool_hints_for_skills(preselected_skills),
+            activated_skills=preselected_skills,
+        )
+        self._append_task_observation(
+            session_manager,
+            task.session_id,
+            event_type="task_execution_planned",
+            payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                **plan.to_event_payload(),
+            },
+        )
+        team_summaries: list[str] = []
+        team_execution_context = ""
+        team_result: TeamRunResult | None = None
+        if plan.is_team:
+            # team_result is None when the team run could not produce a result;
+            # team_error then carries the reason.
+            team_result, team_error = await self._run_team_for_task(
+                plan,
+                task=task,
+                parent_session_id=kwargs["session_id"],
+                provider_bundle_factory=team_provider_bundle_factory
+                or self._build_team_provider_bundle_factory(kwargs),
+            )
+            if team_result is not None:
+                team_summaries = [self._team_summary_for_validation(team_result)]
+                # Team-only packet (no main run yet) rendered into the synthesis context.
+                team_packet = TaskEvidencePacket(
+                    task_id=task.task_id,
+                    attempt_index=attempt_index,
+                    main_run=None,
+                    team_runs=self._team_run_evidence(team_result),
+                    team_node_results=list(team_result.node_results),
+                    final_output="",
+                )
+                team_execution_context = self._join_context(
+                    self._team_execution_context(plan, team_result),
+                    "Rendered team evidence:\n" + render_task_evidence(team_packet),
+                )
+                self._append_task_observation(
+                    session_manager,
+                    task.session_id,
+                    event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
+                    payload={
+                        "task_id": task.task_id,
+                        "attempt_index": attempt_index,
+                        "plan_mode": plan.mode,
+                        "strategy": plan.graph.strategy if plan.graph else None,
+                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                        "team_run_ids": team_result.run_ids,
+                        "team_success": team_result.success,
+                        "node_results": self._team_node_results_for_event(plan, team_result),
+                        "reason": plan.reason,
+                        "error": None if team_result.success else "one or more team nodes failed",
+                    },
+                )
+            else:
+                team_summaries = [f"Team execution failed: {team_error}"]
+                team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
+                self._append_task_observation(
+                    session_manager,
+                    task.session_id,
+                    event_type="task_team_run_failed",
+                    payload={
+                        "task_id": task.task_id,
+                        "attempt_index": attempt_index,
+                        "plan_mode": plan.mode,
+                        "strategy": plan.graph.strategy if plan.graph else None,
+                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                        "team_run_ids": [],
+                        "team_success": False,
+                        "reason": plan.reason,
+                        "error": team_error,
+                    },
+                )
+
+        # Computed for every plan; non-team plans get the "single" outcome.
+        outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome(
+            plan,
+            team_result,
+            prompt_locale=prompt_locale,
+        )
+        if plan.is_team:
+            # The outcome summary is placed ahead of the detailed team context.
+            team_execution_context = self._join_context(outcome_context, team_execution_context)
+
+        attempt_kwargs = dict(kwargs)
+        # Reuse the skills selected above and skip a second assembly pass in the runner.
+        attempt_kwargs.update(
+            {
+                "task_id": task.task_id,
+                "task_mode": True,
+                "attempt_index": attempt_index,
+                "allow_candidate_generation": False,
+                "pinned_skill_contexts": preselected_skills,
+                "include_skill_assembly": False,
+            }
+        )
+        attempt_kwargs["execution_context"] = self._join_context(
+            base_execution_context,
+            output_language_instruction,
+            team_execution_context,
+        )
+        # After a team run the main run only synthesizes: tools are switched off.
+        if plan.is_team and team_execution_context:
+            attempt_kwargs["include_tools"] = False
+            attempt_kwargs["max_tool_iterations"] = 0
+        attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+            plan=plan,
+            team_summaries=team_summaries,
+        )
+
+        result = await runner(message, **attempt_kwargs)
+        # Make sure an incomplete outcome is announced at the top of the answer.
+        if outcome_metadata["task_outcome"] == "incomplete":
+            result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix)
+        self._append_task_observation(
+            session_manager,
+            task.session_id,
+            event_type="task_synthesis_completed",
+            payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                "main_run_id": result.run_id,
+                "plan_mode": plan.mode,
+                "strategy": plan.graph.strategy if plan.graph else None,
+                **outcome_metadata,
+            },
+        )
+        # append_run returns the updated task record, which replaces the local one.
+        task = task_service.append_run(
+            task.task_id,
+            result.run_id,
+            skill_names=self._skill_names_for_run(result.run_id),
+        )
+        evidence_packet = self._build_task_evidence_packet(
+            session_manager=session_manager,
+            task=task,
+            attempt_index=attempt_index,
+            result=result,
+            team_result=team_result,
+        )
+        evidence_text = render_task_evidence(evidence_packet)
+        # Only identifiers and sizes are stored on the event, not the rendered evidence text.
+        evidence_debug = {
+            "evidence_run_ids": [
+                item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
+            ],
+            "evidence_session_ids": [
+                item.session_id
+                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
+                if item is not None
+            ],
+            "tool_result_count": sum(
+                len(item.tool_results)
+                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
+                if item is not None
+            ),
+            "evidence_length": len(evidence_text),
+        }
+        session_manager.update_latest_assistant_event_payload(
+            result.session_id,
+            result.run_id,
+            {
+                "task_id": task.task_id,
+                "task_status": task.status,
+                "evidence_status": "recorded",
+            },
+        )
+        session_manager.append_message(
+            result.session_id,
+            run_id=result.run_id,
+            role="system",
+            event_type="task_evidence_recorded",
+            event_payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                "evidence_debug": evidence_debug,
+            },
+            content=None,
+            context_visible=False,
+        )
+        result.task_id = task.task_id
+        result.task_status = task.status
+        # No validation is performed in this method; the field is explicitly cleared.
+        result.validation_result = None
+        return result
+
+    async def _run_team_for_task(
+        self,
+        plan: TaskExecutionPlan,
+        *,
+        task: TaskRecord,
+        parent_session_id: str,
+        provider_bundle_factory: Any,
+    ) -> tuple[TeamRunResult | None, str | None]:
+        """Run the plan's execution graph through ``TeamService``.
+
+        Args:
+            plan: Execution plan; its ``graph`` is what gets executed.
+            task: Parent task; its ``task_id`` is forwarded as ``parent_task_id``.
+            parent_session_id: Session id forwarded to the team run.
+            provider_bundle_factory: Forwarded unchanged to ``run_team``.
+
+        Returns:
+            ``(result, None)`` when the team run returns, otherwise
+            ``(None, error_text)``. ``error_text`` is never empty.
+        """
+        if plan.graph is None:
+            return None, "team plan did not include an execution graph"
+        try:
+            # Function-scope import kept from the original code.
+            from beaver.services.team_service import TeamService
+
+            result = await TeamService(self.create_loop()).run_team(
+                plan.graph,
+                parent_task_id=task.task_id,
+                parent_session_id=parent_session_id,
+                parent_run_id=None,
+                provider_bundle_factory=provider_bundle_factory,
+                allow_candidate_generation=False,
+            )
+        except Exception as exc:
+            # Deliberate best-effort boundary: a failed team run is reported to the
+            # caller instead of aborting the attempt. Exceptions raised without a
+            # message stringify to "", so fall back to repr() to keep the reported
+            # error informative (e.g. "TimeoutError()").
+            return None, str(exc) or repr(exc)
+        return result, None
+
+    async def _assemble_task_attempt_skills(
+        self,
+        *,
+        task_description: str,
+        provider_bundle: Any,
+        thinking_enabled: bool | None,
+        include_skill_assembly: bool,
+        pinned_skill_contexts: Any,
+    ) -> tuple[list[SkillContext], float]:
+        """Collect the skills for an attempt and time how long that took.
+
+        Pinned skills always come first. When ``include_skill_assembly`` is true the
+        skill assembler is queried and its activated skills are merged in after them.
+
+        Returns:
+            ``(skills, elapsed_ms)`` where ``elapsed_ms`` covers the whole call.
+        """
+        clock_start = perf_counter()
+        skills = self._coerce_skill_contexts(pinned_skill_contexts)
+        if not include_skill_assembly:
+            return skills, (perf_counter() - clock_start) * 1000
+
+        assembler = self._require_loaded(self.loaded, "skill_assembler")
+        # The auxiliary runtime/provider is preferred; the main one is the fallback.
+        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
+        assembly = await assembler.assemble(
+            task_description=task_description,
+            provider=provider_bundle.auxiliary_provider or provider_bundle.main_provider,
+            model=getattr(runtime, "model", None),
+            embedding_runtime=getattr(provider_bundle, "embedding_runtime", None),
+            thinking_enabled=thinking_enabled,
+        )
+        activated = list(getattr(assembly, "activated_skills", []) or [])
+        skills = self._merge_skill_contexts(skills, activated)
+        return skills, (perf_counter() - clock_start) * 1000
+
+    @staticmethod
+    def _coerce_skill_contexts(value: Any) -> list[SkillContext]:
+        """Keep only the ``SkillContext`` items of ``value``; anything but a list yields ``[]``."""
+        kept: list[SkillContext] = []
+        if isinstance(value, list):
+            for candidate in value:
+                if isinstance(candidate, SkillContext):
+                    kept.append(candidate)
+        return kept
+
+    @staticmethod
+    def _merge_skill_contexts(left: list[SkillContext], right: list[SkillContext]) -> list[SkillContext]:
+        """Concatenate ``left`` and ``right``, keeping the first skill seen for each name."""
+        first_by_name: dict[str, SkillContext] = {}
+        for group in (left, right):
+            for candidate in group:
+                # setdefault leaves an earlier entry with the same name untouched.
+                first_by_name.setdefault(candidate.name, candidate)
+        return list(first_by_name.values())
+
+    @staticmethod
+    def _skill_summaries_for_planner(skills: list[SkillContext]) -> list[str]:
+        """Render one short ``name: content`` line per skill.
+
+        Whitespace in the content is collapsed to single spaces and content longer
+        than 240 characters is cut to 237 plus ``...``. A skill without content is
+        represented by its name alone.
+        """
+
+        def summarize(skill: SkillContext) -> str:
+            text = " ".join((skill.content or "").split())
+            if not text:
+                return skill.name
+            if len(text) > 240:
+                text = text[:237].rstrip() + "..."
+            return f"{skill.name}: {text}"
+
+        return [summarize(skill) for skill in skills]
+
+    @staticmethod
+    def _tool_hints_for_skills(skills: list[SkillContext]) -> list[str]:
+        """Flatten the skills' tool hints, dropping falsy entries and later duplicates."""
+        ordered: list[str] = []
+        every_hint = (hint for skill in skills for hint in skill.tool_hints)
+        for hint in every_hint:
+            if not hint or hint in ordered:
+                continue
+            ordered.append(hint)
+        return ordered
+
+    @staticmethod
+    def _require_loaded(loaded: Any, field_name: str) -> Any:
+        """Return ``loaded.<field_name>``.
+
+        Raises:
+            RuntimeError: If the attribute exists but is ``None``.
+        """
+        dependency = getattr(loaded, field_name)
+        if dependency is not None:
+            return dependency
+        raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
+
+    @staticmethod
+    def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]:
+        """Add ``updates`` onto the numeric entries of ``current``.
+
+        ``current`` is used only when it is a dict; its keys are stringified and
+        non-numeric values are dropped. Each update is added to the existing value
+        under the same key (0.0 if there is none).
+        """
+        totals: dict[str, float] = {}
+        if isinstance(current, dict):
+            totals = {
+                str(name): float(amount)
+                for name, amount in current.items()
+                if isinstance(amount, (int, float))
+            }
+        for name, delta in updates.items():
+            totals[name] = totals.get(name, 0.0) + float(delta)
+        return totals
+
+    @staticmethod
+    def _output_language_instruction(prompt_locale: str | None) -> str:
+        """Return the output-language directive for the normalized prompt locale.
+
+        English and Traditional Chinese have dedicated text; every other
+        normalized locale gets the Simplified Chinese text.
+        """
+        locale = normalize_main_agent_prompt_locale(prompt_locale)
+        if locale == "en":
+            instruction = (
+                "Output language: English. Use English for user-facing task titles, summaries, plans, "
+                "and final answers unless the user explicitly requests another language."
+            )
+        elif locale == "zh-Hant":
+            instruction = (
+                "輸出語言：繁體中文。除非使用者明確要求其他語言，所有面向使用者的任務標題、摘要、"
+                "計劃與最終回答都使用繁體中文。"
+            )
+        else:
+            instruction = (
+                "输出语言：简体中文。除非用户明确要求其他语言，所有面向用户的任务标题、摘要、"
+                "计划与最终回答都使用简体中文。"
+            )
+        return instruction
+
+    def _skill_names_for_run(self, run_id: str) -> list[str]:
+        """Return the skill names activated in run ``run_id``.
+
+        An empty list is returned when ``self.loaded`` has no ``run_memory_store``
+        or when the store lists no run with that id.
+        """
+        store = getattr(self.loaded, "run_memory_store", None)
+        if store is None:
+            return []
+        match = next((record for record in store.list_runs() if record.run_id == run_id), None)
+        if match is None:
+            return []
+        return [receipt.skill_name for receipt in match.activated_skills]
+
+    @staticmethod
+    def _build_skill_selection_context(
+        *,
+        task: TaskRecord,
+        user_message: str,
+        attempt_index: int,
+        plan: TaskExecutionPlan | None = None,
+        team_summaries: list[str] | None = None,
+    ) -> str:
+        """Compose the text used to select skills for an attempt.
+
+        The text is a sequence of titled sections separated by blank lines: task
+        goal/description, the current request, the execution phase, task status,
+        then optional constraints, previously used skills, the last five feedback
+        entries, the execution plan and team summaries, and finally a fixed
+        selection instruction.
+        """
+        # A trailing "revise" feedback wins over the team-synthesis phase label.
+        last_was_revise = bool(task.feedback) and task.feedback[-1].get("acceptance_type") == "revise"
+        if last_was_revise:
+            phase = f"revision_attempt_{attempt_index}"
+        elif plan is not None and plan.is_team:
+            phase = f"team_synthesis_attempt_{attempt_index}"
+        else:
+            phase = f"attempt_{attempt_index}"
+
+        blocks = [
+            f"Task goal:\n{task.goal or task.description}",
+            f"Task description:\n{task.description}",
+            f"Current user request:\n{user_message}",
+            f"Execution phase:\n{phase}",
+            f"Task status:\n{task.status}",
+        ]
+
+        if task.constraints:
+            constraint_bullets = "\n".join(f"- {item}" for item in task.constraints)
+            blocks.append("Known constraints:\n" + constraint_bullets)
+
+        if not task.skill_names:
+            blocks.append("Previously activated skills:\nNone")
+        else:
+            skill_bullets = "\n".join(f"- {item}" for item in task.skill_names)
+            blocks.append("Previously activated skills (reuse bias, not pinned):\n" + skill_bullets)
+
+        if task.feedback:
+            history: list[str] = []
+            for entry in task.feedback[-5:]:
+                label = entry.get("acceptance_type") or entry.get("feedback_type")
+                note = entry.get("comment") or ""
+                source_run = entry.get("run_id") or ""
+                history.append(f"- {label} run={source_run}: {note}".strip())
+            blocks.append("Task acceptance history:\n" + "\n".join(history))
+
+        if plan is not None:
+            plan_details = [f"mode: {plan.mode}", f"reason: {plan.reason}"]
+            if plan.final_synthesis_instruction:
+                plan_details.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
+            if plan.graph is not None:
+                node_bullets = "\n".join(f"- {node.node_id}: {node.task}" for node in plan.graph.nodes)
+                plan_details.append(f"strategy: {plan.graph.strategy}")
+                plan_details.append("nodes:\n" + node_bullets)
+            blocks.append("Execution plan:\n" + "\n".join(plan_details))
+
+        if team_summaries:
+            # Only the joined summaries are capped at 2400 characters, not the heading.
+            capped = "\n\n".join(team_summaries)[:2400]
+            blocks.append("Team execution summaries:\n" + capped)
+
+        blocks.append(
+            "Skill selection instruction:\n"
+            "Prefer reusing previously activated skills when they still match the Task. "
+            "Select new skills only if the current request, revision, or execution plan needs a different capability. "
+            "If no published skill matches, return [] and let the run continue without skills."
+        )
+        return "\n\n".join(block for block in blocks if block.strip())
+
+    @staticmethod
+    def _append_task_observation(
+        session_manager: Any,
+        session_id: str,
+        *,
+        event_type: str,
+        payload: dict[str, Any],
+    ) -> None:
+        """Append a context-hidden system event carrying ``payload`` to the session.
+
+        The message content is the payload's ``reason`` if truthy, else its ``error``.
+        """
+        headline = payload.get("reason") or payload.get("error")
+        session_manager.append_message(
+            session_id,
+            role="system",
+            event_type=event_type,
+            event_payload=payload,
+            content=headline,
+            context_visible=False,
+        )
+
+    @staticmethod
+    def _join_context(*parts: str | None) -> str:
+        """Join the non-blank parts, each stripped, with a blank line between them."""
+        cleaned: list[str] = []
+        for part in parts:
+            if not part:
+                continue
+            text = part.strip()
+            if text:
+                cleaned.append(text)
+        return "\n\n".join(cleaned)
+
+    @staticmethod
+    def _team_summary_for_validation(result: TeamRunResult) -> str:
+        """Render a team result as plain text: overall fields, then one line per node.
+
+        Each node's output is limited to its first 500 characters.
+        """
+        header = [
+            f"success={result.success}",
+            f"task_id={result.task_id or ''}",
+            "summary:",
+            result.summary,
+            "nodes:",
+        ]
+        node_lines = [
+            f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
+            f"error={node.error or ''} output={node.output_text[:500]}"
+            for node in result.node_results
+        ]
+        return "\n".join(header + node_lines)
+
+    @staticmethod
+    def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
+        """Serialize node results for an event, adding skill details from the plan.
+
+        Each result's ``to_dict()`` payload is extended with the skill and
+        guidance fields of the plan node that has the same ``node_id``; results
+        with no matching plan node are emitted unchanged.
+        """
+        planned: dict[str, Any] = {}
+        if plan.graph:
+            for planned_node in plan.graph.nodes:
+                planned[planned_node.node_id] = planned_node
+
+        serialized: list[dict[str, Any]] = []
+        for item in result.node_results:
+            entry = item.to_dict()
+            node = planned.get(item.node_id)
+            if node is None:
+                serialized.append(entry)
+                continue
+            agent_metadata = node.agent.metadata
+            entry.update(
+                {
+                    "selected_skill_names": list(node.inherited_pinned_skills),
+                    "ephemeral_skill_names": [skill.name for skill in node.inherited_pinned_skill_contexts],
+                    "skill_query": agent_metadata.get("skill_query"),
+                    "ephemeral_guidance_id": agent_metadata.get("ephemeral_guidance_id"),
+                    "ephemeral_guidance_name": agent_metadata.get("ephemeral_guidance_name"),
+                    "ephemeral_used": bool(node.inherited_pinned_skill_contexts),
+                }
+            )
+            serialized.append(entry)
+        return serialized
+
+    @staticmethod
+    def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
+        """Collect the non-``None`` ``evidence`` of each node result; ``[]`` without a result."""
+        collected: list[RunEvidence] = []
+        if result is not None:
+            for node in result.node_results:
+                if node.evidence is not None:
+                    collected.append(node.evidence)
+        return collected
+
+    @staticmethod
+    def _team_synthesis_outcome(
+        plan: TaskExecutionPlan,
+        result: TeamRunResult | None,
+        *,
+        prompt_locale: str | None = None,
+    ) -> tuple[str, str, dict[str, Any]]:
+        """Classify the attempt outcome and describe it for the synthesis run.
+
+        A team plan is ``incomplete`` when any node flagged
+        ``required_for_completion`` has a status other than ``succeeded``
+        (nodes without a result count as ``not_run``); otherwise it is
+        ``complete``. Non-team plans are reported as ``single``.
+
+        Returns:
+            ``(context_text, incomplete_prefix, metadata)``. The prefix is empty
+            unless the outcome is ``incomplete``.
+        """
+        if not (plan.is_team and plan.graph is not None):
+            return (
+                "Task outcome: single",
+                "",
+                {
+                    "task_outcome": "single",
+                    "incomplete_node_ids": [],
+                    "node_statuses": {},
+                    "evidence_gaps": {},
+                },
+            )
+
+        produced: dict[str, Any] = {}
+        if result is not None:
+            for entry in result.node_results:
+                produced[entry.node_id] = entry
+
+        statuses: dict[str, str] = {}
+        gaps_by_node: dict[str, list[str]] = {}
+        unfinished: list[str] = []
+        unfinished_details: list[str] = []
+        finished_evidence: list[str] = []
+        for node in plan.graph.nodes:
+            node_id = node.node_id
+            outcome = produced.get(node_id)
+            ran = outcome is not None
+            status = outcome.completion_status if ran else "not_run"
+            statuses[node_id] = status
+            gaps = list(outcome.evidence_gaps) if ran else []
+            if gaps:
+                gaps_by_node[node_id] = gaps
+            if node.required_for_completion and status != "succeeded":
+                unfinished.append(node_id)
+                finish_reason = outcome.finish_reason if ran else "not_run"
+                error_text = (outcome.error or "") if ran else "node did not run"
+                unfinished_details.append(
+                    f"- {node_id}: status={status}, finish_reason={finish_reason}, "
+                    f"error={error_text}, evidence_gaps={gaps}"
+                )
+            elif ran and status == "succeeded":
+                # Successful output is quoted, capped at 1000 characters per node.
+                finished_evidence.append(f"- {node_id}: {outcome.output_text[:1000]}")
+
+        incomplete = bool(unfinished)
+        task_outcome = "incomplete" if incomplete else "complete"
+        context_parts = [
+            f"Task outcome: {task_outcome}",
+            "Incomplete node IDs: " + (", ".join(unfinished) or "none"),
+        ]
+        if unfinished_details:
+            context_parts.append("Incomplete required node details:\n" + "\n".join(unfinished_details))
+        if finished_evidence:
+            context_parts.append("Available successful node evidence:\n" + "\n".join(finished_evidence))
+        if incomplete:
+            context_parts.append(
+                "Synthesis requirement: produce a partial report from available evidence and explicitly state "
+                "that the task is incomplete, partially completed, or missing required evidence."
+            )
+        prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete else ""
+        metadata = {
+            "task_outcome": task_outcome,
+            "incomplete_node_ids": unfinished,
+            "node_statuses": statuses,
+            "evidence_gaps": gaps_by_node,
+        }
+        return "\n\n".join(context_parts), prefix, metadata
+
+    @staticmethod
+    def _incomplete_prefix(prompt_locale: str | None) -> str:
+        """Return the localized "task incomplete" notice, ending with a blank line.
+
+        English and Traditional Chinese have dedicated text; every other
+        normalized locale gets the Simplified Chinese text.
+        """
+        locale = normalize_main_agent_prompt_locale(prompt_locale)
+        if locale == "en":
+            notice = "Task incomplete: some required steps failed or lack required evidence. The report below uses available results only.\n\n"
+        elif locale == "zh-Hant":
+            notice = "任務未完成：部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n"
+        else:
+            notice = "任务未完成：部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n"
+        return notice
+
+    @staticmethod
+    def _apply_incomplete_prefix(output_text: str, prefix: str) -> str:
+        """Prepend ``prefix`` unless the text already contains an incompleteness notice.
+
+        Detection is a case-insensitive substring search for a fixed set of
+        Chinese and English phrases. When the prefix is applied, leading
+        whitespace of the original text is removed.
+        """
+        lowered = output_text.lower()
+        for marker in (
+            "任务未完成",
+            "任務未完成",
+            "部分完成",
+            "缺少证据",
+            "缺少證據",
+            "task incomplete",
+            "incomplete task",
+            "partially complete",
+            "missing evidence",
+        ):
+            if marker in lowered:
+                return output_text
+        return prefix + output_text.lstrip()
+
+    def _build_task_evidence_packet(
+        self,
+        *,
+        session_manager: Any,
+        task: TaskRecord,
+        attempt_index: int,
+        result: AgentRunResult,
+        team_result: TeamRunResult | None,
+    ) -> TaskEvidencePacket:
+        """Bundle the main run's evidence with any team evidence for this attempt.
+
+        The main-run evidence is built from the session via ``EvidenceBuilder``;
+        the team parts are empty lists when ``team_result`` is ``None``.
+        """
+        builder = EvidenceBuilder(session_manager)
+        main_evidence = builder.build_run_evidence(
+            result.session_id,
+            result.run_id,
+            result.output_text,
+            result.finish_reason,
+        )
+        team_evidence = self._team_run_evidence(team_result)
+        team_nodes = [] if team_result is None else list(team_result.node_results)
+        return TaskEvidencePacket(
+            task_id=task.task_id,
+            attempt_index=attempt_index,
+            main_run=main_evidence,
+            team_runs=team_evidence,
+            team_node_results=team_nodes,
+            final_output=result.output_text,
+        )
+
+    @staticmethod
+    def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
+        """Render a team result as context text for the final synthesis run.
+
+        Sections are separated by blank lines: planner reason, strategy, overall
+        success, summary, every node's status line followed by its full output,
+        the plan's synthesis instruction (if any), and fixed fallback guidance.
+        """
+        rendered_nodes: list[str] = []
+        for node in result.node_results:
+            status_line = (
+                f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
+                f"run_id={node.run_id or ''}, error={node.error or ''}"
+            )
+            rendered_nodes.append(f"{status_line}\n{node.output_text}")
+
+        parts = [
+            "Task team execution result:",
+            f"Planner reason: {plan.reason}",
+            f"Strategy: {plan.graph.strategy if plan.graph else ''}",
+            f"Team success: {result.success}",
+            f"Team summary:\n{result.summary}",
+            "Node results:\n" + "\n\n".join(rendered_nodes),
+        ]
+        if plan.final_synthesis_instruction:
+            parts.append("Final synthesis instruction:\n" + plan.final_synthesis_instruction)
+        parts.append(
+            "Use successful team outputs as internal evidence. If one or more nodes failed, "
+            "do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
+            "with available evidence and clearly state any missing or uncertain data."
+        )
+        return "\n\n".join(parts)
+
+    @staticmethod
+    def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
+        """Describe a team run that produced no result, for the main agent's context."""
+        strategy = plan.graph.strategy if plan.graph else ""
+        fallback_guidance = (
+            "Proceed as the main agent. Do not blindly repeat failed tool calls; "
+            "produce a user-visible fallback answer with available evidence and clearly "
+            "state any missing or uncertain data."
+        )
+        sections = (
+            "Task team execution failed before final synthesis.",
+            f"Planner reason: {plan.reason}",
+            f"Strategy: {strategy}",
+            f"Error: {error}",
+            fallback_guidance,
+        )
+        return "\n\n".join(sections)
+
+    def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any:
+        """Return a per-node factory that builds provider bundles from ``kwargs``.
+
+        On each call the factory takes a fresh copy of ``kwargs`` without
+        ``provider_bundle``, overrides ``model`` / ``provider_name`` with the
+        node agent's truthy values, and delegates to
+        ``make_provider_bundle_for_task``.
+        """
+
+        def factory(node: ExecutionNode) -> Any:
+            # Copied at call time, so later changes to ``kwargs`` are picked up.
+            options = {key: value for key, value in kwargs.items() if key != "provider_bundle"}
+            agent = node.agent
+            if agent.model:
+                options["model"] = agent.model
+            if agent.provider_name:
+                options["provider_name"] = agent.provider_name
+            return self.make_provider_bundle_for_task(self.loaded, options)
+
+        return factory
--- a/app-instance/backend/beaver/tasks/evidence.py
+++ b/app-instance/backend/beaver/tasks/evidence.py
@ -2,6 +2,8 @@

 from __future__ import annotations

+import json
+import re
 from dataclasses import dataclass, field
 from typing import Any

@ -126,6 +128,37 @@ class EvidenceBuilder:
        )


+def evaluate_node_evidence(
+    evidence: RunEvidence,
+    required_evidence: list[str],
+    output_text: str,
+) -> list[str]:
+    """Check a node's coarse-grained (v1) evidence requirements.
+
+    Supported requirement names are ``tool_result`` (at least one tool result
+    whose payload has ``success`` set to ``True``), ``url`` (one of those
+    successful tool results contains a URL) and ``output`` (non-blank
+    ``output_text``). Blank requirement names are ignored.
+
+    Returns:
+        De-duplicated gap messages, one per unmet or unsupported requirement,
+        in the order first encountered.
+    """
+
+    gaps: list[str] = []
+    passing_tools = [
+        tool
+        for tool in evidence.tool_results
+        if tool.event_payload.get("success") is True
+    ]
+    for entry in required_evidence:
+        kind = str(entry).strip()
+        if not kind:
+            continue
+        if kind == "tool_result":
+            satisfied = bool(passing_tools)
+        elif kind == "url":
+            satisfied = any(_tool_evidence_contains_url(tool) for tool in passing_tools)
+        elif kind == "output":
+            satisfied = bool(output_text.strip())
+        else:
+            _append_unique(gaps, f"unsupported evidence requirement: {kind}")
+            continue
+        if not satisfied:
+            _append_unique(gaps, f"missing required evidence: {kind}")
+    return gaps
+
+
 def render_task_evidence(packet: TaskEvidencePacket) -> str:
    sections = [
        f"Task evidence packet: task_id={packet.task_id} attempt={packet.attempt_index}",
@ -181,3 +214,20 @@ def _render_tool_evidence(item: ToolEvidence) -> str:

 def _optional_str(value: Any) -> str | None:
    return str(value) if value is not None else None
+
+
+_URL_RE = re.compile(r"https?://[^\s<>'\"]+", re.IGNORECASE)
+
+
+def _tool_evidence_contains_url(item: ToolEvidence) -> bool:
+    """Return whether a tool evidence item mentions at least one http(s) URL.
+
+    The item's ``url``, its ``content`` and a JSON dump of its
+    ``event_payload`` are searched in that order, stopping at the first match.
+    """
+    # ``url`` was already guarded against None; ``content`` gets the same guard so
+    # a tool result without text cannot raise TypeError inside ``re.search``.
+    if _URL_RE.search(item.url or "") or _URL_RE.search(item.content or ""):
+        return True
+    # The payload is serialized last: it is the most expensive probe and is only
+    # needed when neither direct field carries a URL.
+    payload_text = json.dumps(item.event_payload, ensure_ascii=False, default=str)
+    return _URL_RE.search(payload_text) is not None
+
+
+def _append_unique(values: list[str], value: str) -> None:
+    """Append ``value`` to ``values`` in place unless it is already present."""
+    if value in values:
+        return
+    values.append(value)
--- a/app-instance/backend/beaver/tasks/planner.py
+++ b/app-instance/backend/beaver/tasks/planner.py
@ -4,11 +4,14 @@ from __future__ import annotations

 import asyncio
 import json
+import os
 from dataclasses import dataclass, field
 from typing import Any, Literal

 from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
+from beaver.engine.context import SkillContext
 from beaver.engine.providers import ProviderBundle
+from beaver.tools.registry import ToolRegistry

 from .models import TaskRecord
 from .skill_resolver import SkillResolutionReport, TaskSkillResolver
@ -17,6 +20,24 @@ from .skill_resolver import SkillResolutionReport, TaskSkillResolver
 TaskExecutionMode = Literal["single", "team"]


+# Temporary name-based denylist until high-risk tool approval is implemented.
+# Keep this policy centralized so planner behavior cannot drift by call site.
+# Entries are lowercase: the planner lower-cases each requested tool name
+# before testing membership, so the match is case-insensitive.
+HIGH_RISK_PLANNER_TOOL_NAMES = frozenset(
+    {
+        "delete_file",
+        "execute_command",
+        "external_send",
+        "send_email",
+        "terminal",
+        "write_file",
+    }
+)
+
+
+def _agent_team_enabled() -> bool:
+    """Return whether team planning is enabled by the environment.
+
+    Reads ``BEAVER_AGENT_TEAM_ENABLED`` (default ``"1"`` when unset). After
+    trimming whitespace and lower-casing, the values ``0``, ``false``, ``no``
+    and ``off`` disable the feature; any other value enables it.
+    """
+    return os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1").strip().lower() not in {"0", "false", "no", "off"}
+
+
@dataclass(slots=True)
 class TaskExecutionPlan:
    mode: TaskExecutionMode
@ -25,14 +46,26 @@ class TaskExecutionPlan:
    final_synthesis_instruction: str = ""
    fallback_error: str | None = None
    skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
+    planner_adaptation: dict[str, Any] = field(default_factory=dict)

    @property
    def is_team(self) -> bool:
        return self.mode == "team" and self.graph is not None

    @classmethod
-    def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
-        return cls(mode="single", reason=reason, fallback_error=fallback_error)
+    def single(
+        cls,
+        reason: str,
+        *,
+        fallback_error: str | None = None,
+        planner_adaptation: dict[str, Any] | None = None,
+    ) -> "TaskExecutionPlan":
+        return cls(
+            mode="single",
+            reason=reason,
+            fallback_error=fallback_error,
+            planner_adaptation=dict(planner_adaptation or {}),
+        )

    def to_event_payload(self) -> dict[str, Any]:
        strategy = self.graph.strategy if self.graph is not None else None
@ -57,6 +90,7 @@ class TaskExecutionPlan:
                if item.ephemeral_guidance_id
            ],
            "skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
+            "planner_adaptation": dict(self.planner_adaptation),
            "fallback_error": self.fallback_error,
        }

@ -65,10 +99,34 @@ class TaskExecutionPlanner:
    """Plan whether a Task attempt should run through a team first."""

    _MAX_NODES = 6
+    _MAX_DEPTH = 4
    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}
+    _ALLOWED_NODE_FIELDS = {
+        "node_id",
+        "task",
+        "use_skill",
+        "skill_query",
+        "depends_on",
+        "input_contract",
+        "output_contract",
+        "requested_tools",
+        "required_evidence",
+        "evidence_contract",
+        "validation_rules",
+        "required_for_completion",
+        "block_downstream_on_partial",
+        "max_tool_iterations",
+        "constraints",
+    }

-    def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
+    def __init__(
+        self,
+        *,
+        task_skill_resolver: TaskSkillResolver | None = None,
+        tool_registry: ToolRegistry | None = None,
+    ) -> None:
        self.task_skill_resolver = task_skill_resolver
+        self.tool_registry = tool_registry

    async def plan(
        self,
@ -78,7 +136,15 @@ class TaskExecutionPlanner:
        attempt_index: int,
        provider_bundle: ProviderBundle | None = None,
        timeout_seconds: float = 30.0,
+        skill_summaries: list[str] | None = None,
+        tool_hints: list[str] | None = None,
+        activated_skills: list[SkillContext] | None = None,
    ) -> TaskExecutionPlan:
+        if not _agent_team_enabled():
+            return TaskExecutionPlan.single("planner_disabled_by_environment")
+        if not self._needs_team_planning(task=task, user_message=user_message):
+            return TaskExecutionPlan.single("planner_skipped_simple_task")
+
        provider = None
        model = None
        if provider_bundle is not None:
@ -87,6 +153,7 @@ class TaskExecutionPlanner:
            model = getattr(runtime, "model", None)
        if provider is None:
            return TaskExecutionPlan.single("planner_provider_unavailable")
+        selected_template, base_adaptation = self._select_team_template(activated_skills or [])
        try:
            response = await asyncio.wait_for(
                provider.chat(
@ -104,6 +171,10 @@ class TaskExecutionPlanner:
                                task=task,
                                user_message=user_message,
                                attempt_index=attempt_index,
+                                skill_summaries=skill_summaries or [],
+                                tool_hints=tool_hints or [],
+                                activated_skills=activated_skills or [],
+                                selected_template=selected_template,
                            ),
                        },
                    ],
@ -114,7 +185,40 @@ class TaskExecutionPlanner:
                ),
                timeout=timeout_seconds,
            )
-            plan = self.from_json(response.content or "")
+            try:
+                plan = self._from_json_or_raise(response.content or "")
+            except Exception as first_error:
+                repair_response = await asyncio.wait_for(
+                    provider.chat(
+                        messages=[
+                            {
+                                "role": "system",
+                                "content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.",
+                            },
+                            {
+                                "role": "user",
+                                "content": (
+                                    "Repair the invalid planner JSON using the task-only schema from the original "
+                                    f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}"
+                                ),
+                            },
+                        ],
+                        tools=None,
+                        model=model,
+                        max_tokens=4096,
+                        temperature=0.0,
+                    ),
+                    timeout=timeout_seconds,
+                )
+                try:
+                    plan = self._from_json_or_raise(repair_response.content or "")
+                except Exception as repair_error:
+                    return TaskExecutionPlan.single(
+                        "planner_fallback_single",
+                        fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}",
+                        planner_adaptation=base_adaptation,
+                    )
+            self._merge_adaptation(plan, base_adaptation)
            return await self._resolve_plan(
                plan,
                task=task,
@ -152,30 +256,90 @@ class TaskExecutionPlanner:
            graph.validate()
            plan.graph = graph
            plan.skill_resolution_report = reports
+            self._merge_skill_resolution_adaptation(plan, reports)
            return plan
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")

+    @staticmethod
+    def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool:
+        """Keyword pre-filter deciding whether the planner should run at all.
+
+        The task goal, task description and user message are joined (falsy
+        parts are skipped) and lower-cased. Returns True when any marker below
+        occurs in that text, and False when the text is empty or no marker
+        matches.
+
+        Matching is by plain substring, so a marker also matches inside a
+        longer word (for example "dag" inside "bandage").
+        """
+        text = " ".join(
+            part
+            for part in (
+                task.goal,
+                task.description,
+                user_message,
+            )
+            if part
+        ).lower()
+        if not text.strip():
+            return False
+
+        # English and Chinese keywords; the ASCII markers are lowercase to
+        # match the lower-cased text.
+        complex_markers = (
+            "agent team",
+            "sub-agent",
+            "multi-agent",
+            "parallel",
+            "dag",
+            "workflow",
+            "review",
+            "research",
+            "compare",
+            "comparison",
+            "architecture",
+            "refactor",
+            "multi-file",
+            "end-to-end",
+            "并行",
+            "团队",
+            "多智能体",
+            "子代理",
+            "工作流",
+            "评审",
+            "审查",
+            "调研",
+            "研究",
+            "对比",
+            "架构",
+            "重构",
+            "多文件",
+            "端到端",
+        )
+        return any(marker in text for marker in complex_markers)
+
    def from_json(self, text: str) -> TaskExecutionPlan:
        try:
-            payload = self._parse_json_object(text)
-            mode = str(payload.get("mode") or "single").strip().lower()
-            reason = str(payload.get("reason") or "")
-            if mode != "team":
-                return TaskExecutionPlan.single(reason or "planner_selected_single")
-
-            graph = self._graph_from_payload(payload)
-            graph.validate()
-            return TaskExecutionPlan(
-                mode="team",
-                reason=reason or "planner_selected_team",
-                graph=graph,
-                final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
-            )
+            return self._from_json_or_raise(text)
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))

-    def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
+    def _from_json_or_raise(self, text: str) -> TaskExecutionPlan:
+        """Build a plan from planner output, letting validation errors propagate.
+
+        Nothing is caught here: exceptions raised while parsing the JSON
+        object, building the graph or validating it reach the caller, which
+        decides how to fall back.
+
+        A missing or empty ``mode`` is treated as ``"single"``, and any mode
+        other than ``"team"`` yields a single-agent plan. For ``"team"`` the
+        graph is built from the payload and validated with
+        ``max_depth=self._MAX_DEPTH``.
+        """
+        payload = self._parse_json_object(text)
+        mode = str(payload.get("mode") or "single").strip().lower()
+        reason = str(payload.get("reason") or "")
+        adaptation = self._adaptation_from_payload(payload)
+        if mode != "team":
+            return TaskExecutionPlan.single(
+                reason or "planner_selected_single",
+                planner_adaptation=adaptation,
+            )
+
+        # The graph builder receives the same adaptation dict that is stored
+        # on the plan below, so anything it records there is kept.
+        graph = self._graph_from_payload(payload, adaptation=adaptation)
+        graph.validate(max_depth=self._MAX_DEPTH)
+        return TaskExecutionPlan(
+            mode="team",
+            reason=reason or "planner_selected_team",
+            graph=graph,
+            final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
+            planner_adaptation=adaptation,
+        )
+
+    def _graph_from_payload(
+        self,
+        payload: dict[str, Any],
+        *,
+        adaptation: dict[str, Any],
+    ) -> ExecutionGraph:
        strategy = str(payload.get("strategy") or "sequence").strip().lower()
        if strategy not in self._SUPPORTED_STRATEGIES:
            raise ValueError(f"Unsupported team strategy: {strategy}")
@ -189,16 +353,27 @@ class TaskExecutionPlanner:
        for index, item in enumerate(raw_nodes, start=1):
            if not isinstance(item, dict):
                raise ValueError("Each team node must be an object")
-            agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
-            skill_query = str(item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or "").strip()
-            requested_capabilities = _string_list(
-                item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
-            )
-            requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
-            node_id = str(item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}").strip()
+            unsupported = sorted(set(item) - self._ALLOWED_NODE_FIELDS)
+            if unsupported:
+                raise ValueError(f"Unsupported team node field(s): {', '.join(unsupported)}")
+            node_id = str(item.get("node_id") or f"node_{index}").strip()
            task = str(item.get("task") or "").strip()
            if not node_id or not task:
-                raise ValueError("Each team node requires node_id/id and task")
+                raise ValueError("Each team node requires node_id and task")
+            allowed_tool_names = self._resolve_requested_tools(
+                item.get("requested_tools"),
+                warnings=adaptation["warnings"],
+            )
+            use_skill = _optional_str(item.get("use_skill"))
+            skill_query = _optional_str(item.get("skill_query")) or task
+            if use_skill is not None or "skill_query" in item:
+                adaptation.setdefault("node_skill_bindings", []).append(
+                    {
+                        "node_id": node_id,
+                        "use_skill": use_skill,
+                        "skill_query": skill_query,
+                    }
+                )
            nodes.append(
                ExecutionNode(
                    node_id=node_id,
@ -208,30 +383,147 @@ class TaskExecutionPlanner:
                        role="",
                        system_prompt="",
                        metadata={
+                            "use_skill": use_skill,
                            "skill_query": skill_query,
-                            "required_capabilities": requested_capabilities,
-                            "requested_tags": requested_tags,
+                            "required_capabilities": [],
+                            "requested_tags": [],
                            "sub_agent_kind": "generic_skill_worker",
                        },
                    ),
                    depends_on=[str(dep) for dep in item.get("depends_on") or []],
-                    inherited_pinned_skills=[str(name) for name in item.get("pinned_skills") or []],
                    constraints=[str(value) for value in item.get("constraints") or []],
-                    expected_output=str(item.get("expected_output") or "") or None,
+                    input_contract=_dict_value(item.get("input_contract")),
+                    output_contract=_dict_value(item.get("output_contract")),
+                    allowed_tool_names=allowed_tool_names,
+                    required_evidence=_string_list(item.get("required_evidence")),
+                    evidence_contract=_dict_value(item.get("evidence_contract")),
+                    validation_rules=_string_list(item.get("validation_rules")),
+                    required_for_completion=bool(item.get("required_for_completion", True)),
+                    block_downstream_on_partial=bool(item.get("block_downstream_on_partial", False)),
+                    max_tool_iterations=_optional_int(item.get("max_tool_iterations")),
                )
            )
        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]

+    def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None:
+        """Filter the tool names a planner node requested.
+
+        Returns None when ``value`` is None. Otherwise returns the names that
+        are not on the high-risk denylist and that the tool registry knows,
+        in their original order. Each dropped name adds a message to
+        ``warnings`` (mutated in place, without duplicates).
+
+        When no tool registry is configured, every name that is not high-risk
+        is dropped as unknown, so the result is an empty list.
+        """
+        if value is None:
+            return None
+        result: list[str] = []
+        for name in _string_list(value):
+            # Denylist check is case-insensitive; the warning keeps the
+            # original spelling.
+            if name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES:
+                _append_unique(warnings, f"requires_high_risk_review: {name}")
+                continue
+            if self.tool_registry is None or self.tool_registry.get(name) is None:
+                _append_unique(warnings, f"unknown tool removed: {name}")
+                continue
+            result.append(name)
+        return result
+
+    @staticmethod
+    def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]:
+        """Extract the planner's ``adaptation`` object as a fresh dict.
+
+        A non-dict ``adaptation`` value is replaced by an empty dict. The
+        result always has a ``"warnings"`` key, normalised through
+        ``_string_list``.
+        """
+        raw = payload.get("adaptation")
+        # Shallow copy so later mutation does not touch the parsed payload.
+        adaptation = dict(raw) if isinstance(raw, dict) else {}
+        adaptation["warnings"] = _string_list(adaptation.get("warnings"))
+        return adaptation
+
+    @staticmethod
+    def _select_team_template(
+        activated_skills: list[SkillContext],
+    ) -> tuple[SkillContext | None, dict[str, Any]]:
+        """Pick the primary team template among the activated skills.
+
+        A skill is a candidate when its ``team_template`` is a dict whose
+        ``"nodes"`` value is a list. The first candidate in activation order
+        is selected; the names of the remaining candidates are reported as
+        ignored.
+
+        Returns the selected skill (or None) together with the base
+        adaptation dict describing the selection. ``template_used`` starts
+        as False in that dict.
+        """
+        candidates = [
+            skill
+            for skill in activated_skills
+            if isinstance(skill.team_template, dict) and isinstance(skill.team_template.get("nodes"), list)
+        ]
+        selected = candidates[0] if candidates else None
+        # Template warnings are gathered from every activated skill, not only
+        # from the candidates, and are prefixed with the skill name.
+        warnings: list[str] = []
+        for skill in activated_skills:
+            for warning in skill.team_template_warnings:
+                _append_unique(warnings, f"{skill.name}: {warning}")
+        return selected, {
+            "template_used": False,
+            "selected_template": selected.name if selected else None,
+            "selection_reason": (
+                "first activated skill with a valid team template"
+                if selected
+                else "no activated skill has a valid team template"
+            ),
+            "ignored_templates": [skill.name for skill in candidates[1:]],
+            "warnings": warnings,
+        }
+
+    @staticmethod
+    def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None:
+        """Replace ``plan.planner_adaptation`` with a merge of base and planner data.
+
+        Template selection fields come from ``base``; ``template_used`` comes
+        from the plan's current adaptation (default False). Warnings from
+        both are combined, base first, without duplicates.
+        ``node_skill_bindings`` is carried over when it is a list, keeping
+        only dict entries. Any other key in the plan's adaptation is dropped.
+        """
+        payload = dict(plan.planner_adaptation)
+        warnings: list[str] = []
+        for warning in [*base.get("warnings", []), *payload.get("warnings", [])]:
+            _append_unique(warnings, str(warning))
+        merged = {
+            "template_used": bool(payload.get("template_used", False)),
+            "selected_template": base.get("selected_template"),
+            "selection_reason": base.get("selection_reason"),
+            "ignored_templates": list(base.get("ignored_templates", [])),
+            "warnings": warnings,
+        }
+        if isinstance(payload.get("node_skill_bindings"), list):
+            merged["node_skill_bindings"] = [dict(item) for item in payload["node_skill_bindings"] if isinstance(item, dict)]
+        plan.planner_adaptation = merged
+
+    @staticmethod
+    def _merge_skill_resolution_adaptation(
+        plan: TaskExecutionPlan,
+        reports: list[SkillResolutionReport],
+    ) -> None:
+        """Fold skill-resolution results into ``plan.planner_adaptation`` in place.
+
+        Every report's warnings are added to the adaptation warnings without
+        duplicates. When a node has a recorded skill binding and its report
+        names a requested skill that was not bound exactly, the binding gets
+        a ``"fallback_reason"`` entry.
+        """
+        # setdefault returns the list stored in the dict, so the appends
+        # below update the plan directly.
+        warnings = plan.planner_adaptation.setdefault("warnings", [])
+        bindings = plan.planner_adaptation.get("node_skill_bindings")
+        # Index the binding dicts by node id; the dicts themselves are
+        # mutated below.
+        binding_by_node = {
+            str(item.get("node_id")): item
+            for item in bindings or []
+            if isinstance(item, dict)
+        }
+        for report in reports:
+            for warning in report.warnings:
+                _append_unique(warnings, warning)
+            binding = binding_by_node.get(report.node_id)
+            if binding is not None and report.requested_skill_name and not report.exact_binding_used:
+                binding["fallback_reason"] = f"use_skill unresolved; {report.reason}"
+
    @staticmethod
    def _prompt(
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
+        skill_summaries: list[str] | None = None,
+        tool_hints: list[str] | None = None,
+        activated_skills: list[SkillContext] | None = None,
+        selected_template: SkillContext | None = None,
    ) -> str:
        history_note = ""
        if task.feedback:
            history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
+        skill_note = ""
+        if skill_summaries:
+            skill_note = "\nActivated skill summaries:\n" + "\n".join(f"- {item}" for item in skill_summaries)
+        guidance_note = ""
+        if activated_skills:
+            guidance_note = "\nActivated Skill guidance:\n" + "\n".join(
+                f"[{skill.name}]\n{skill.content}" for skill in activated_skills
+            )
+        template_note = ""
+        if selected_template is not None:
+            template_note = "\nPrimary Skill team template:\n" + json.dumps(
+                {
+                    "skill_name": selected_template.name,
+                    "skill_version": selected_template.version,
+                    "template": selected_template.team_template,
+                },
+                ensure_ascii=False,
+                indent=2,
+            )
+        tool_note = ""
+        if tool_hints:
+            tool_note = "\nActivated skill tool hints:\n" + "\n".join(f"- {item}" for item in tool_hints)
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -241,13 +533,24 @@ class TaskExecutionPlanner:
            '  "mode": "single" | "team",\n'
            '  "reason": "short reason",\n'
            '  "strategy": "sequence" | "parallel" | "dag",\n'
-            '  "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
-            '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
+            '  "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", '
+            '"skill_query": "optional dynamic skill query", "depends_on": [], '
+            '"input_contract": {}, "output_contract": {}, "requested_tools": [], '
+            '"required_evidence": [], "evidence_contract": {}, "validation_rules": [], '
+            '"required_for_completion": true, "block_downstream_on_partial": false, '
+            '"max_tool_iterations": 3, "constraints": []}],\n'
+            '  "adaptation": {"template_used": true, "warnings": []},\n'
            '  "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
            "}\n\n"
+            "Node definitions are task-only. Never output agent or role fields. Use at most one primary "
+            "Skill template; treat all other activated Skills as guidance.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
+            f"{skill_note}"
+            f"{guidance_note}"
+            f"{template_note}"
+            f"{tool_note}"
            f"{history_note}"
        )

@ -275,6 +578,26 @@ def _optional_str(value: Any) -> str | None:
    return text or None


+def _optional_int(value: Any) -> int | None:
+    """Convert an optional planner value to a non-negative int.
+
+    Returns None for ``None`` and for the empty string. Booleans are
+    rejected explicitly. Everything else goes through ``int()``, so numeric
+    strings are accepted and floats are truncated.
+
+    Raises:
+        ValueError: for a bool, a negative result, or a string ``int()``
+            cannot parse. ``int()`` may also raise TypeError for unsupported
+            types.
+
+    NOTE(review): the error messages hard-code ``max_tool_iterations``, the
+    only field this helper is used for in the visible code.
+    """
+    if value in (None, ""):
+        return None
+    # bool is a subclass of int, so int(True) would otherwise yield 1.
+    if isinstance(value, bool):
+        raise ValueError("max_tool_iterations must be an integer")
+    result = int(value)
+    if result < 0:
+        raise ValueError("max_tool_iterations must be non-negative")
+    return result
+
+
+def _dict_value(value: Any) -> dict[str, Any]:
+    """Return a shallow copy of ``value`` when it is a dict, else an empty dict."""
+    return dict(value) if isinstance(value, dict) else {}
+
+
+def _append_unique(values: list[str], value: str) -> None:
+    """Append ``value`` to ``values`` in place unless it is falsy or already present."""
+    if value and value not in values:
+        values.append(value)
+
+
 def _string_list(value: Any) -> list[str]:
    if not isinstance(value, list):
        if isinstance(value, str):
--- a/app-instance/backend/beaver/tasks/router.py
+++ b/app-instance/backend/beaver/tasks/router.py
@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import json
+import re
 from typing import Any

 from .models import MainAgentDecision, TaskRecord
@ -24,6 +25,15 @@ class MainAgentRouter:
        thinking_enabled: bool | None = None,
        timeout_seconds: float = 8.0,
    ) -> MainAgentDecision:
+        if active_task is None and _is_obvious_simple_chat(message):
+            return MainAgentDecision(mode="simple", reason="obvious_simple_chat", action="simple_chat")
+        if active_task is None and _is_obvious_task_request(message):
+            return MainAgentDecision(
+                mode="task",
+                reason="obvious_task",
+                starts_new_task=True,
+                action="create_task",
+            )
        if provider is None:
            return self._apply_active_task_boundary(
                self._fallback(active_task=active_task, reason="router_provider_unavailable"),
@ -246,6 +256,64 @@ def _clean_short_title(value: Any) -> str | None:
    return title[:40] or None


+def _is_obvious_simple_chat(message: str) -> bool:
+    """Heuristically detect a message that needs no task (greeting, thanks, text edit).
+
+    The message is passed through ``_compact_text``, lower-cased, and has
+    the listed ASCII/full-width punctuation stripped from both ends. Returns
+    False when the result is empty, contains a URL or path, or looks like a
+    fresh task request. Otherwise returns True when the text is one of the
+    short greeting/thanks phrases, or is at most 1200 characters long and
+    starts with one of the translate/polish/summarise prefixes.
+    """
+    text = _compact_text(message).lower().strip("!！?？。.,，~～")
+    if not text:
+        return False
+    if _has_url_or_path(text) or _looks_like_fresh_task_request(text):
+        return False
+    # Exact-match phrases. NOTE(review): "thankyou!" can never match because
+    # "!" has already been stripped from both ends of the text.
+    # NOTE(review): whether "thank you" reaches here as "thankyou" depends
+    # on how _compact_text treats inner whitespace - confirm.
+    if len(text) <= 24 and text in {
+        "hi",
+        "hello",
+        "hey",
+        "thanks",
+        "thankyou",
+        "thankyou!",
+        "谢谢",
+        "谢了",
+        "多谢",
+        "你好",
+        "您好",
+        "嗨",
+        "在吗",
+        "早上好",
+        "下午好",
+        "晚上好",
+        "辛苦了",
+    }:
+        return True
+    # Requests that transform text supplied in the message (translate,
+    # polish, rewrite, proofread, summarise), matched by prefix only.
+    simple_prefixes = (
+        "翻译",
+        "translate",
+        "润色",
+        "改写",
+        "校对",
+        "总结下面",
+        "总结这段",
+        "摘要下面",
+        "summarize this",
+    )
+    return len(text) <= 1200 and text.startswith(simple_prefixes)
+
+
+def _is_obvious_task_request(message: str) -> bool:
+    """Heuristically detect a message that should start a task.
+
+    Returns False for an empty message (after ``_compact_text``) or one that
+    looks like an explicit follow-up to an existing task. Otherwise returns
+    True when the text contains a URL or path, or looks like a fresh task
+    request.
+    """
+    text = _compact_text(message)
+    if not text:
+        return False
+    # The follow-up check runs first, so a follow-up that contains a URL or
+    # path is still not treated as a new task.
+    if _looks_like_explicit_task_followup(text):
+        return False
+    if _has_url_or_path(text):
+        return True
+    return _looks_like_fresh_task_request(text)
+
+
+def _has_url_or_path(text: str) -> bool:
+    """Return True when ``text`` appears to contain a URL or a filesystem path.
+
+    URL: "http://", "https://" or "www." anywhere in the text.
+    Path: a token at the start of the text, or right after whitespace, a
+    quote or a backtick, that begins with ".", "/", "~" or a drive letter
+    followed by a colon and a slash or backslash, and has at least one more
+    non-space, non-quote character.
+
+    The path pattern is deliberately loose, so tokens such as "~10" or
+    "/help" also count as paths.
+    """
+    return bool(
+        re.search(r"https?://|www\.", text)
+        or re.search(r"(^|[\s'\"`])(?:[./~]|[a-zA-Z]:[\\/])[^\s'\"`]+", text)
+    )
+
+
 def _looks_like_explicit_task_followup(message: str) -> bool:
    text = _compact_text(message)
    if not text:
@ -307,6 +375,16 @@ def _looks_like_fresh_task_request(message: str) -> bool:
        "看看最新",
        "最新",
        "今天",
+        "昨天",
+        "昨日",
+        "昨晚",
+        "刚刚",
+        "最近",
+        "近期",
+        "本届",
+        "本场",
+        "这场",
+        "上一场",
        "明天",
        "上传",
        "下载",
@ -324,6 +402,12 @@ def _looks_like_fresh_task_request(message: str) -> bool:
        "look up",
        "latest",
        "today",
+        "yesterday",
+        "last night",
+        "recent",
+        "recently",
+        "this match",
+        "this game",
        "tomorrow",
        "upload",
        "download",
--- a/app-instance/backend/beaver/tasks/skill_resolver.py
+++ b/app-instance/backend/beaver/tasks/skill_resolver.py
@ -7,9 +7,11 @@ from dataclasses import dataclass, field, replace
 from typing import Any

 from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
+from beaver.engine.context import SkillContext
 from beaver.engine.providers import ProviderBundle
 from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
 from beaver.skills.catalog.loader import SkillsLoader
+from beaver.skills.catalog.utils import strip_frontmatter
 from beaver.skills.drafts import DraftService
 from beaver.skills.learning import EphemeralGuidanceSynthesizer
 from beaver.tasks.models import TaskRecord
@ -24,6 +26,9 @@ class SkillResolutionReport:
    ephemeral_guidance_id: str | None = None
    ephemeral_guidance_name: str | None = None
    ephemeral_used: bool = False
+    requested_skill_name: str | None = None
+    exact_binding_used: bool = False
+    warnings: list[str] = field(default_factory=list)
    reason: str = ""

    def to_dict(self) -> dict[str, Any]:
@ -35,6 +40,9 @@ class SkillResolutionReport:
            "ephemeral_guidance_id": self.ephemeral_guidance_id,
            "ephemeral_guidance_name": self.ephemeral_guidance_name,
            "ephemeral_used": self.ephemeral_used,
+            "requested_skill_name": self.requested_skill_name,
+            "exact_binding_used": self.exact_binding_used,
+            "warnings": list(self.warnings),
            "reason": self.reason,
        }

@ -87,12 +95,45 @@ class TaskSkillResolver:
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionNode, SkillResolutionReport]:
+        use_skill = str(node.agent.metadata.get("use_skill") or "").strip()
        skill_query = str(node.agent.metadata.get("skill_query") or node.task or node.node_id).strip()
+        warnings: list[str] = []
        required_capabilities = [
            str(item).strip()
            for item in node.agent.metadata.get("required_capabilities", [])
            if str(item).strip()
        ]
+        if use_skill:
+            exact_context = self._load_exact_skill_context(use_skill)
+            if exact_context is not None:
+                resolved = self._generic_node(
+                    node,
+                    pinned_skill_names=_merge_names(node.inherited_pinned_skills, [use_skill]),
+                    pinned_skill_contexts=_merge_skill_contexts(
+                        node.inherited_pinned_skill_contexts,
+                        [exact_context],
+                    ),
+                    metadata={
+                        **node.agent.metadata,
+                        "use_skill": use_skill,
+                        "skill_query": skill_query,
+                        "required_capabilities": required_capabilities,
+                        "selected_skill_names": [use_skill],
+                        "ephemeral_skill_names": [],
+                        "exact_binding_used": True,
+                    },
+                )
+                return resolved, SkillResolutionReport(
+                    node_id=node.node_id,
+                    skill_query=skill_query,
+                    required_capabilities=required_capabilities,
+                    selected_skill_names=[use_skill],
+                    requested_skill_name=use_skill,
+                    exact_binding_used=True,
+                    reason="exact use_skill binding",
+                )
+            warnings.append(f"use_skill unresolved: {use_skill}")
+
        if self._is_summary_only_node(node, skill_query=skill_query, required_capabilities=required_capabilities):
            resolved = self._generic_node(
                node,
@ -104,6 +145,7 @@ class TaskSkillResolver:
                    "required_capabilities": required_capabilities,
                    "selected_skill_names": [],
                    "ephemeral_skill_names": [],
+                    "exact_binding_used": False,
                    "summary_uses_dependency_outputs_only": True,
                },
            )
@ -113,6 +155,9 @@ class TaskSkillResolver:
                required_capabilities=required_capabilities,
                selected_skill_names=[],
                ephemeral_used=False,
+                requested_skill_name=use_skill or None,
+                exact_binding_used=False,
+                warnings=warnings,
                reason="summary node uses dependency outputs directly",
            )

@ -141,6 +186,7 @@ class TaskSkillResolver:
                    "required_capabilities": required_capabilities,
                    "selected_skill_names": selected,
                    "ephemeral_skill_names": [],
+                    "exact_binding_used": False,
                },
            )
            return resolved, SkillResolutionReport(
@ -149,6 +195,9 @@ class TaskSkillResolver:
                required_capabilities=required_capabilities,
                selected_skill_names=selected,
                ephemeral_used=False,
+                requested_skill_name=use_skill or None,
+                exact_binding_used=False,
+                warnings=warnings,
                reason="matched published skill",
            )

@ -174,6 +223,7 @@ class TaskSkillResolver:
                "ephemeral_guidance_id": missing.guidance_id,
                "ephemeral_guidance_name": missing.guidance_name,
                "ephemeral_skill_names": [missing.skill_context.name],
+                "exact_binding_used": False,
            },
        )
        return resolved, SkillResolutionReport(
@ -183,9 +233,27 @@ class TaskSkillResolver:
            ephemeral_guidance_id=missing.guidance_id,
            ephemeral_guidance_name=missing.guidance_name,
            ephemeral_used=True,
+            requested_skill_name=use_skill or None,
+            exact_binding_used=False,
+            warnings=warnings,
            reason="generated ephemeral guidance for missing sub-agent capability",
        )

+    def _load_exact_skill_context(self, name: str) -> SkillContext | None:
+        """Build the skill context for an explicitly bound published skill.
+
+        Both the skill record and the published content are fetched from
+        ``self.skills_loader``; front matter is removed from the content and
+        the remainder is trimmed.
+
+        Returns:
+            A ``SkillContext`` tagged with the ``explicit_node_binding``
+            activation reason, or ``None`` when the loader has no record for
+            *name* or the content is empty once front matter is stripped.
+        """
+        loader = self.skills_loader
+        record = loader.get_skill_record(name)
+        raw_content = loader.load_published_skill(name)
+        body = strip_frontmatter(raw_content).strip() if raw_content else ""
+        if record is not None and body:
+            return SkillContext(
+                name=name,
+                content=body,
+                version=record.version,
+                # A missing hash is normalised to an empty string.
+                content_hash=record.content_hash or "",
+                activation_reason="explicit_node_binding",
+                tool_hints=list(record.tool_hints),
+            )
+        return None
+
    async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
        candidates = self.skills_loader.build_selection_candidates()
        if not candidates:
@ -336,3 +404,14 @@ def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
        if name and name not in result:
            result.append(name)
    return result
+
+
+def _merge_skill_contexts(parent: list[SkillContext], selected: list[SkillContext]) -> list[SkillContext]:
+    """Concatenate *parent* and *selected*, keeping one context per skill name.
+
+    Order is preserved and the first context seen for a name wins, so entries
+    from *parent* take precedence over same-named entries in *selected*.
+    """
+    first_by_name: dict[str, SkillContext] = {}
+    for group in (parent, selected):
+        for candidate in group:
+            # setdefault never overwrites, which gives first-wins semantics;
+            # dict insertion order keeps the original sequence.
+            first_by_name.setdefault(candidate.name, candidate)
+    return list(first_by_name.values())
--- a/app-instance/backend/beaver/tools/builtins/web.py
+++ b/app-instance/backend/beaver/tools/builtins/web.py
@ -5,10 +5,11 @@ from __future__ import annotations
 import asyncio
 from dataclasses import dataclass, field
 from html import unescape
+from html.parser import HTMLParser
 import json
 import re
 from typing import Any
-from urllib.parse import quote_plus, urlparse
+from urllib.parse import quote_plus, urljoin, urlparse

 import httpx

@ -24,6 +25,10 @@ def _strip_html(value: str) -> str:
    return re.sub(r"\s+", " ", text).strip()


+def _compact_text(value: str) -> str:
+    """Decode HTML entities in *value*, then squeeze whitespace runs to one space and trim."""
+    decoded = unescape(value)
+    squeezed = re.sub(r"\s+", " ", decoded)
+    return squeezed.strip()
+
+
 def _safe_url(url: str) -> str:
    parsed = urlparse(url)
    if parsed.scheme not in {"http", "https"} or not parsed.netloc:
@ -31,6 +36,77 @@ def _safe_url(url: str) -> str:
    return url


+class _HtmlMetadataParser(HTMLParser):
+    """Collect the document title and de-duplicated http(s) links from HTML.
+
+    Relative ``href`` values are resolved against ``base_url``.  Everything
+    inside ``<script>`` and ``<style>`` elements is ignored.  After a document
+    has been fed, ``title`` holds the whitespace-normalised text of the first
+    ``<title>`` element and ``links`` holds ``{"text": ..., "url": ...}``
+    dicts in document order, one per distinct absolute URL.
+    """
+
+    def __init__(self, base_url: str) -> None:
+        # With convert_charrefs=True, handle_data() receives text whose
+        # character references have already been decoded.
+        super().__init__(convert_charrefs=True)
+        self.base_url = base_url
+        self.title = ""
+        self.links: list[dict[str, str]] = []
+        self._in_title = False
+        self._title_done = False
+        self._current_href: str | None = None
+        self._current_text: list[str] = []
+        self._skip_depth = 0
+        self._seen_urls: set[str] = set()
+
+    @staticmethod
+    def _normalize_space(text: str) -> str:
+        """Collapse whitespace runs to single spaces and trim both ends."""
+        # No second unescape pass: the parser already decoded entities, and
+        # decoding again would turn a literal "&lt;" in the page into "<".
+        return re.sub(r"\s+", " ", text).strip()
+
+    def _resolve_href(self, href: str) -> str | None:
+        """Return *href* as an absolute http(s) URL, or ``None`` if unusable."""
+        try:
+            absolute = urljoin(self.base_url, href)
+            scheme = urlparse(absolute).scheme
+        except ValueError:
+            # urllib rejects some malformed URLs (e.g. an unbalanced "[" in
+            # the host).  One bad link must not abort parsing of the page.
+            return None
+        return absolute if scheme in {"http", "https"} else None
+
+    def _flush_link(self) -> None:
+        """Record the anchor currently being collected, if any, and reset."""
+        href = self._current_href
+        if href is None:
+            return
+        if href not in self._seen_urls:
+            text = self._normalize_space(" ".join(self._current_text))
+            self.links.append({"text": text, "url": href})
+            self._seen_urls.add(href)
+        self._current_href = None
+        self._current_text = []
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        lowered = tag.lower()
+        if lowered in {"script", "style"}:
+            self._skip_depth += 1
+            return
+        if self._skip_depth:
+            return
+        if lowered == "title":
+            # Only the first <title> is the document title; later ones (for
+            # example inside inline SVG) must not be appended to it.
+            self._in_title = not self._title_done
+            return
+        if lowered == "a":
+            # A new anchor implicitly ends a previous one that was never closed.
+            self._flush_link()
+            href = (dict(attrs).get("href") or "").strip()
+            if href:
+                self._current_href = self._resolve_href(href)
+                self._current_text = []
+
+    def handle_endtag(self, tag: str) -> None:
+        lowered = tag.lower()
+        if lowered in {"script", "style"} and self._skip_depth:
+            self._skip_depth -= 1
+            return
+        if self._skip_depth:
+            return
+        if lowered == "title":
+            if self._in_title:
+                self._in_title = False
+                self._title_done = True
+                self.title = self._normalize_space(self.title)
+            return
+        if lowered == "a":
+            self._flush_link()
+
+    def handle_data(self, data: str) -> None:
+        if self._skip_depth:
+            return
+        if self._in_title:
+            self.title += data
+        if self._current_href:
+            self._current_text.append(data)
+
+
+def _extract_html_metadata(
+    html: str,
+    base_url: str,
+    *,
+    max_links: int = 80,
+    max_pdf_links: int = 30,
+) -> dict[str, Any]:
+    """Extract the title, links and PDF links from an HTML document.
+
+    Args:
+        html: Raw HTML markup.
+        base_url: URL used to resolve relative links.
+        max_links: Maximum number of entries returned under ``"links"``.
+        max_pdf_links: Maximum number of entries returned under ``"pdf_links"``.
+
+    Returns:
+        A dict with ``"title"`` (str), ``"links"`` and ``"pdf_links"`` (lists
+        of ``{"text", "url"}`` dicts).  PDF links are those whose URL path
+        ends in ``.pdf``, case-insensitively.
+    """
+    parser = _HtmlMetadataParser(base_url)
+    parser.feed(html)
+    # close() forces the parser to process any input it was still buffering.
+    parser.close()
+    all_links = parser.links
+    # Filter PDFs from every collected link, not from the truncated list, so a
+    # PDF that appears after the first ``max_links`` anchors is still reported.
+    pdf_links = [
+        link for link in all_links if urlparse(link["url"]).path.lower().endswith(".pdf")
+    ]
+    return {
+        "title": parser.title,
+        "links": all_links[: max(0, max_links)],
+        "pdf_links": pdf_links[: max(0, max_pdf_links)],
+    }
+
+
@dataclass(slots=True)
 class WebFetchTool:
    name: str = "web_fetch"
@ -61,13 +137,20 @@ class WebFetchTool:
            response.raise_for_status()
            content_type = response.headers.get("content-type", "")
            raw = response.text
-            text = _strip_html(raw) if "html" in content_type.lower() else raw
+            is_html = "html" in content_type.lower()
+            text = _strip_html(raw) if is_html else raw
+            metadata = _extract_html_metadata(raw, str(response.url)) if is_html else {
+                "title": "",
+                "links": [],
+                "pdf_links": [],
+            }
            truncated = len(text) > limit
            return _json_result(
                True,
                url=str(response.url),
                status_code=response.status_code,
                content_type=content_type,
+                **metadata,
                content=text[:limit],
                truncated=truncated,
            )
@ -97,6 +180,15 @@ class WebSearchTool:
            if not str(query).strip():
                raise ValueError("query is required")
            bounded = max(1, min(int(limit or 5), 10))
+            errors: list[str] = []
+            try:
+                ddgs_results = await asyncio.to_thread(_search_ddgs, query, bounded)
+            except Exception as exc:
+                ddgs_results = []
+                errors.append(str(exc))
+            if ddgs_results:
+                return _json_result(True, **_search_result_payload(query, "ddgs", ddgs_results))
+
            headers = {"User-Agent": "Mozilla/5.0 Beaver/1.0"}
            timeout = httpx.Timeout(connect=5, read=8, write=5, pool=5)
            async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, trust_env=True) as client:
@ -118,7 +210,6 @@ class WebSearchTool:
                        )
                    ),
                ]
-                errors: list[str] = []
                try:
                    for completed in asyncio.as_completed(tasks):
                        try:
@ -127,7 +218,7 @@ class WebSearchTool:
                            errors.append(str(exc))
                            continue
                        if results:
-                            return _json_result(True, query=query, engine=engine, results=results)
+                            return _json_result(True, **_search_result_payload(query, engine, results))
                    detail = "; ".join(error for error in errors if error) or "no search results"
                    return _json_result(False, query=query, error=detail)
                finally:
@ -182,6 +273,62 @@ def _parse_bing_results(html: str, limit: int) -> list[dict[str, str]]:
    return results


+def _search_ddgs(query: str, limit: int) -> list[dict[str, str]]:
+    """Run a text search through the ``ddgs`` package and normalise the rows.
+
+    Each returned entry has ``title``, ``url`` and ``snippet`` keys.  Rows
+    lacking a title or a URL are dropped, and collection stops once *limit*
+    entries have been gathered.  The import is local to this function.
+    """
+    from ddgs import DDGS  # type: ignore[import-not-found]
+
+    collected: list[dict[str, str]] = []
+    for entry in DDGS().text(query, max_results=limit) or []:
+        heading = _compact_text(str(entry.get("title") or ""))
+        # Rows may expose the link as either "href" or "url".
+        link = str(entry.get("href") or entry.get("url") or "").strip()
+        summary = _compact_text(str(entry.get("body") or entry.get("snippet") or ""))
+        if heading and link:
+            collected.append({"title": heading, "url": link, "snippet": summary})
+        if len(collected) >= limit:
+            break
+    return collected
+
+
+def _search_result_payload(query: str, engine: str, results: list[dict[str, str]]) -> dict[str, Any]:
+    """Assemble the keyword payload for a successful search response.
+
+    The payload carries the query, the engine name, the quality verdict from
+    ``_assess_search_quality`` and the results; ``low_relevance_reason`` is
+    added only when the assessment supplies a reason.
+    """
+    quality, reason = _assess_search_quality(query, results)
+    optional: dict[str, Any] = {"low_relevance_reason": reason} if reason else {}
+    return {
+        "query": query,
+        "engine": engine,
+        "quality": quality,
+        "results": results,
+        **optional,
+    }
+
+
+def _search_terms(value: str) -> set[str]:
+    """Return the distinct lowercase ``[a-z0-9]`` tokens of *value* that are longer than two characters."""
+    tokens = re.findall(r"[a-z0-9]+", value.lower())
+    return {token for token in tokens if len(token) >= 3}
+
+
+def _assess_search_quality(query: str, results: list[dict[str, str]]) -> tuple[str, str | None]:
+    """Rate how well *results* match *query* by term overlap.
+
+    Returns:
+        ``("high", None)`` when the query yields no search terms, or when at
+        least one result shares ``min(2, number_of_query_terms)`` terms with
+        its title, snippet, host or path; otherwise ``("low", reason)``.
+    """
+    terms = _search_terms(query)
+    if not terms:
+        return "high", None
+    required_overlap = min(2, len(terms))
+    for result in results:
+        raw_url = result.get("url", "")
+        try:
+            # Parse once per result instead of once per URL component.
+            parsed = urlparse(raw_url)
+            url_parts = [parsed.netloc, parsed.path]
+        except ValueError:
+            # A malformed result URL must not fail the whole search; fall back
+            # to matching against the raw string.
+            url_parts = [raw_url]
+        haystack = " ".join(
+            [
+                result.get("title", ""),
+                result.get("snippet", ""),
+                *url_parts,
+            ]
+        )
+        if len(terms & _search_terms(haystack)) >= required_overlap:
+            return "high", None
+    return "low", "results do not overlap enough with query terms"
+
+
 def _parse_duckduckgo_results(html: str, limit: int) -> list[dict[str, str]]:
    results: list[dict[str, str]] = []
    pattern = re.compile(
--- a/app-instance/backend/beaver/tools/runtime/executor.py
+++ b/app-instance/backend/beaver/tools/runtime/executor.py
@ -37,6 +37,14 @@ class ToolExecutor:
    ) -> ToolResult:
        """按工具名执行一次调用。"""

+        allowed = context.metadata.get("allowed_tool_names") if context is not None else None
+        if isinstance(allowed, list) and tool_name not in allowed:
+            return ToolResult(
+                success=False,
+                content=f"Tool {tool_name} is not allowed for this node.",
+                tool_name=tool_name,
+                error="tool_not_allowed",
+            )
        tool = self.registry.get(tool_name)
        if tool is None:
            return ToolResult(