feat(tasks): add skill-templated task graph execution

2026-06-23 10:22:58 +08:00
parent 6843d89b2c
commit 53b13e8eac
53 changed files with 4773 additions and 756 deletions
--- a/app-instance/backend/beaver/tasks/attempt_orchestrator.py
+++ b/app-instance/backend/beaver/tasks/attempt_orchestrator.py
@ -0,0 +1,695 @@
+"""Task attempt orchestration for Beaver Task mode."""
+
+from __future__ import annotations
+
+from time import perf_counter
+from typing import Any, Callable
+
+from beaver.coordinator.models import ExecutionNode, TeamRunResult
+from beaver.engine import AgentRunResult
+from beaver.engine.context import SkillContext
+from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
+
+from .evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
+from .models import TaskRecord
+from .planner import TaskExecutionPlan
+
+
+class TaskAttemptOrchestrator:
+    """Own the execution order inside one Task attempt."""
+
+    def __init__(
+        self,
+        *,
+        loaded: Any,
+        create_loop: Callable[[], Any],
+        make_provider_bundle_for_task: Callable[[Any, dict[str, Any]], Any],
+    ) -> None:
+        self.loaded = loaded
+        self.create_loop = create_loop
+        self.make_provider_bundle_for_task = make_provider_bundle_for_task
+
+    async def run(
+        self,
+        *,
+        message: str,
+        runner: Any,
+        kwargs: dict[str, Any],
+        task: TaskRecord,
+    ) -> AgentRunResult:
+        """Execute one attempt of ``task`` and return the final run result.
+
+        The attempt proceeds in a fixed order: start the run on the task
+        service, preselect skills, ask the planner for an execution plan,
+        optionally run the planned team graph, invoke ``runner`` for the final
+        (synthesis) run, then record the run and its evidence on the task and
+        the session.
+
+        Args:
+            message: Current user request forwarded to the planner and runner.
+            runner: Awaitable callable invoked as ``runner(message, **kwargs)``.
+            kwargs: Run options. The dict is copied before being modified.
+                ``session_id`` must be present when the plan is a team plan.
+            task: Task record this attempt belongs to.
+
+        Returns:
+            The runner's result with ``task_id``, ``task_status`` and
+            ``validation_result`` (always ``None``) set on it.
+
+        Raises:
+            RuntimeError: If a required dependency on ``loaded`` is ``None``.
+            KeyError: If the plan is a team plan and ``kwargs`` has no
+                ``session_id``.
+        """
+        task_service = self._require_loaded(self.loaded, "task_service")
+        task_execution_planner = self._require_loaded(self.loaded, "task_execution_planner")
+        session_manager = self._require_loaded(self.loaded, "session_manager")
+
+        # Capture caller-provided values before kwargs is copied and rewritten below.
+        base_execution_context = kwargs.get("execution_context")
+        prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale")
+        output_language_instruction = self._output_language_instruction(prompt_locale)
+        provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs)
+        # Work on a copy so the caller's dict is not mutated; the team factory is
+        # consumed here and therefore never forwarded to the runner.
+        kwargs = dict(kwargs)
+        team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
+        kwargs["provider_bundle"] = provider_bundle
+
+        attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
+        task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
+        # Select skills before planning so the planner receives their summaries
+        # and tool hints.
+        pre_skill_context = self._build_skill_selection_context(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+        )
+        preselected_skills, pre_skill_latency_ms = await self._assemble_task_attempt_skills(
+            task_description=pre_skill_context,
+            provider_bundle=provider_bundle,
+            thinking_enabled=kwargs.get("thinking_enabled"),
+            include_skill_assembly=bool(kwargs.get("include_skill_assembly", True)),
+            pinned_skill_contexts=kwargs.get("pinned_skill_contexts"),
+        )
+        if pre_skill_latency_ms:
+            kwargs["pre_run_latency_ms"] = self._merge_latency_ms(
+                kwargs.get("pre_run_latency_ms"),
+                {"pre_skill_assembly_ms": pre_skill_latency_ms},
+            )
+        plan = await task_execution_planner.plan(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+            provider_bundle=provider_bundle,
+            skill_summaries=self._skill_summaries_for_planner(preselected_skills),
+            tool_hints=self._tool_hints_for_skills(preselected_skills),
+            activated_skills=preselected_skills,
+        )
+        self._append_task_observation(
+            session_manager,
+            task.session_id,
+            event_type="task_execution_planned",
+            payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                **plan.to_event_payload(),
+            },
+        )
+        team_summaries: list[str] = []
+        team_execution_context = ""
+        team_result: TeamRunResult | None = None
+        # Team plans run the execution graph first; its results become context
+        # for the final synthesis run below.
+        if plan.is_team:
+            team_result, team_error = await self._run_team_for_task(
+                plan,
+                task=task,
+                parent_session_id=kwargs["session_id"],
+                provider_bundle_factory=team_provider_bundle_factory
+                or self._build_team_provider_bundle_factory(kwargs),
+            )
+            if team_result is not None:
+                team_summaries = [self._team_summary_for_validation(team_result)]
+                # Intermediate packet: only team evidence exists at this point,
+                # the main run has not happened yet.
+                team_packet = TaskEvidencePacket(
+                    task_id=task.task_id,
+                    attempt_index=attempt_index,
+                    main_run=None,
+                    team_runs=self._team_run_evidence(team_result),
+                    team_node_results=list(team_result.node_results),
+                    final_output="",
+                )
+                team_execution_context = self._join_context(
+                    self._team_execution_context(plan, team_result),
+                    "Rendered team evidence:\n" + render_task_evidence(team_packet),
+                )
+                self._append_task_observation(
+                    session_manager,
+                    task.session_id,
+                    event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
+                    payload={
+                        "task_id": task.task_id,
+                        "attempt_index": attempt_index,
+                        "plan_mode": plan.mode,
+                        "strategy": plan.graph.strategy if plan.graph else None,
+                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                        "team_run_ids": team_result.run_ids,
+                        "team_success": team_result.success,
+                        "node_results": self._team_node_results_for_event(plan, team_result),
+                        "reason": plan.reason,
+                        "error": None if team_result.success else "one or more team nodes failed",
+                    },
+                )
+            else:
+                # The team run produced no result at all (missing graph or an
+                # exception); continue with a fallback context for the main run.
+                team_summaries = [f"Team execution failed: {team_error}"]
+                team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
+                self._append_task_observation(
+                    session_manager,
+                    task.session_id,
+                    event_type="task_team_run_failed",
+                    payload={
+                        "task_id": task.task_id,
+                        "attempt_index": attempt_index,
+                        "plan_mode": plan.mode,
+                        "strategy": plan.graph.strategy if plan.graph else None,
+                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                        "team_run_ids": [],
+                        "team_success": False,
+                        "reason": plan.reason,
+                        "error": team_error,
+                    },
+                )
+
+        outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome(
+            plan,
+            team_result,
+            prompt_locale=prompt_locale,
+        )
+        if plan.is_team:
+            team_execution_context = self._join_context(outcome_context, team_execution_context)
+
+        # The final run reuses the preselected skills and skips a second skill
+        # assembly pass.
+        attempt_kwargs = dict(kwargs)
+        attempt_kwargs.update(
+            {
+                "task_id": task.task_id,
+                "task_mode": True,
+                "attempt_index": attempt_index,
+                "allow_candidate_generation": False,
+                "pinned_skill_contexts": preselected_skills,
+                "include_skill_assembly": False,
+            }
+        )
+        attempt_kwargs["execution_context"] = self._join_context(
+            base_execution_context,
+            output_language_instruction,
+            team_execution_context,
+        )
+        # With team context available the final run is synthesis-only: tools
+        # are disabled.
+        if plan.is_team and team_execution_context:
+            attempt_kwargs["include_tools"] = False
+            attempt_kwargs["max_tool_iterations"] = 0
+        attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+            plan=plan,
+            team_summaries=team_summaries,
+        )
+
+        result = await runner(message, **attempt_kwargs)
+        # Make sure an incompleteness notice is present even if the generated
+        # text does not contain one.
+        if outcome_metadata["task_outcome"] == "incomplete":
+            result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix)
+        self._append_task_observation(
+            session_manager,
+            task.session_id,
+            event_type="task_synthesis_completed",
+            payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                "main_run_id": result.run_id,
+                "plan_mode": plan.mode,
+                "strategy": plan.graph.strategy if plan.graph else None,
+                **outcome_metadata,
+            },
+        )
+        # ``task`` is rebound to the record returned by the task service.
+        task = task_service.append_run(
+            task.task_id,
+            result.run_id,
+            skill_names=self._skill_names_for_run(result.run_id),
+        )
+        evidence_packet = self._build_task_evidence_packet(
+            session_manager=session_manager,
+            task=task,
+            attempt_index=attempt_index,
+            result=result,
+            team_result=team_result,
+        )
+        evidence_text = render_task_evidence(evidence_packet)
+        # Only summary statistics of the evidence are stored in the session event.
+        evidence_debug = {
+            "evidence_run_ids": [
+                item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
+            ],
+            "evidence_session_ids": [
+                item.session_id
+                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
+                if item is not None
+            ],
+            "tool_result_count": sum(
+                len(item.tool_results)
+                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
+                if item is not None
+            ),
+            "evidence_length": len(evidence_text),
+        }
+        session_manager.update_latest_assistant_event_payload(
+            result.session_id,
+            result.run_id,
+            {
+                "task_id": task.task_id,
+                "task_status": task.status,
+                "evidence_status": "recorded",
+            },
+        )
+        session_manager.append_message(
+            result.session_id,
+            run_id=result.run_id,
+            role="system",
+            event_type="task_evidence_recorded",
+            event_payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                "evidence_debug": evidence_debug,
+            },
+            content=None,
+            context_visible=False,
+        )
+        result.task_id = task.task_id
+        result.task_status = task.status
+        result.validation_result = None
+        return result
+
+    async def _run_team_for_task(
+        self,
+        plan: TaskExecutionPlan,
+        *,
+        task: TaskRecord,
+        parent_session_id: str,
+        provider_bundle_factory: Any,
+    ) -> tuple[TeamRunResult | None, str | None]:
+        if plan.graph is None:
+            return None, "team plan did not include an execution graph"
+        try:
+            from beaver.services.team_service import TeamService
+
+            result = await TeamService(self.create_loop()).run_team(
+                plan.graph,
+                parent_task_id=task.task_id,
+                parent_session_id=parent_session_id,
+                parent_run_id=None,
+                provider_bundle_factory=provider_bundle_factory,
+                allow_candidate_generation=False,
+            )
+            return result, None
+        except Exception as exc:
+            return None, str(exc)
+
+    async def _assemble_task_attempt_skills(
+        self,
+        *,
+        task_description: str,
+        provider_bundle: Any,
+        thinking_enabled: bool | None,
+        include_skill_assembly: bool,
+        pinned_skill_contexts: Any,
+    ) -> tuple[list[SkillContext], float]:
+        started = perf_counter()
+        selected = self._coerce_skill_contexts(pinned_skill_contexts)
+        if include_skill_assembly:
+            skill_assembler = self._require_loaded(self.loaded, "skill_assembler")
+            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
+            assembled = await skill_assembler.assemble(
+                task_description=task_description,
+                provider=provider_bundle.auxiliary_provider or provider_bundle.main_provider,
+                model=getattr(runtime, "model", None),
+                embedding_runtime=getattr(provider_bundle, "embedding_runtime", None),
+                thinking_enabled=thinking_enabled,
+            )
+            selected = self._merge_skill_contexts(
+                selected,
+                list(getattr(assembled, "activated_skills", []) or []),
+            )
+        return selected, (perf_counter() - started) * 1000
+
+    @staticmethod
+    def _coerce_skill_contexts(value: Any) -> list[SkillContext]:
+        if not isinstance(value, list):
+            return []
+        return [item for item in value if isinstance(item, SkillContext)]
+
+    @staticmethod
+    def _merge_skill_contexts(left: list[SkillContext], right: list[SkillContext]) -> list[SkillContext]:
+        merged: list[SkillContext] = []
+        seen: set[str] = set()
+        for skill in [*left, *right]:
+            if skill.name in seen:
+                continue
+            seen.add(skill.name)
+            merged.append(skill)
+        return merged
+
+    @staticmethod
+    def _skill_summaries_for_planner(skills: list[SkillContext]) -> list[str]:
+        summaries: list[str] = []
+        for skill in skills:
+            content = " ".join((skill.content or "").split())
+            if len(content) > 240:
+                content = content[:237].rstrip() + "..."
+            summaries.append(f"{skill.name}: {content}" if content else skill.name)
+        return summaries
+
+    @staticmethod
+    def _tool_hints_for_skills(skills: list[SkillContext]) -> list[str]:
+        result: list[str] = []
+        for skill in skills:
+            for hint in skill.tool_hints:
+                if hint and hint not in result:
+                    result.append(hint)
+        return result
+
+    @staticmethod
+    def _require_loaded(loaded: Any, field_name: str) -> Any:
+        value = getattr(loaded, field_name)
+        if value is None:
+            raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
+        return value
+
+    @staticmethod
+    def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]:
+        merged: dict[str, float] = {}
+        if isinstance(current, dict):
+            for key, value in current.items():
+                if isinstance(value, (int, float)):
+                    merged[str(key)] = float(value)
+        for key, value in updates.items():
+            merged[key] = merged.get(key, 0.0) + float(value)
+        return merged
+
+    @staticmethod
+    def _output_language_instruction(prompt_locale: str | None) -> str:
+        locale = normalize_main_agent_prompt_locale(prompt_locale)
+        if locale == "en":
+            return (
+                "Output language: English. Use English for user-facing task titles, summaries, plans, "
+                "and final answers unless the user explicitly requests another language."
+            )
+        if locale == "zh-Hant":
+            return (
+                "輸出語言：繁體中文。除非使用者明確要求其他語言，所有面向使用者的任務標題、摘要、"
+                "計劃與最終回答都使用繁體中文。"
+            )
+        return (
+            "输出语言：简体中文。除非用户明确要求其他语言，所有面向用户的任务标题、摘要、"
+            "计划与最终回答都使用简体中文。"
+        )
+
+    def _skill_names_for_run(self, run_id: str) -> list[str]:
+        store = getattr(self.loaded, "run_memory_store", None)
+        if store is None:
+            return []
+        for record in store.list_runs():
+            if record.run_id == run_id:
+                return [receipt.skill_name for receipt in record.activated_skills]
+        return []
+
+    @staticmethod
+    def _build_skill_selection_context(
+        *,
+        task: TaskRecord,
+        user_message: str,
+        attempt_index: int,
+        plan: TaskExecutionPlan | None = None,
+        team_summaries: list[str] | None = None,
+    ) -> str:
+        phase = f"attempt_{attempt_index}"
+        if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
+            phase = f"revision_attempt_{attempt_index}"
+        elif plan is not None and plan.is_team:
+            phase = f"team_synthesis_attempt_{attempt_index}"
+
+        sections = [
+            f"Task goal:\n{task.goal or task.description}",
+            f"Task description:\n{task.description}",
+            f"Current user request:\n{user_message}",
+            f"Execution phase:\n{phase}",
+            f"Task status:\n{task.status}",
+        ]
+        if task.constraints:
+            sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints))
+        if task.skill_names:
+            sections.append(
+                "Previously activated skills (reuse bias, not pinned):\n"
+                + "\n".join(f"- {item}" for item in task.skill_names)
+            )
+        else:
+            sections.append("Previously activated skills:\nNone")
+        if task.feedback:
+            history_lines = []
+            for item in task.feedback[-5:]:
+                kind = item.get("acceptance_type") or item.get("feedback_type")
+                comment = item.get("comment") or ""
+                run_id = item.get("run_id") or ""
+                history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
+            sections.append("Task acceptance history:\n" + "\n".join(history_lines))
+        if plan is not None:
+            plan_lines = [
+                f"mode: {plan.mode}",
+                f"reason: {plan.reason}",
+            ]
+            if plan.final_synthesis_instruction:
+                plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
+            if plan.graph is not None:
+                plan_lines.append(f"strategy: {plan.graph.strategy}")
+                plan_lines.append(
+                    "nodes:\n"
+                    + "\n".join(
+                        f"- {node.node_id}: {node.task}"
+                        for node in plan.graph.nodes
+                    )
+                )
+            sections.append("Execution plan:\n" + "\n".join(plan_lines))
+        if team_summaries:
+            sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
+        sections.append(
+            "Skill selection instruction:\n"
+            "Prefer reusing previously activated skills when they still match the Task. "
+            "Select new skills only if the current request, revision, or execution plan needs a different capability. "
+            "If no published skill matches, return [] and let the run continue without skills."
+        )
+        return "\n\n".join(section for section in sections if section.strip())
+
+    @staticmethod
+    def _append_task_observation(
+        session_manager: Any,
+        session_id: str,
+        *,
+        event_type: str,
+        payload: dict[str, Any],
+    ) -> None:
+        session_manager.append_message(
+            session_id,
+            role="system",
+            event_type=event_type,
+            event_payload=payload,
+            content=payload.get("reason") or payload.get("error"),
+            context_visible=False,
+        )
+
+    @staticmethod
+    def _join_context(*parts: str | None) -> str:
+        return "\n\n".join(part.strip() for part in parts if part and part.strip())
+
+    @staticmethod
+    def _team_summary_for_validation(result: TeamRunResult) -> str:
+        lines = [
+            f"success={result.success}",
+            f"task_id={result.task_id or ''}",
+            "summary:",
+            result.summary,
+            "nodes:",
+        ]
+        for node in result.node_results:
+            lines.append(
+                f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
+                f"error={node.error or ''} output={node.output_text[:500]}"
+            )
+        return "\n".join(lines)
+
+    @staticmethod
+    def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
+        nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
+        payloads: list[dict[str, Any]] = []
+        for item in result.node_results:
+            payload = item.to_dict()
+            node = nodes.get(item.node_id)
+            if node is not None:
+                payload["selected_skill_names"] = list(node.inherited_pinned_skills)
+                payload["ephemeral_skill_names"] = [
+                    skill.name for skill in node.inherited_pinned_skill_contexts
+                ]
+                payload["skill_query"] = node.agent.metadata.get("skill_query")
+                payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
+                payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
+                payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
+            payloads.append(payload)
+        return payloads
+
+    @staticmethod
+    def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
+        if result is None:
+            return []
+        return [node.evidence for node in result.node_results if node.evidence is not None]
+
+    @staticmethod
+    def _team_synthesis_outcome(
+        plan: TaskExecutionPlan,
+        result: TeamRunResult | None,
+        *,
+        prompt_locale: str | None = None,
+    ) -> tuple[str, str, dict[str, Any]]:
+        """Classify the team run as complete/incomplete for the synthesis step.
+
+        Args:
+            plan: Execution plan whose graph nodes are checked one by one.
+            result: Team run result, or ``None`` when the team did not produce
+                one; every plan node then counts as ``not_run``.
+            prompt_locale: Locale used to pick the incomplete-notice prefix.
+
+        Returns:
+            A ``(context, prefix, metadata)`` tuple: ``context`` is text for
+            the synthesis run, ``prefix`` is the localized incomplete notice
+            (empty unless a required node is incomplete) and ``metadata``
+            holds ``task_outcome``, ``incomplete_node_ids``, ``node_statuses``
+            and ``evidence_gaps``.
+        """
+        # Non-team plans (or team plans without a graph) have nothing to classify.
+        if not plan.is_team or plan.graph is None:
+            metadata = {
+                "task_outcome": "single",
+                "incomplete_node_ids": [],
+                "node_statuses": {},
+                "evidence_gaps": {},
+            }
+            return "Task outcome: single", "", metadata
+
+        result_by_node = {
+            item.node_id: item
+            for item in (result.node_results if result is not None else [])
+        }
+        node_statuses: dict[str, str] = {}
+        evidence_gaps: dict[str, list[str]] = {}
+        incomplete_node_ids: list[str] = []
+        detail_lines: list[str] = []
+        successful_lines: list[str] = []
+        # Iterate the plan's nodes (not the results) so nodes that never ran
+        # are still reported.
+        for node in plan.graph.nodes:
+            node_result = result_by_node.get(node.node_id)
+            status = node_result.completion_status if node_result is not None else "not_run"
+            node_statuses[node.node_id] = status
+            gaps = list(node_result.evidence_gaps) if node_result is not None else []
+            if gaps:
+                evidence_gaps[node.node_id] = gaps
+            # Only required nodes can make the task incomplete; a non-required
+            # node that did not succeed is recorded in node_statuses only.
+            if node.required_for_completion and status != "succeeded":
+                incomplete_node_ids.append(node.node_id)
+                detail_lines.append(
+                    f"- {node.node_id}: status={status}, "
+                    f"finish_reason={node_result.finish_reason if node_result is not None else 'not_run'}, "
+                    f"error={(node_result.error or '') if node_result is not None else 'node did not run'}, "
+                    f"evidence_gaps={gaps}"
+                )
+            elif node_result is not None and status == "succeeded":
+                # Output is capped at 1000 characters per node.
+                successful_lines.append(f"- {node.node_id}: {node_result.output_text[:1000]}")
+
+        task_outcome = "incomplete" if incomplete_node_ids else "complete"
+        metadata = {
+            "task_outcome": task_outcome,
+            "incomplete_node_ids": incomplete_node_ids,
+            "node_statuses": node_statuses,
+            "evidence_gaps": evidence_gaps,
+        }
+        context_parts = [
+            f"Task outcome: {task_outcome}",
+            "Incomplete node IDs: " + (", ".join(incomplete_node_ids) or "none"),
+        ]
+        if detail_lines:
+            context_parts.append("Incomplete required node details:\n" + "\n".join(detail_lines))
+        if successful_lines:
+            context_parts.append("Available successful node evidence:\n" + "\n".join(successful_lines))
+        if task_outcome == "incomplete":
+            context_parts.append(
+                "Synthesis requirement: produce a partial report from available evidence and explicitly state "
+                "that the task is incomplete, partially completed, or missing required evidence."
+            )
+        prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete_node_ids else ""
+        return "\n\n".join(context_parts), prefix, metadata
+
+    @staticmethod
+    def _incomplete_prefix(prompt_locale: str | None) -> str:
+        locale = normalize_main_agent_prompt_locale(prompt_locale)
+        if locale == "en":
+            return "Task incomplete: some required steps failed or lack required evidence. The report below uses available results only.\n\n"
+        if locale == "zh-Hant":
+            return "任務未完成：部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n"
+        return "任务未完成：部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n"
+
+    @staticmethod
+    def _apply_incomplete_prefix(output_text: str, prefix: str) -> str:
+        normalized = output_text.lower()
+        notices = (
+            "任务未完成",
+            "任務未完成",
+            "部分完成",
+            "缺少证据",
+            "缺少證據",
+            "task incomplete",
+            "incomplete task",
+            "partially complete",
+            "missing evidence",
+        )
+        if any(notice in normalized for notice in notices):
+            return output_text
+        return prefix + output_text.lstrip()
+
+    def _build_task_evidence_packet(
+        self,
+        *,
+        session_manager: Any,
+        task: TaskRecord,
+        attempt_index: int,
+        result: AgentRunResult,
+        team_result: TeamRunResult | None,
+    ) -> TaskEvidencePacket:
+        main_run = EvidenceBuilder(session_manager).build_run_evidence(
+            result.session_id,
+            result.run_id,
+            result.output_text,
+            result.finish_reason,
+        )
+        return TaskEvidencePacket(
+            task_id=task.task_id,
+            attempt_index=attempt_index,
+            main_run=main_run,
+            team_runs=self._team_run_evidence(team_result),
+            team_node_results=list(team_result.node_results) if team_result is not None else [],
+            final_output=result.output_text,
+        )
+
+    @staticmethod
+    def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
+        node_lines = [
+            (
+                f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
+                f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
+            )
+            for node in result.node_results
+        ]
+        return "\n\n".join(
+            item
+            for item in [
+                "Task team execution result:",
+                f"Planner reason: {plan.reason}",
+                f"Strategy: {plan.graph.strategy if plan.graph else ''}",
+                f"Team success: {result.success}",
+                f"Team summary:\n{result.summary}",
+                "Node results:\n" + "\n\n".join(node_lines),
+                (
+                    "Final synthesis instruction:\n" + plan.final_synthesis_instruction
+                    if plan.final_synthesis_instruction
+                    else None
+                ),
+                (
+                    "Use successful team outputs as internal evidence. If one or more nodes failed, "
+                    "do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
+                    "with available evidence and clearly state any missing or uncertain data."
+                ),
+            ]
+            if item
+        )
+
+    @staticmethod
+    def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
+        return "\n\n".join(
+            [
+                "Task team execution failed before final synthesis.",
+                f"Planner reason: {plan.reason}",
+                f"Strategy: {plan.graph.strategy if plan.graph else ''}",
+                f"Error: {error}",
+                (
+                    "Proceed as the main agent. Do not blindly repeat failed tool calls; "
+                    "produce a user-visible fallback answer with available evidence and clearly "
+                    "state any missing or uncertain data."
+                ),
+            ]
+        )
+
+    def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any:
+        def factory(node: ExecutionNode) -> Any:
+            node_kwargs = dict(kwargs)
+            node_kwargs.pop("provider_bundle", None)
+            if node.agent.model:
+                node_kwargs["model"] = node.agent.model
+            if node.agent.provider_name:
+                node_kwargs["provider_name"] = node.agent.provider_name
+            return self.make_provider_bundle_for_task(self.loaded, node_kwargs)
+
+        return factory
--- a/app-instance/backend/beaver/tasks/evidence.py
+++ b/app-instance/backend/beaver/tasks/evidence.py
@ -2,6 +2,8 @@

 from __future__ import annotations

+import json
+import re
 from dataclasses import dataclass, field
 from typing import Any

@ -126,6 +128,37 @@ class EvidenceBuilder:
        )


+def evaluate_node_evidence(
+    evidence: RunEvidence,
+    required_evidence: list[str],
+    output_text: str,
+) -> list[str]:
+    """Evaluate v1 coarse-grained node evidence requirements."""
+
+    gaps: list[str] = []
+    successful_tools = [
+        item
+        for item in evidence.tool_results
+        if item.event_payload.get("success") is True
+    ]
+    for raw_requirement in required_evidence:
+        requirement = str(raw_requirement).strip()
+        if not requirement:
+            continue
+        if requirement == "tool_result":
+            if not successful_tools:
+                _append_unique(gaps, "missing required evidence: tool_result")
+        elif requirement == "url":
+            if not any(_tool_evidence_contains_url(item) for item in successful_tools):
+                _append_unique(gaps, "missing required evidence: url")
+        elif requirement == "output":
+            if not output_text.strip():
+                _append_unique(gaps, "missing required evidence: output")
+        else:
+            _append_unique(gaps, f"unsupported evidence requirement: {requirement}")
+    return gaps
+
+
 def render_task_evidence(packet: TaskEvidencePacket) -> str:
    sections = [
        f"Task evidence packet: task_id={packet.task_id} attempt={packet.attempt_index}",
@ -181,3 +214,20 @@ def _render_tool_evidence(item: ToolEvidence) -> str:

 def _optional_str(value: Any) -> str | None:
    return str(value) if value is not None else None
+
+
+_URL_RE = re.compile(r"https?://[^\s<>'\"]+", re.IGNORECASE)
+
+
+def _tool_evidence_contains_url(item: ToolEvidence) -> bool:
+    """Report whether a tool evidence item mentions an http(s) URL anywhere.
+
+    Three texts are searched in order: the item's ``url`` (empty string when
+    unset), its ``content``, and its event payload serialised to JSON.
+    """
+    url_text = item.url or ""
+    body_text = item.content
+    # default=str keeps serialisation from failing on non-JSON payload values.
+    payload_text = json.dumps(item.event_payload, ensure_ascii=False, default=str)
+    for candidate in (url_text, body_text, payload_text):
+        if _URL_RE.search(candidate) is not None:
+            return True
+    return False
+
+
+def _append_unique(values: list[str], value: str) -> None:
+    """Append ``value`` to ``values`` in place unless an equal entry already exists."""
+    if value in values:
+        return
+    values.append(value)
--- a/app-instance/backend/beaver/tasks/planner.py
+++ b/app-instance/backend/beaver/tasks/planner.py
@ -4,11 +4,14 @@ from __future__ import annotations

 import asyncio
 import json
+import os
 from dataclasses import dataclass, field
 from typing import Any, Literal

 from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
+from beaver.engine.context import SkillContext
 from beaver.engine.providers import ProviderBundle
+from beaver.tools.registry import ToolRegistry

 from .models import TaskRecord
 from .skill_resolver import SkillResolutionReport, TaskSkillResolver
@ -17,6 +20,24 @@ from .skill_resolver import SkillResolutionReport, TaskSkillResolver
 TaskExecutionMode = Literal["single", "team"]


+# Temporary name-based denylist until high-risk tool approval is implemented.
+# Keep this policy centralized so planner behavior cannot drift by call site.
+HIGH_RISK_PLANNER_TOOL_NAMES = frozenset(
+    {
+        "delete_file",
+        "execute_command",
+        "external_send",
+        "send_email",
+        "terminal",
+        "write_file",
+    }
+)
+
+
+def _agent_team_enabled() -> bool:
+    """Return whether team planning is enabled via ``BEAVER_AGENT_TEAM_ENABLED``.
+
+    The variable defaults to ``"1"``. After trimming and lower-casing, only
+    ``0``, ``false``, ``no`` and ``off`` disable the feature; any other value
+    (including an empty string) leaves it enabled.
+    """
+    raw_flag = os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1")
+    normalized = raw_flag.strip().lower()
+    disabled_values = {"0", "false", "no", "off"}
+    return normalized not in disabled_values
+
+
@dataclass(slots=True)
 class TaskExecutionPlan:
    mode: TaskExecutionMode
@ -25,14 +46,26 @@ class TaskExecutionPlan:
    final_synthesis_instruction: str = ""
    fallback_error: str | None = None
    skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
+    planner_adaptation: dict[str, Any] = field(default_factory=dict)

    @property
    def is_team(self) -> bool:
        return self.mode == "team" and self.graph is not None

    @classmethod
-    def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
-        return cls(mode="single", reason=reason, fallback_error=fallback_error)
+    def single(
+        cls,
+        reason: str,
+        *,
+        fallback_error: str | None = None,
+        planner_adaptation: dict[str, Any] | None = None,
+    ) -> "TaskExecutionPlan":
+        return cls(
+            mode="single",
+            reason=reason,
+            fallback_error=fallback_error,
+            planner_adaptation=dict(planner_adaptation or {}),
+        )

    def to_event_payload(self) -> dict[str, Any]:
        strategy = self.graph.strategy if self.graph is not None else None
@ -57,6 +90,7 @@ class TaskExecutionPlan:
                if item.ephemeral_guidance_id
            ],
            "skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
+            "planner_adaptation": dict(self.planner_adaptation),
            "fallback_error": self.fallback_error,
        }

@ -65,10 +99,34 @@ class TaskExecutionPlanner:
    """Plan whether a Task attempt should run through a team first."""

    _MAX_NODES = 6
+    _MAX_DEPTH = 4
    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}
+    _ALLOWED_NODE_FIELDS = {
+        "node_id",
+        "task",
+        "use_skill",
+        "skill_query",
+        "depends_on",
+        "input_contract",
+        "output_contract",
+        "requested_tools",
+        "required_evidence",
+        "evidence_contract",
+        "validation_rules",
+        "required_for_completion",
+        "block_downstream_on_partial",
+        "max_tool_iterations",
+        "constraints",
+    }

-    def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
+    def __init__(
+        self,
+        *,
+        task_skill_resolver: TaskSkillResolver | None = None,
+        tool_registry: ToolRegistry | None = None,
+    ) -> None:
        self.task_skill_resolver = task_skill_resolver
+        self.tool_registry = tool_registry

    async def plan(
        self,
@ -78,7 +136,15 @@ class TaskExecutionPlanner:
        attempt_index: int,
        provider_bundle: ProviderBundle | None = None,
        timeout_seconds: float = 30.0,
+        skill_summaries: list[str] | None = None,
+        tool_hints: list[str] | None = None,
+        activated_skills: list[SkillContext] | None = None,
    ) -> TaskExecutionPlan:
+        if not _agent_team_enabled():
+            return TaskExecutionPlan.single("planner_disabled_by_environment")
+        if not self._needs_team_planning(task=task, user_message=user_message):
+            return TaskExecutionPlan.single("planner_skipped_simple_task")
+
        provider = None
        model = None
        if provider_bundle is not None:
@ -87,6 +153,7 @@ class TaskExecutionPlanner:
            model = getattr(runtime, "model", None)
        if provider is None:
            return TaskExecutionPlan.single("planner_provider_unavailable")
+        selected_template, base_adaptation = self._select_team_template(activated_skills or [])
        try:
            response = await asyncio.wait_for(
                provider.chat(
@ -104,6 +171,10 @@ class TaskExecutionPlanner:
                                task=task,
                                user_message=user_message,
                                attempt_index=attempt_index,
+                                skill_summaries=skill_summaries or [],
+                                tool_hints=tool_hints or [],
+                                activated_skills=activated_skills or [],
+                                selected_template=selected_template,
                            ),
                        },
                    ],
@ -114,7 +185,40 @@ class TaskExecutionPlanner:
                ),
                timeout=timeout_seconds,
            )
-            plan = self.from_json(response.content or "")
+            try:
+                plan = self._from_json_or_raise(response.content or "")
+            except Exception as first_error:
+                repair_response = await asyncio.wait_for(
+                    provider.chat(
+                        messages=[
+                            {
+                                "role": "system",
+                                "content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.",
+                            },
+                            {
+                                "role": "user",
+                                "content": (
+                                    "Repair the invalid planner JSON using the task-only schema from the original "
+                                    f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}"
+                                ),
+                            },
+                        ],
+                        tools=None,
+                        model=model,
+                        max_tokens=4096,
+                        temperature=0.0,
+                    ),
+                    timeout=timeout_seconds,
+                )
+                try:
+                    plan = self._from_json_or_raise(repair_response.content or "")
+                except Exception as repair_error:
+                    return TaskExecutionPlan.single(
+                        "planner_fallback_single",
+                        fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}",
+                        planner_adaptation=base_adaptation,
+                    )
+            self._merge_adaptation(plan, base_adaptation)
            return await self._resolve_plan(
                plan,
                task=task,
@ -152,30 +256,90 @@ class TaskExecutionPlanner:
            graph.validate()
            plan.graph = graph
            plan.skill_resolution_report = reports
+            self._merge_skill_resolution_adaptation(plan, reports)
            return plan
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")

+    @staticmethod
+    def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool:
+        """Cheap keyword gate deciding whether the planner model is consulted at all.
+
+        The task goal, task description and user message are joined (skipping
+        empty parts) and lower-cased. The result is True when that text
+        contains any of the English or Chinese complexity markers below.
+        Matching is plain substring containment, not word matching.
+        """
+        fragments = [value for value in (task.goal, task.description, user_message) if value]
+        haystack = " ".join(fragments).lower()
+        if not haystack.strip():
+            return False
+
+        english_markers = (
+            "agent team",
+            "sub-agent",
+            "multi-agent",
+            "parallel",
+            "dag",
+            "workflow",
+            "review",
+            "research",
+            "compare",
+            "comparison",
+            "architecture",
+            "refactor",
+            "multi-file",
+            "end-to-end",
+        )
+        chinese_markers = (
+            "并行",
+            "团队",
+            "多智能体",
+            "子代理",
+            "工作流",
+            "评审",
+            "审查",
+            "调研",
+            "研究",
+            "对比",
+            "架构",
+            "重构",
+            "多文件",
+            "端到端",
+        )
+        for marker in (*english_markers, *chinese_markers):
+            if marker in haystack:
+                return True
+        return False
+
    def from_json(self, text: str) -> TaskExecutionPlan:
        try:
-            payload = self._parse_json_object(text)
-            mode = str(payload.get("mode") or "single").strip().lower()
-            reason = str(payload.get("reason") or "")
-            if mode != "team":
-                return TaskExecutionPlan.single(reason or "planner_selected_single")
-
-            graph = self._graph_from_payload(payload)
-            graph.validate()
-            return TaskExecutionPlan(
-                mode="team",
-                reason=reason or "planner_selected_team",
-                graph=graph,
-                final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
-            )
+            return self._from_json_or_raise(text)
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))

-    def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
+    def _from_json_or_raise(self, text: str) -> TaskExecutionPlan:
+        """Build a plan from planner JSON, letting parse/validation errors propagate.
+
+        Any ``mode`` other than ``team`` (including a missing one) yields a
+        single-agent plan. For ``team`` the node graph is built and validated
+        against ``_MAX_DEPTH`` before the plan is returned. The adaptation
+        payload extracted from the JSON is attached to the plan in both cases.
+        """
+        payload = self._parse_json_object(text)
+        requested_mode = str(payload.get("mode") or "single").strip().lower()
+        stated_reason = str(payload.get("reason") or "")
+        adaptation = self._adaptation_from_payload(payload)
+
+        if requested_mode == "team":
+            graph = self._graph_from_payload(payload, adaptation=adaptation)
+            graph.validate(max_depth=self._MAX_DEPTH)
+            synthesis = str(payload.get("final_synthesis_instruction") or "")
+            return TaskExecutionPlan(
+                mode="team",
+                reason=stated_reason or "planner_selected_team",
+                graph=graph,
+                final_synthesis_instruction=synthesis,
+                planner_adaptation=adaptation,
+            )
+
+        return TaskExecutionPlan.single(
+            stated_reason or "planner_selected_single",
+            planner_adaptation=adaptation,
+        )
+
+    def _graph_from_payload(
+        self,
+        payload: dict[str, Any],
+        *,
+        adaptation: dict[str, Any],
+    ) -> ExecutionGraph:
        strategy = str(payload.get("strategy") or "sequence").strip().lower()
        if strategy not in self._SUPPORTED_STRATEGIES:
            raise ValueError(f"Unsupported team strategy: {strategy}")
@ -189,16 +353,27 @@ class TaskExecutionPlanner:
        for index, item in enumerate(raw_nodes, start=1):
            if not isinstance(item, dict):
                raise ValueError("Each team node must be an object")
-            agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
-            skill_query = str(item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or "").strip()
-            requested_capabilities = _string_list(
-                item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
-            )
-            requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
-            node_id = str(item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}").strip()
+            unsupported = sorted(set(item) - self._ALLOWED_NODE_FIELDS)
+            if unsupported:
+                raise ValueError(f"Unsupported team node field(s): {', '.join(unsupported)}")
+            node_id = str(item.get("node_id") or f"node_{index}").strip()
            task = str(item.get("task") or "").strip()
            if not node_id or not task:
-                raise ValueError("Each team node requires node_id/id and task")
+                raise ValueError("Each team node requires node_id and task")
+            allowed_tool_names = self._resolve_requested_tools(
+                item.get("requested_tools"),
+                warnings=adaptation["warnings"],
+            )
+            use_skill = _optional_str(item.get("use_skill"))
+            skill_query = _optional_str(item.get("skill_query")) or task
+            if use_skill is not None or "skill_query" in item:
+                adaptation.setdefault("node_skill_bindings", []).append(
+                    {
+                        "node_id": node_id,
+                        "use_skill": use_skill,
+                        "skill_query": skill_query,
+                    }
+                )
            nodes.append(
                ExecutionNode(
                    node_id=node_id,
@ -208,30 +383,147 @@ class TaskExecutionPlanner:
                        role="",
                        system_prompt="",
                        metadata={
+                            "use_skill": use_skill,
                            "skill_query": skill_query,
-                            "required_capabilities": requested_capabilities,
-                            "requested_tags": requested_tags,
+                            "required_capabilities": [],
+                            "requested_tags": [],
                            "sub_agent_kind": "generic_skill_worker",
                        },
                    ),
                    depends_on=[str(dep) for dep in item.get("depends_on") or []],
-                    inherited_pinned_skills=[str(name) for name in item.get("pinned_skills") or []],
                    constraints=[str(value) for value in item.get("constraints") or []],
-                    expected_output=str(item.get("expected_output") or "") or None,
+                    input_contract=_dict_value(item.get("input_contract")),
+                    output_contract=_dict_value(item.get("output_contract")),
+                    allowed_tool_names=allowed_tool_names,
+                    required_evidence=_string_list(item.get("required_evidence")),
+                    evidence_contract=_dict_value(item.get("evidence_contract")),
+                    validation_rules=_string_list(item.get("validation_rules")),
+                    required_for_completion=bool(item.get("required_for_completion", True)),
+                    block_downstream_on_partial=bool(item.get("block_downstream_on_partial", False)),
+                    max_tool_iterations=_optional_int(item.get("max_tool_iterations")),
                )
            )
        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]

+    def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None:
+        """Filter planner-requested tool names down to those a node may use.
+
+        ``None`` is passed through unchanged. Otherwise each requested name is
+        dropped, with a warning appended to ``warnings``, when its lower-cased
+        form is in ``HIGH_RISK_PLANNER_TOOL_NAMES`` or when it cannot be found
+        in the tool registry (which includes the case of no registry being
+        configured). The surviving names are returned in request order.
+        """
+        if value is None:
+            return None
+        registry = self.tool_registry
+        accepted: list[str] = []
+        for tool_name in _string_list(value):
+            if tool_name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES:
+                _append_unique(warnings, f"requires_high_risk_review: {tool_name}")
+            elif registry is None or registry.get(tool_name) is None:
+                _append_unique(warnings, f"unknown tool removed: {tool_name}")
+            else:
+                accepted.append(tool_name)
+        return accepted
+
+    @staticmethod
+    def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]:
+        """Extract the planner's ``adaptation`` object as a fresh dict.
+
+        A missing or non-dict value becomes an empty dict. The ``warnings``
+        key is always present afterwards, normalised through ``_string_list``.
+        """
+        candidate = payload.get("adaptation")
+        if isinstance(candidate, dict):
+            result = dict(candidate)
+        else:
+            result = {}
+        result["warnings"] = _string_list(result.get("warnings"))
+        return result
+
+    @staticmethod
+    def _select_team_template(
+        activated_skills: list[SkillContext],
+    ) -> tuple[SkillContext | None, dict[str, Any]]:
+        """Pick the primary team template among activated skills.
+
+        A skill qualifies when its ``team_template`` is a dict holding a list
+        under ``nodes``. The first qualifying skill is selected; later ones are
+        listed as ignored. Template warnings from every activated skill are
+        collected, prefixed with the skill name, without duplicates.
+
+        Returns the selected skill (or ``None``) and the base adaptation
+        summary, in which ``template_used`` always starts out ``False``.
+        """
+        templated: list[SkillContext] = []
+        notes: list[str] = []
+        for skill in activated_skills:
+            template = skill.team_template
+            if isinstance(template, dict) and isinstance(template.get("nodes"), list):
+                templated.append(skill)
+            for note in skill.team_template_warnings:
+                _append_unique(notes, f"{skill.name}: {note}")
+
+        if templated:
+            primary = templated[0]
+            chosen_name = primary.name
+            why = "first activated skill with a valid team template"
+        else:
+            primary = None
+            chosen_name = None
+            why = "no activated skill has a valid team template"
+
+        summary = {
+            "template_used": False,
+            "selected_template": chosen_name,
+            "selection_reason": why,
+            "ignored_templates": [skill.name for skill in templated[1:]],
+            "warnings": notes,
+        }
+        return primary, summary
+
+    @staticmethod
+    def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None:
+        """Replace ``plan.planner_adaptation`` with a sanitised merge of two sources.
+
+        ``base`` is the template-selection summary; the plan's current
+        ``planner_adaptation`` is what the planner reported. Only known keys
+        survive the merge:
+
+        * ``selected_template``, ``selection_reason`` and ``ignored_templates``
+          always come from ``base``.
+        * ``warnings`` are the base warnings followed by the planner's,
+          stringified and de-duplicated.
+        * ``template_used`` is the planner's claim, but only when a template
+          was actually selected.
+        * ``node_skill_bindings`` is copied when the planner payload holds a
+          list; non-dict entries are dropped.
+
+        The plan is mutated in place; nothing is returned.
+        """
+        reported = dict(plan.planner_adaptation)
+        selected_template = base.get("selected_template")
+
+        combined_warnings: list[str] = []
+        for warning in [*base.get("warnings", []), *reported.get("warnings", [])]:
+            _append_unique(combined_warnings, str(warning))
+
+        # The planner cannot have used a template that was never offered to it,
+        # so its claim is ignored when no template was selected.
+        template_used = selected_template is not None and bool(reported.get("template_used", False))
+
+        merged = {
+            "template_used": template_used,
+            "selected_template": selected_template,
+            "selection_reason": base.get("selection_reason"),
+            "ignored_templates": list(base.get("ignored_templates", [])),
+            "warnings": combined_warnings,
+        }
+        bindings = reported.get("node_skill_bindings")
+        if isinstance(bindings, list):
+            merged["node_skill_bindings"] = [dict(item) for item in bindings if isinstance(item, dict)]
+        plan.planner_adaptation = merged
+
+    @staticmethod
+    def _merge_skill_resolution_adaptation(
+        plan: TaskExecutionPlan,
+        reports: list[SkillResolutionReport],
+    ) -> None:
+        """Fold skill-resolution results into ``plan.planner_adaptation`` in place.
+
+        Every report's warnings are appended to the adaptation's ``warnings``
+        list (created if absent) without duplicates. When a report requested a
+        skill by name but no exact binding was used, the matching entry in
+        ``node_skill_bindings`` (looked up by node id) gets a
+        ``fallback_reason``.
+        """
+        adaptation = plan.planner_adaptation
+        collected = adaptation.setdefault("warnings", [])
+
+        by_node: dict[str, Any] = {}
+        for entry in adaptation.get("node_skill_bindings") or []:
+            if isinstance(entry, dict):
+                by_node[str(entry.get("node_id"))] = entry
+
+        for report in reports:
+            for note in report.warnings:
+                _append_unique(collected, note)
+            target = by_node.get(report.node_id)
+            if target is None:
+                continue
+            if report.requested_skill_name and not report.exact_binding_used:
+                target["fallback_reason"] = f"use_skill unresolved; {report.reason}"
+
    @staticmethod
    def _prompt(
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
+        skill_summaries: list[str] | None = None,
+        tool_hints: list[str] | None = None,
+        activated_skills: list[SkillContext] | None = None,
+        selected_template: SkillContext | None = None,
    ) -> str:
        history_note = ""
        if task.feedback:
            history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
+        skill_note = ""
+        if skill_summaries:
+            skill_note = "\nActivated skill summaries:\n" + "\n".join(f"- {item}" for item in skill_summaries)
+        guidance_note = ""
+        if activated_skills:
+            guidance_note = "\nActivated Skill guidance:\n" + "\n".join(
+                f"[{skill.name}]\n{skill.content}" for skill in activated_skills
+            )
+        template_note = ""
+        if selected_template is not None:
+            template_note = "\nPrimary Skill team template:\n" + json.dumps(
+                {
+                    "skill_name": selected_template.name,
+                    "skill_version": selected_template.version,
+                    "template": selected_template.team_template,
+                },
+                ensure_ascii=False,
+                indent=2,
+            )
+        tool_note = ""
+        if tool_hints:
+            tool_note = "\nActivated skill tool hints:\n" + "\n".join(f"- {item}" for item in tool_hints)
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -241,13 +533,24 @@ class TaskExecutionPlanner:
            '  "mode": "single" | "team",\n'
            '  "reason": "short reason",\n'
            '  "strategy": "sequence" | "parallel" | "dag",\n'
-            '  "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
-            '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
+            '  "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", '
+            '"skill_query": "optional dynamic skill query", "depends_on": [], '
+            '"input_contract": {}, "output_contract": {}, "requested_tools": [], '
+            '"required_evidence": [], "evidence_contract": {}, "validation_rules": [], '
+            '"required_for_completion": true, "block_downstream_on_partial": false, '
+            '"max_tool_iterations": 3, "constraints": []}],\n'
+            '  "adaptation": {"template_used": true, "warnings": []},\n'
            '  "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
            "}\n\n"
+            "Node definitions are task-only. Never output agent or role fields. Use at most one primary "
+            "Skill template; treat all other activated Skills as guidance.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
+            f"{skill_note}"
+            f"{guidance_note}"
+            f"{template_note}"
+            f"{tool_note}"
            f"{history_note}"
        )

@ -275,6 +578,26 @@ def _optional_str(value: Any) -> str | None:
    return text or None


+def _optional_int(value: Any) -> int | None:
+    """Coerce an optional planner-supplied ``max_tool_iterations`` value to ``int``.
+
+    Returns ``None`` for ``None`` or an empty string, otherwise the
+    non-negative integer value.
+
+    Raises:
+        ValueError: if the value is a bool, a non-integral or non-finite
+            float, cannot be converted to an integer at all, or is negative.
+    """
+    if value in (None, ""):
+        return None
+    # bool is an int subclass; True/False must not be accepted as 1/0.
+    if isinstance(value, bool):
+        raise ValueError("max_tool_iterations must be an integer")
+    # int() would silently truncate 2.9 to 2; reject instead of guessing.
+    # is_integer() is also False for inf and nan.
+    if isinstance(value, float) and not value.is_integer():
+        raise ValueError("max_tool_iterations must be an integer")
+    try:
+        result = int(value)
+    except (TypeError, ValueError, OverflowError) as exc:
+        # Normalise every conversion failure to one clear validation error.
+        raise ValueError("max_tool_iterations must be an integer") from exc
+    if result < 0:
+        raise ValueError("max_tool_iterations must be non-negative")
+    return result
+
+
+def _dict_value(value: Any) -> dict[str, Any]:
+    """Return a shallow copy of ``value`` when it is a dict, else an empty dict."""
+    if not isinstance(value, dict):
+        return {}
+    return dict(value)
+
+
+def _append_unique(values: list[str], value: str) -> None:
+    """Append ``value`` to ``values`` in place, skipping empty and already-present values."""
+    if not value or value in values:
+        return
+    values.append(value)
+
+
 def _string_list(value: Any) -> list[str]:
    if not isinstance(value, list):
        if isinstance(value, str):
--- a/app-instance/backend/beaver/tasks/router.py
+++ b/app-instance/backend/beaver/tasks/router.py
@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import json
+import re
 from typing import Any

 from .models import MainAgentDecision, TaskRecord
@ -24,6 +25,15 @@ class MainAgentRouter:
        thinking_enabled: bool | None = None,
        timeout_seconds: float = 8.0,
    ) -> MainAgentDecision:
+        if active_task is None and _is_obvious_simple_chat(message):
+            return MainAgentDecision(mode="simple", reason="obvious_simple_chat", action="simple_chat")
+        if active_task is None and _is_obvious_task_request(message):
+            return MainAgentDecision(
+                mode="task",
+                reason="obvious_task",
+                starts_new_task=True,
+                action="create_task",
+            )
        if provider is None:
            return self._apply_active_task_boundary(
                self._fallback(active_task=active_task, reason="router_provider_unavailable"),
@ -246,6 +256,64 @@ def _clean_short_title(value: Any) -> str | None:
    return title[:40] or None


+def _is_obvious_simple_chat(message: str) -> bool:
+    """Return True when ``message`` is clearly small talk or a simple text transform.
+
+    The message is compacted, lower-cased and stripped of surrounding
+    punctuation. It is never treated as simple chat when it is empty,
+    references a URL or path, or looks like a fresh task request. Otherwise
+    it qualifies when it is exactly one of the known greetings/thanks, or
+    when it is at most 1200 characters long and starts with a
+    translate/polish/rewrite/proofread/summarise prefix.
+    """
+    text = _compact_text(message).lower().strip("!！?？。.,，~～")
+    if not text:
+        return False
+    if _has_url_or_path(text) or _looks_like_fresh_task_request(text):
+        return False
+    # Surrounding punctuation was stripped above, so entries never carry it.
+    # Both "thank you" and "thankyou" are listed: the multi-word prefix
+    # "summarize this" below implies spaces survive _compact_text.
+    # NOTE(review): confirm against _compact_text.
+    greetings = {
+        "hi",
+        "hello",
+        "hey",
+        "thanks",
+        "thank you",
+        "thankyou",
+        "谢谢",
+        "谢了",
+        "多谢",
+        "你好",
+        "您好",
+        "嗨",
+        "在吗",
+        "早上好",
+        "下午好",
+        "晚上好",
+        "辛苦了",
+    }
+    if text in greetings:
+        return True
+    simple_prefixes = (
+        "翻译",
+        "translate",
+        "润色",
+        "改写",
+        "校对",
+        "总结下面",
+        "总结这段",
+        "摘要下面",
+        "summarize this",
+    )
+    return len(text) <= 1200 and text.startswith(simple_prefixes)
+
+
+def _is_obvious_task_request(message: str) -> bool:
+    """Return whether ``message`` clearly starts a new task.
+
+    Empty messages and explicit follow-ups to an existing task never qualify.
+    Otherwise a URL or path reference is sufficient, and failing that the
+    fresh-task heuristic decides.
+    """
+    text = _compact_text(message)
+    if not text or _looks_like_explicit_task_followup(text):
+        return False
+    return _has_url_or_path(text) or _looks_like_fresh_task_request(text)
+
+
+def _has_url_or_path(text: str) -> bool:
+    """Return whether ``text`` mentions a web URL or something shaped like a file path.
+
+    URLs are detected by an ``http://``/``https://`` scheme or a ``www.``
+    prefix. A path must begin at the start of the text or after whitespace or
+    a quote/backtick, and take one of these forms: ``./`` or ``../``
+    relative, ``~/`` home-relative, ``/x`` absolute, a dotfile such as
+    ``.env``, or a Windows drive path such as ``C:\\dir``.
+
+    Bare punctuation runs and numeric shorthands ("...", "~~", "~5", ".5")
+    are deliberately not paths: callers use a positive result to create a
+    task without consulting the router model.
+    """
+    if re.search(r"https?://|www\.", text):
+        return True
+    path_pattern = (
+        r"(?:^|[\s'\"`])"
+        r"(?:"
+        r"\.{1,2}[\\/]"  # ./ or ../ (also with backslash)
+        r"|~[\\/]"  # ~/ home-relative
+        r"|/[^\s'\"`]"  # absolute path with at least one following character
+        r"|\.[A-Za-z_]"  # dotfile such as .env or .gitignore
+        r"|[a-zA-Z]:[\\/][^\s'\"`]"  # Windows drive path
+        r")"
+    )
+    return re.search(path_pattern, text) is not None
+
+
 def _looks_like_explicit_task_followup(message: str) -> bool:
    text = _compact_text(message)
    if not text:
@ -307,6 +375,16 @@ def _looks_like_fresh_task_request(message: str) -> bool:
        "看看最新",
        "最新",
        "今天",
+        "昨天",
+        "昨日",
+        "昨晚",
+        "刚刚",
+        "最近",
+        "近期",
+        "本届",
+        "本场",
+        "这场",
+        "上一场",
        "明天",
        "上传",
        "下载",
@ -324,6 +402,12 @@ def _looks_like_fresh_task_request(message: str) -> bool:
        "look up",
        "latest",
        "today",
+        "yesterday",
+        "last night",
+        "recent",
+        "recently",
+        "this match",
+        "this game",
        "tomorrow",
        "upload",
        "download",
--- a/app-instance/backend/beaver/tasks/skill_resolver.py
+++ b/app-instance/backend/beaver/tasks/skill_resolver.py
@ -7,9 +7,11 @@ from dataclasses import dataclass, field, replace
 from typing import Any

 from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
+from beaver.engine.context import SkillContext
 from beaver.engine.providers import ProviderBundle
 from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
 from beaver.skills.catalog.loader import SkillsLoader
+from beaver.skills.catalog.utils import strip_frontmatter
 from beaver.skills.drafts import DraftService
 from beaver.skills.learning import EphemeralGuidanceSynthesizer
 from beaver.tasks.models import TaskRecord
@ -24,6 +26,9 @@ class SkillResolutionReport:
    ephemeral_guidance_id: str | None = None
    ephemeral_guidance_name: str | None = None
    ephemeral_used: bool = False
+    requested_skill_name: str | None = None
+    exact_binding_used: bool = False
+    warnings: list[str] = field(default_factory=list)
    reason: str = ""

    def to_dict(self) -> dict[str, Any]:
@ -35,6 +40,9 @@ class SkillResolutionReport:
            "ephemeral_guidance_id": self.ephemeral_guidance_id,
            "ephemeral_guidance_name": self.ephemeral_guidance_name,
            "ephemeral_used": self.ephemeral_used,
+            "requested_skill_name": self.requested_skill_name,
+            "exact_binding_used": self.exact_binding_used,
+            "warnings": list(self.warnings),
            "reason": self.reason,
        }

@ -87,12 +95,45 @@ class TaskSkillResolver:
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionNode, SkillResolutionReport]:
+        use_skill = str(node.agent.metadata.get("use_skill") or "").strip()
        skill_query = str(node.agent.metadata.get("skill_query") or node.task or node.node_id).strip()
+        warnings: list[str] = []
        required_capabilities = [
            str(item).strip()
            for item in node.agent.metadata.get("required_capabilities", [])
            if str(item).strip()
        ]
+        if use_skill:
+            exact_context = self._load_exact_skill_context(use_skill)
+            if exact_context is not None:
+                resolved = self._generic_node(
+                    node,
+                    pinned_skill_names=_merge_names(node.inherited_pinned_skills, [use_skill]),
+                    pinned_skill_contexts=_merge_skill_contexts(
+                        node.inherited_pinned_skill_contexts,
+                        [exact_context],
+                    ),
+                    metadata={
+                        **node.agent.metadata,
+                        "use_skill": use_skill,
+                        "skill_query": skill_query,
+                        "required_capabilities": required_capabilities,
+                        "selected_skill_names": [use_skill],
+                        "ephemeral_skill_names": [],
+                        "exact_binding_used": True,
+                    },
+                )
+                return resolved, SkillResolutionReport(
+                    node_id=node.node_id,
+                    skill_query=skill_query,
+                    required_capabilities=required_capabilities,
+                    selected_skill_names=[use_skill],
+                    requested_skill_name=use_skill,
+                    exact_binding_used=True,
+                    reason="exact use_skill binding",
+                )
+            warnings.append(f"use_skill unresolved: {use_skill}")
+
        if self._is_summary_only_node(node, skill_query=skill_query, required_capabilities=required_capabilities):
            resolved = self._generic_node(
                node,
@ -104,6 +145,7 @@ class TaskSkillResolver:
                    "required_capabilities": required_capabilities,
                    "selected_skill_names": [],
                    "ephemeral_skill_names": [],
+                    "exact_binding_used": False,
                    "summary_uses_dependency_outputs_only": True,
                },
            )
@ -113,6 +155,9 @@ class TaskSkillResolver:
                required_capabilities=required_capabilities,
                selected_skill_names=[],
                ephemeral_used=False,
+                requested_skill_name=use_skill or None,
+                exact_binding_used=False,
+                warnings=warnings,
                reason="summary node uses dependency outputs directly",
            )

@ -141,6 +186,7 @@ class TaskSkillResolver:
                    "required_capabilities": required_capabilities,
                    "selected_skill_names": selected,
                    "ephemeral_skill_names": [],
+                    "exact_binding_used": False,
                },
            )
            return resolved, SkillResolutionReport(
@ -149,6 +195,9 @@ class TaskSkillResolver:
                required_capabilities=required_capabilities,
                selected_skill_names=selected,
                ephemeral_used=False,
+                requested_skill_name=use_skill or None,
+                exact_binding_used=False,
+                warnings=warnings,
                reason="matched published skill",
            )

@ -174,6 +223,7 @@ class TaskSkillResolver:
                "ephemeral_guidance_id": missing.guidance_id,
                "ephemeral_guidance_name": missing.guidance_name,
                "ephemeral_skill_names": [missing.skill_context.name],
+                "exact_binding_used": False,
            },
        )
        return resolved, SkillResolutionReport(
@ -183,9 +233,27 @@ class TaskSkillResolver:
            ephemeral_guidance_id=missing.guidance_id,
            ephemeral_guidance_name=missing.guidance_name,
            ephemeral_used=True,
+            requested_skill_name=use_skill or None,
+            exact_binding_used=False,
+            warnings=warnings,
            reason="generated ephemeral guidance for missing sub-agent capability",
        )

+    def _load_exact_skill_context(self, name: str) -> SkillContext | None:
+        """Build the context for an exactly bound published skill, or None if unusable."""
+        skill_record = self.skills_loader.get_skill_record(name)
+        published_text = self.skills_loader.load_published_skill(name)
+        body = strip_frontmatter(published_text).strip() if published_text else ""
+        # Binding needs both a skill record and non-empty text once frontmatter is stripped.
+        return None if (skill_record is None or not body) else SkillContext(
+            name=name,
+            content=body,
+            version=skill_record.version,
+            content_hash=skill_record.content_hash or "",
+            activation_reason="explicit_node_binding",
+            tool_hints=list(skill_record.tool_hints),
+        )
+
    async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
        candidates = self.skills_loader.build_selection_candidates()
        if not candidates:
@ -336,3 +404,14 @@ def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
        if name and name not in result:
            result.append(name)
    return result
+
+
+def _merge_skill_contexts(parent: list[SkillContext], selected: list[SkillContext]) -> list[SkillContext]:
+    """Concatenate parent and selected contexts, dropping later contexts that repeat a name.
+
+    Order is preserved and the first context seen for a name wins, so parent entries take precedence.
+    """
+    first_by_name: dict[str, SkillContext] = {}
+    for candidate in (*parent, *selected):
+        first_by_name.setdefault(candidate.name, candidate)
+    return list(first_by_name.values())