feat(beaver): 完成Task Team功能v1实现，重构后端架构支持统一内核

新增内部Task系统，包括验证、反馈门控机制，实现自动质量验证 (通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。实现Agent Team v1协调器，支持sequence/parallel/dag执行策略， sub-agent复用主AgentLoop，每个run使用独立memory snapshot。建立Skill学习pipeline，包含draft/审核/发布/回滚完整生命周期，通过Task验证通过且用户满意才生成学习候选。重构目录结构，移除third_party依赖，建立统一engine内核，所有agent共享运行时基础组件。更新ContextBuilder清理provider消息字段，增强SkillContext版本管理，集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions
--- a/app-instance/backend/beaver/services/agent_service.py
+++ b/app-instance/backend/beaver/services/agent_service.py
@ -15,9 +15,13 @@ from __future__ import annotations
 import asyncio
 from pathlib import Path
 from typing import Any
+from uuid import uuid4

+from beaver.coordinator.models import ExecutionNode, TeamRunResult
 from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader
+from beaver.engine.providers import make_provider_bundle
 from beaver.foundation.events import InboundMessage, OutboundMessage
+from beaver.tasks import MainAgentRouter, TaskExecutionPlan, TaskRecord, ValidationResult


 class AgentService:
@ -45,6 +49,7 @@ class AgentService:
        self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
        self._loop: AgentLoop | None = None
        self._run_task: asyncio.Task[None] | None = None
+        self._main_agent_router = MainAgentRouter()

    def create_loop(self) -> AgentLoop:
        """创建并缓存当前 service 使用的 AgentLoop。"""
@ -176,7 +181,7 @@ class AgentService:
                "use 'await AgentService.submit_direct(...)' after start()."
            )
        loop = self.create_loop()
-        return await loop.process_direct(message, **kwargs)
+        return await self._process_with_main_agent(message, runner=loop.process_direct, kwargs=kwargs)

    async def submit_direct(
        self,
@ -189,7 +194,502 @@ class AgentService:
        """

        loop = self.create_loop()
-        return await loop.submit_direct(message, **kwargs)
+        return await self._process_with_main_agent(message, runner=loop.submit_direct, kwargs=kwargs)
+
+    async def submit_feedback(
+        self,
+        *,
+        session_id: str,
+        run_id: str,
+        feedback_type: str,
+        comment: str | None = None,
+    ) -> dict[str, Any]:
+        """Record chat feedback for the internal task linked to a run."""
+
+        loaded = self.create_loop().boot()
+        task_service = self._require_loaded(loaded, "task_service")
+        task = task_service.get_task_by_run_id(run_id)
+        if task is None or task.session_id != session_id:
+            raise ValueError(f"No internal task found for run_id={run_id!r}")
+
+        normalized = feedback_type.strip().lower()
+        if normalized not in {"satisfied", "revise", "abandon"}:
+            raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
+
+        already_recorded = any(
+            item.get("run_id") == run_id and item.get("feedback_type") == normalized
+            for item in task.feedback
+        )
+        conflicting_feedback = next(
+            (
+                item
+                for item in task.feedback
+                if item.get("run_id") == run_id and item.get("feedback_type") != normalized
+            ),
+            None,
+        )
+        if conflicting_feedback is not None:
+            raise ValueError(
+                f"Feedback for run_id={run_id!r} was already recorded as "
+                f"{conflicting_feedback.get('feedback_type')!r}"
+            )
+        if task.status in {"closed", "abandoned"} and not already_recorded:
+            raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
+        updated = task if already_recorded else task_service.add_feedback(
+            task.task_id,
+            feedback_type=normalized,
+            comment=comment,
+            run_id=run_id,
+        )
+        session_manager = self._require_loaded(loaded, "session_manager")
+        session_manager.update_latest_assistant_event_payload(
+            session_id,
+            run_id,
+            {
+                "task_id": updated.task_id,
+                "task_status": updated.status,
+                "feedback_state": normalized,
+            },
+        )
+        if not already_recorded:
+            session_manager.append_message(
+                session_id,
+                run_id=run_id,
+                role="system",
+                event_type="task_feedback_recorded",
+                event_payload={
+                    "task_id": task.task_id,
+                    "feedback_type": normalized,
+                    "comment": comment,
+                    "task_status": updated.status,
+                },
+                content=comment,
+                context_visible=False,
+            )
+
+        generated_candidates = []
+        validation = ValidationResult.from_dict(updated.validation_result)
+        if already_recorded:
+            generated_candidates = []
+        elif normalized == "satisfied" and validation is not None and validation.accepted:
+            skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
+            generated_candidates = [item.to_dict() for item in skill_learning_service.build_learning_candidates()]
+        elif normalized == "abandon":
+            memory_service = self._require_loaded(loaded, "memory_service")
+            memory_service.get_store().add(
+                "memory",
+                (
+                    f"Failure memory: task {task.task_id} in session {session_id} was abandoned. "
+                    f"Reason: {(comment or 'not specified').strip()}"
+                ),
+            )
+
+        return {
+            "session_id": session_id,
+            "run_id": run_id,
+            "task_id": updated.task_id,
+            "task_status": updated.status,
+            "feedback_type": normalized,
+            "learning_candidates": generated_candidates,
+        }
+
+    async def _process_with_main_agent(
+        self,
+        message: str,
+        *,
+        runner: Any,
+        kwargs: dict[str, Any],
+    ) -> AgentRunResult:
+        loaded = self.create_loop().boot()
+        task_service = self._require_loaded(loaded, "task_service")
+        session_id = kwargs.get("session_id") or uuid4().hex
+        kwargs = dict(kwargs)
+        kwargs["session_id"] = session_id
+
+        active_task = task_service.get_latest_open_task(session_id)
+        decision = self._main_agent_router.classify(message, active_task=active_task)
+        if not decision.is_task:
+            return await runner(message, **kwargs)
+
+        task = (
+            task_service.create_task(
+                session_id=session_id,
+                description=message,
+                metadata={"router_reason": decision.reason},
+            )
+            if active_task is None or decision.starts_new_task
+            else active_task
+        )
+        return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
+
+    async def _run_task_mode(
+        self,
+        message: str,
+        *,
+        runner: Any,
+        kwargs: dict[str, Any],
+        task: TaskRecord,
+    ) -> AgentRunResult:
+        loaded = self.create_loop().boot()
+        task_service = self._require_loaded(loaded, "task_service")
+        validation_service = self._require_loaded(loaded, "validation_service")
+        task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
+        session_manager = self._require_loaded(loaded, "session_manager")
+        run_memory_store = self._require_loaded(loaded, "run_memory_store")
+
+        last_result: AgentRunResult | None = None
+        latest_validation: ValidationResult | None = None
+        base_execution_context = kwargs.get("execution_context")
+        provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
+        kwargs = dict(kwargs)
+        team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
+        kwargs["provider_bundle"] = provider_bundle
+
+        for attempt_index in (1, 2):
+            task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
+            plan = await task_execution_planner.plan(
+                task=task,
+                user_message=message,
+                attempt_index=attempt_index,
+                latest_validation=latest_validation,
+                provider_bundle=provider_bundle,
+            )
+            self._append_task_observation(
+                session_manager,
+                task.session_id,
+                event_type="task_execution_planned",
+                payload={
+                    "task_id": task.task_id,
+                    "attempt_index": attempt_index,
+                    **plan.to_event_payload(),
+                },
+            )
+            team_summaries: list[str] = []
+            team_execution_context = ""
+            if plan.is_team:
+                team_result, team_error = await self._run_team_for_task(
+                    plan,
+                    task=task,
+                    parent_session_id=kwargs["session_id"],
+                    provider_bundle_factory=team_provider_bundle_factory
+                    or self._build_team_provider_bundle_factory(loaded, kwargs),
+                )
+                if team_result is not None:
+                    team_summaries = [self._team_summary_for_validation(team_result)]
+                    team_execution_context = self._team_execution_context(plan, team_result)
+                    self._append_task_observation(
+                        session_manager,
+                        task.session_id,
+                        event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
+                        payload={
+                            "task_id": task.task_id,
+                            "attempt_index": attempt_index,
+                            "plan_mode": plan.mode,
+                            "strategy": plan.graph.strategy if plan.graph else None,
+                            "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                            "team_run_ids": team_result.run_ids,
+                            "team_success": team_result.success,
+                            "node_results": self._team_node_results_for_event(plan, team_result),
+                            "reason": plan.reason,
+                            "error": None if team_result.success else "one or more team nodes failed",
+                        },
+                    )
+                else:
+                    team_summaries = [f"Team execution failed: {team_error}"]
+                    team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
+                    self._append_task_observation(
+                        session_manager,
+                        task.session_id,
+                        event_type="task_team_run_failed",
+                        payload={
+                            "task_id": task.task_id,
+                            "attempt_index": attempt_index,
+                            "plan_mode": plan.mode,
+                            "strategy": plan.graph.strategy if plan.graph else None,
+                            "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                            "team_run_ids": [],
+                            "team_success": False,
+                            "reason": plan.reason,
+                            "error": team_error,
+                        },
+                    )
+
+            attempt_kwargs = dict(kwargs)
+            attempt_kwargs.update(
+                {
+                    "task_id": task.task_id,
+                    "task_mode": True,
+                    "attempt_index": attempt_index,
+                    "learning_candidate_enabled": False,
+                }
+            )
+            if attempt_index == 2 and latest_validation is not None:
+                revision_context = latest_validation.recommended_revision_prompt.strip()
+                if revision_context:
+                    attempt_kwargs["execution_context"] = self._join_context(
+                        base_execution_context,
+                        f"Task validation revision request:\n{revision_context}",
+                        team_execution_context,
+                    )
+            elif team_execution_context:
+                attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
+
+            result = await runner(message, **attempt_kwargs)
+            last_result = result
+            self._append_task_observation(
+                session_manager,
+                task.session_id,
+                event_type="task_synthesis_completed",
+                payload={
+                    "task_id": task.task_id,
+                    "attempt_index": attempt_index,
+                    "main_run_id": result.run_id,
+                    "plan_mode": plan.mode,
+                    "strategy": plan.graph.strategy if plan.graph else None,
+                },
+            )
+            task = task_service.append_run(
+                task.task_id,
+                result.run_id,
+                skill_names=self._skill_names_for_run(loaded, result.run_id),
+            )
+            validation = await validation_service.validate_task_result(
+                task=task,
+                user_message=message,
+                final_output=result.output_text,
+                transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
+                tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
+                team_summaries=team_summaries,
+                provider_bundle=provider_bundle,
+            )
+            latest_validation = validation
+            task = task_service.record_validation(task.task_id, result.run_id, validation)
+            run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
+            session_manager.update_latest_assistant_event_payload(
+                result.session_id,
+                result.run_id,
+                {
+                    "task_id": task.task_id,
+                    "task_status": task.status,
+                    "validation_status": "passed" if validation.accepted else "failed",
+                },
+            )
+            session_manager.append_message(
+                result.session_id,
+                run_id=result.run_id,
+                role="system",
+                event_type="task_validation_snapshotted",
+                event_payload={
+                    "task_id": task.task_id,
+                    "attempt_index": attempt_index,
+                    "validation_result": validation.to_dict(),
+                    "retry_scheduled": not validation.accepted and attempt_index == 1,
+                },
+                content=validation.recommended_revision_prompt or None,
+                context_visible=False,
+            )
+            if not validation.accepted and attempt_index == 1:
+                session_manager.set_run_context_visible(result.session_id, result.run_id, False)
+            result.task_id = task.task_id
+            result.task_status = task.status
+            result.validation_result = validation.to_dict()
+            if validation.accepted or attempt_index == 2:
+                return result
+
+        if last_result is None:  # pragma: no cover - defensive
+            raise RuntimeError("Task mode did not produce a run result")
+        return last_result
+
+    async def _run_team_for_task(
+        self,
+        plan: TaskExecutionPlan,
+        *,
+        task: TaskRecord,
+        parent_session_id: str,
+        provider_bundle_factory: Any,
+    ) -> tuple[TeamRunResult | None, str | None]:
+        if plan.graph is None:
+            return None, "team plan did not include an execution graph"
+        try:
+            from beaver.services.team_service import TeamService
+
+            result = await TeamService(self.create_loop()).run_team(
+                plan.graph,
+                parent_task_id=task.task_id,
+                parent_session_id=parent_session_id,
+                parent_run_id=None,
+                provider_bundle_factory=provider_bundle_factory,
+                learning_candidate_enabled=False,
+            )
+            return result, None
+        except Exception as exc:
+            return None, str(exc)
+
+    @staticmethod
+    def _require_loaded(loaded: Any, field_name: str) -> Any:
+        value = getattr(loaded, field_name)
+        if value is None:
+            raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
+        return value
+
+    @staticmethod
+    def _skill_names_for_run(loaded: Any, run_id: str) -> list[str]:
+        store = getattr(loaded, "run_memory_store", None)
+        if store is None:
+            return []
+        for record in store.list_runs():
+            if record.run_id == run_id:
+                return [receipt.skill_name for receipt in record.activated_skills]
+        return []
+
+    @staticmethod
+    def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str:
+        lines = []
+        for event in session_manager.get_run_event_records(session_id, run_id):
+            if event.context_visible and event.content:
+                lines.append(f"{event.role}: {event.content.strip()}")
+        return "\n".join(lines[:12])[:2400]
+
+    @staticmethod
+    def _tool_summaries(session_manager: Any, session_id: str, run_id: str) -> list[str]:
+        summaries = []
+        for event in session_manager.get_run_event_records(session_id, run_id):
+            if event.event_type != "tool_result_recorded":
+                continue
+            text = (event.content or "").strip()
+            if text:
+                summaries.append(f"{event.tool_name or 'tool'}: {text[:500]}")
+        return summaries[:12]
+
+    @staticmethod
+    def _append_task_observation(
+        session_manager: Any,
+        session_id: str,
+        *,
+        event_type: str,
+        payload: dict[str, Any],
+    ) -> None:
+        session_manager.append_message(
+            session_id,
+            role="system",
+            event_type=event_type,
+            event_payload=payload,
+            content=payload.get("reason") or payload.get("error"),
+            context_visible=False,
+        )
+
+    @staticmethod
+    def _join_context(*parts: str | None) -> str:
+        return "\n\n".join(part.strip() for part in parts if part and part.strip())
+
+    @staticmethod
+    def _team_summary_for_validation(result: TeamRunResult) -> str:
+        lines = [
+            f"success={result.success}",
+            f"task_id={result.task_id or ''}",
+            "summary:",
+            result.summary,
+            "nodes:",
+        ]
+        for node in result.node_results:
+            lines.append(
+                f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
+                f"error={node.error or ''} output={node.output_text[:500]}"
+            )
+        return "\n".join(lines)
+
+    @staticmethod
+    def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
+        nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
+        payloads: list[dict[str, Any]] = []
+        for item in result.node_results:
+            payload = item.to_dict()
+            node = nodes.get(item.node_id)
+            if node is not None:
+                payload["selected_skill_names"] = list(node.inherited_pinned_skills)
+                payload["ephemeral_skill_names"] = [
+                    skill.name for skill in node.inherited_pinned_skill_contexts
+                ]
+                payload["skill_query"] = node.agent.metadata.get("skill_query")
+                payload["generated_skill_draft_id"] = node.agent.metadata.get("generated_skill_draft_id")
+                payload["generated_skill_name"] = node.agent.metadata.get("generated_skill_name")
+                payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
+            payloads.append(payload)
+        return payloads
+
+    @staticmethod
+    def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
+        node_lines = [
+            (
+                f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
+                f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
+            )
+            for node in result.node_results
+        ]
+        return "\n\n".join(
+            item
+            for item in [
+                "Task team execution result:",
+                f"Planner reason: {plan.reason}",
+                f"Strategy: {plan.graph.strategy if plan.graph else ''}",
+                f"Team success: {result.success}",
+                f"Team summary:\n{result.summary}",
+                "Node results:\n" + "\n\n".join(node_lines),
+                (
+                    "Final synthesis instruction:\n" + plan.final_synthesis_instruction
+                    if plan.final_synthesis_instruction
+                    else None
+                ),
+                "Use the team outputs as internal evidence. Produce the final user-facing answer yourself.",
+            ]
+            if item
+        )
+
+    @staticmethod
+    def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
+        return "\n\n".join(
+            [
+                "Task team execution failed before final synthesis.",
+                f"Planner reason: {plan.reason}",
+                f"Strategy: {plan.graph.strategy if plan.graph else ''}",
+                f"Error: {error}",
+                "Proceed as the main agent and produce the best possible final answer.",
+            ]
+        )
+
+    def _build_team_provider_bundle_factory(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
+        def factory(node: ExecutionNode) -> Any:
+            node_kwargs = dict(kwargs)
+            node_kwargs.pop("provider_bundle", None)
+            if node.agent.model:
+                node_kwargs["model"] = node.agent.model
+            if node.agent.provider_name:
+                node_kwargs["provider_name"] = node.agent.provider_name
+            return self._make_provider_bundle_for_task(loaded, node_kwargs)
+
+        return factory
+
+    def _make_provider_bundle_for_task(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
+        config = loaded.config
+        configured_provider = config.resolve_provider_target(
+            model=kwargs.get("model"),
+            provider_name=kwargs.get("provider_name"),
+        )
+        resolved_model = configured_provider.get("model") or self.profile.default_model
+        resolved_provider_name = configured_provider.get("provider_name") or kwargs.get("provider_name")
+        return make_provider_bundle(
+            model=resolved_model,
+            provider_name=resolved_provider_name,
+            api_key=kwargs.get("api_key") or configured_provider.get("api_key"),
+            api_base=kwargs.get("api_base") or configured_provider.get("api_base"),
+            request_timeout_seconds=configured_provider.get("request_timeout_seconds"),
+            extra_headers=kwargs.get("extra_headers") or configured_provider.get("extra_headers"),
+            routing=kwargs.get("routing"),
+            fallback_target=kwargs.get("fallback_target"),
+            auxiliary_target=kwargs.get("auxiliary_target"),
+            embedding_target=kwargs.get("embedding_target") or config.resolve_embedding_target(),
+            embedding_model=kwargs.get("embedding_model") or config.default_embedding_model,
+        )

    async def handle_inbound_message(self, inbound: InboundMessage) -> OutboundMessage:
        """把 bus inbound 映射成标准 runtime 调用，并返回结构化 outbound。"""
@ -207,9 +707,26 @@ class AgentService:
                embedding_model=inbound.embedding_model,
            )
        except Exception as exc:
-            return self.build_outbound_error(inbound, detail=str(exc))
+            return self.build_outbound_error(
+                inbound,
+                detail=str(exc),
+                finish_reason=self._classify_inbound_failure(exc),
+            )
        return self.build_outbound_message(inbound, result)

+    @staticmethod
+    def _classify_inbound_failure(exc: Exception) -> str:
+        """把 runtime 异常收口为更稳定的 bus finish reason。"""
+
+        if isinstance(exc, RuntimeError):
+            detail = str(exc)
+            if (
+                "requires an active run() loop" in detail
+                or "not accepting new tasks after stop()" in detail
+            ):
+                return "stopped"
+        return "error"
+
    @staticmethod
    def build_outbound_message(inbound: InboundMessage, result: AgentRunResult) -> OutboundMessage:
        """把一次 runtime 正常结果转成 bus outbound。"""
@ -224,7 +741,12 @@ class AgentService:
            provider_name=result.provider_name,
            model=result.model,
            usage=dict(result.usage),
-            metadata={"inbound_metadata": dict(inbound.metadata)},
+            metadata={
+                "inbound_metadata": dict(inbound.metadata),
+                "task_id": getattr(result, "task_id", None),
+                "task_status": getattr(result, "task_status", None),
+                "validation_result": getattr(result, "validation_result", None),
+            },
        )

    @staticmethod