feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核

新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证
(通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。

实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,
sub-agent复用主AgentLoop,每个run使用独立memory snapshot。

建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,
仅当Task验证通过且用户满意时,才生成学习候选。

重构目录结构,移除third_party依赖,建立统一engine内核,
所有agent共享运行时基础组件。

更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,
集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions

View File

@ -4,10 +4,15 @@ from __future__ import annotations
import asyncio
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from beaver.engine.context import ContextBuildInput, SessionContext
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
from beaver.memory.runs import RunRecord, SkillEffectRecord
from beaver.skills.learning import RunReceiptContext
from beaver.skills.catalog.utils import strip_frontmatter
from beaver.skills.specs import SkillActivationReceipt
from beaver.engine.providers import ProviderBundle, make_provider_bundle
from beaver.tools import ToolContext
@ -38,6 +43,9 @@ class AgentRunResult:
provider_name: str | None = None
model: str | None = None
usage: dict[str, Any] = field(default_factory=dict)
task_id: str | None = None
task_status: str | None = None
validation_result: dict[str, Any] | None = None
@dataclass(slots=True)
@ -196,6 +204,13 @@ class AgentLoop:
temperature: float | None = None,
max_tool_iterations: int | None = None,
provider_bundle: ProviderBundle | None = None,
parent_session_id: str | None = None,
task_id: str | None = None,
task_mode: bool = False,
attempt_index: int | None = None,
pinned_skill_names: list[str] | None = None,
pinned_skill_contexts: list[SkillContext] | None = None,
learning_candidate_enabled: bool = False,
) -> AgentRunResult:
"""跑通最小 direct run 主链。
@ -233,6 +248,13 @@ class AgentLoop:
temperature=temperature,
max_tool_iterations=max_tool_iterations,
provider_bundle=provider_bundle,
parent_session_id=parent_session_id,
task_id=task_id,
task_mode=task_mode,
attempt_index=attempt_index,
pinned_skill_names=pinned_skill_names,
pinned_skill_contexts=pinned_skill_contexts,
learning_candidate_enabled=learning_candidate_enabled,
)
async def _process_direct_impl(
@ -258,6 +280,13 @@ class AgentLoop:
temperature: float | None = None,
max_tool_iterations: int | None = None,
provider_bundle: ProviderBundle | None = None,
parent_session_id: str | None = None,
task_id: str | None = None,
task_mode: bool = False,
attempt_index: int | None = None,
pinned_skill_names: list[str] | None = None,
pinned_skill_contexts: list[SkillContext] | None = None,
learning_candidate_enabled: bool = False,
) -> AgentRunResult:
"""真正执行一轮 direct run 的内部实现。
@ -276,6 +305,7 @@ class AgentLoop:
tool_executor = self._require_loaded("tool_executor")
skills_loader = self._require_loaded("skills_loader")
skill_assembler = self._require_loaded("skill_assembler")
skill_learning_service = self._require_loaded("skill_learning_service")
config = loaded.config
configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name)
@ -296,16 +326,24 @@ class AgentLoop:
self.profile.max_tool_iterations if max_tool_iterations is None else max_tool_iterations
)
# 每次新运行开始前都通过 MemoryService 刷新 live state。
# 这样 memory policy 会收口在 service,而不是散在 loop 里。
memory_service.reload_for_new_run()
# 每个 run 都捕获自己的 frozen snapshot,不能依赖 MemoryService
# 上的共享 `_snapshot`,否则 parallel team runs 会互相覆盖。
memory_snapshot = memory_service.capture_snapshot_for_run()
if parent_session_id:
session_manager.ensure_session(
parent_session_id,
source="unknown",
model=resolved_model,
user_id=user_id,
)
session_manager.ensure_session(
resolved_session_id,
source=source,
model=resolved_model,
title=title,
user_id=user_id,
parent_session_id=parent_session_id,
)
session_manager.append_message(
resolved_session_id,
@ -316,6 +354,12 @@ class AgentLoop:
"source": source,
"model": resolved_model,
"agent_name": self.profile.name,
"task_id": task_id,
"task_mode": task_mode,
"attempt_index": attempt_index,
"parent_session_id": parent_session_id,
"pinned_skill_names": list(pinned_skill_names or []),
"pinned_skill_context_names": [skill.name for skill in pinned_skill_contexts or []],
},
content=task,
context_visible=False,
@ -330,6 +374,8 @@ class AgentLoop:
final_usage: dict[str, Any] = {}
final_provider_name: str | None = resolved_provider_name
final_model: str | None = resolved_model
run_started_at = self._utc_now()
activated_receipts: list[SkillActivationReceipt] = []
try:
bundle = provider_bundle or make_provider_bundle(
model=resolved_model,
@ -356,17 +402,38 @@ class AgentLoop:
model=skill_selector_model,
embedding_runtime=bundle.embedding_runtime,
)
skill_activation_messages = context_builder.build_skill_activation_messages(
assembled_skills.activated_skills
activated_skills = self._merge_skill_contexts(
[
*(pinned_skill_contexts or []),
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
],
assembled_skills.activated_skills,
)
skill_activation_messages = context_builder.build_skill_activation_messages(
activated_skills
)
activated_receipts = [
SkillActivationReceipt(
run_id=resolved_run_id,
session_id=resolved_session_id,
skill_name=skill.name,
skill_version=skill.version,
content_hash=skill.content_hash,
activated_at=self._utc_now(),
activation_reason=skill.activation_reason,
tool_hints=list(skill.tool_hints),
)
for skill in activated_skills
]
if skill_activation_messages:
if skill_activation_messages or activated_receipts:
session_manager.append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
event_type="skill_activation_snapshotted",
event_payload={
"receipts": [receipt.to_dict() for receipt in activated_receipts],
"activation_messages": skill_activation_messages,
},
content="\n\n".join(message["content"] for message in skill_activation_messages) or None,
@ -381,7 +448,7 @@ class AgentLoop:
task_description=task,
registry=tool_registry,
skills_loader=skills_loader,
activated_skills=assembled_skills.activated_skills,
activated_skills=activated_skills,
embedding_runtime=bundle.embedding_runtime,
top_k=10,
)
@ -407,13 +474,14 @@ class AgentLoop:
base_system_prompt=self.profile.system_prompt,
history=session_manager.get_history(resolved_session_id),
current_user_input=task,
memory_snapshot=memory_service.get_snapshot(),
activated_skills=assembled_skills.activated_skills,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
session_context=SessionContext(
session_id=resolved_session_id,
source=source,
model=resolved_model,
user_id=user_id,
parent_session_id=parent_session_id,
),
execution_context=execution_context,
)
@ -491,6 +559,7 @@ class AgentLoop:
run_id=resolved_run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=response.content,
tool_calls=assistant_tool_calls or None,
finish_reason=response.finish_reason,
@ -520,6 +589,7 @@ class AgentLoop:
run_id=resolved_run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=final_text,
finish_reason=final_finish_reason,
source=source,
@ -568,6 +638,9 @@ class AgentLoop:
event_payload={
"finish_reason": final_finish_reason,
"tool_iterations": iterations,
"task_id": task_id,
"task_mode": task_mode,
"attempt_index": attempt_index,
},
content=final_text,
finish_reason=final_finish_reason,
@ -577,6 +650,21 @@ class AgentLoop:
model=final_model,
user_id=user_id,
)
self._record_skill_learning(
skill_learning_service=skill_learning_service,
session_manager=session_manager,
session_id=resolved_session_id,
run_id=resolved_run_id,
task=task,
run_started_at=run_started_at,
run_ended_at=self._utc_now(),
finish_reason=final_finish_reason,
activated_receipts=activated_receipts,
success=(final_finish_reason == "stop"),
task_id=task_id,
attempt_index=attempt_index,
generate_candidates=learning_candidate_enabled,
)
return AgentRunResult(
session_id=resolved_session_id,
run_id=resolved_run_id,
@ -586,6 +674,7 @@ class AgentLoop:
provider_name=final_provider_name,
model=final_model,
usage=final_usage,
task_id=task_id,
)
except Exception as exc:
if not user_message_recorded:
@ -600,7 +689,7 @@ class AgentLoop:
model=resolved_model,
user_id=user_id,
)
return self._build_error_result(
result = self._build_error_result(
session_manager=session_manager,
session_id=resolved_session_id,
run_id=resolved_run_id,
@ -612,7 +701,24 @@ class AgentLoop:
tool_iterations=iterations,
provider_name=final_provider_name,
usage=final_usage,
task_id=task_id,
)
self._record_skill_learning(
skill_learning_service=skill_learning_service,
session_manager=session_manager,
session_id=resolved_session_id,
run_id=resolved_run_id,
task=task,
run_started_at=run_started_at,
run_ended_at=self._utc_now(),
finish_reason="error",
activated_receipts=activated_receipts,
success=False,
task_id=task_id,
attempt_index=attempt_index,
generate_candidates=learning_candidate_enabled,
)
return result
def _require_loaded(self, field_name: str) -> Any:
loaded = self.boot()
@ -621,6 +727,46 @@ class AgentLoop:
raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
return value
@staticmethod
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
contexts: list[SkillContext] = []
seen: set[str] = set()
for name in skill_names:
normalized = str(name).strip()
if not normalized or normalized in seen:
continue
seen.add(normalized)
record = skills_loader.get_skill_record(normalized)
raw_content = skills_loader.load_published_skill(normalized)
content = strip_frontmatter(raw_content).strip() if raw_content else ""
if record is None or not content:
raise ValueError(f"Pinned skill {normalized!r} is not available for delegated execution")
contexts.append(
SkillContext(
name=normalized,
content=content,
version=record.version,
content_hash=record.content_hash or "",
activation_reason="pinned_delegation",
tool_hints=list(record.tool_hints),
)
)
return contexts
@staticmethod
def _merge_skill_contexts(
pinned_skills: list[SkillContext],
open_skills: list[SkillContext],
) -> list[SkillContext]:
result: list[SkillContext] = []
seen: set[str] = set()
for skill in [*pinned_skills, *open_skills]:
if skill.name in seen:
continue
seen.add(skill.name)
result.append(skill)
return result
@staticmethod
def _serialize_tool_calls(tool_calls: list[Any]) -> list[dict[str, Any]]:
payload: list[dict[str, Any]] = []
@ -683,6 +829,7 @@ class AgentLoop:
tool_iterations: int,
provider_name: str | None,
usage: dict[str, Any],
task_id: str | None = None,
) -> AgentRunResult:
"""把主链中的未处理异常收口成可追踪的 assistant error turn。"""
@ -691,6 +838,7 @@ class AgentLoop:
run_id=run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=message,
finish_reason="error",
source=source,
@ -706,6 +854,7 @@ class AgentLoop:
event_payload={
"tool_iterations": tool_iterations,
"provider_name": provider_name,
"task_id": task_id,
},
content=message,
finish_reason="error",
@ -724,4 +873,87 @@ class AgentLoop:
provider_name=provider_name,
model=model,
usage=usage,
task_id=task_id,
)
@staticmethod
def _record_skill_learning(
    *,
    skill_learning_service: Any,
    session_manager: Any,
    session_id: str,
    run_id: str,
    task: str,
    run_started_at: str,
    run_ended_at: str,
    finish_reason: str,
    activated_receipts: list[SkillActivationReceipt],
    success: bool,
    task_id: str | None = None,
    attempt_index: int | None = None,
    generate_candidates: bool = False,
) -> None:
    """Hand the run's skill receipts to the learning service and log the outcome.

    Builds one ``RunRecord`` for the run and one ``SkillEffectRecord`` per
    activated receipt, then passes them to
    ``skill_learning_service.collect_run_receipts``. The outcome is appended
    to the session as a non-context-visible system message:

    * ``skill_effects_snapshotted`` when the service call succeeds;
    * ``skill_effects_snapshot_failed`` when it raises. The exception is
      not re-raised; its text is stored in the event payload.

    Args:
        skill_learning_service: Object exposing ``collect_run_receipts``.
        session_manager: Object exposing ``append_message``.
        session_id: Session the event message is appended to.
        run_id: Identifier of the run being recorded.
        task: Task text stored on the run record.
        run_started_at: Start timestamp stored on the run record.
        run_ended_at: End timestamp; also used as each effect record's
            ``created_at``.
        finish_reason: Finish reason; also stored as each effect's ``notes``.
        activated_receipts: Receipts of the skills activated for the run.
        success: Whether the run counts as successful.
        task_id: Optional task identifier stored on the run record.
        attempt_index: Optional attempt index stored on the run record.
        generate_candidates: Forwarded to ``collect_run_receipts`` and echoed
            in the success payload as ``learning_candidate_enabled``.
    """
    effects = []
    for receipt in activated_receipts:
        effects.append(
            SkillEffectRecord(
                run_id=run_id,
                skill_name=receipt.skill_name,
                skill_version=receipt.skill_version,
                success=success,
                feedback_score=None,
                notes=finish_reason,
                created_at=run_ended_at,
            )
        )
    record = RunRecord(
        run_id=run_id,
        session_id=session_id,
        task_id=task_id,
        attempt_index=attempt_index,
        task_text=task,
        started_at=run_started_at,
        ended_at=run_ended_at,
        success=success,
        finish_reason=finish_reason,
        feedback={},
        activated_skills=list(activated_receipts),
    )

    failure: Exception | None = None
    candidates: Any = None
    try:
        candidates = skill_learning_service.collect_run_receipts(
            RunReceiptContext(run_record=record, effect_records=effects),
            generate_candidates=generate_candidates,
        )
    except Exception as exc:  # pragma: no cover - defensive hot-path guard
        failure = exc

    # Serialised only after the service call, so the payload reflects the
    # records as they are at that point.
    payload: dict[str, Any] = {
        "run_record": record.to_dict(),
        "skill_effects": [effect.to_dict() for effect in effects],
    }

    if failure is not None:
        payload["error"] = str(failure)
        session_manager.append_message(
            session_id,
            run_id=run_id,
            role="system",
            event_type="skill_effects_snapshot_failed",
            event_payload=payload,
            content=f"Skill learning receipt recording failed: {failure}",
            context_visible=False,
        )
        return

    payload["learning_candidates"] = [candidate.to_dict() for candidate in candidates]
    payload["learning_candidate_enabled"] = generate_candidates
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="system",
        event_type="skill_effects_snapshotted",
        event_payload=payload,
        content=f"Recorded {len(effects)} skill effect record(s).",
        context_visible=False,
    )
@staticmethod
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()