feat(engine): 添加运行时上下文支持并重构工具迭代限制

添加 RuntimeContext 类用于捕获模型运行时的日期时间信息,
包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。

同时增加最大上下文消息数和工具迭代次数的配置选项,
将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
This commit is contained in:
2026-05-26 11:18:35 +08:00
parent 16347caf5e
commit 6e9e74d1ee
57 changed files with 5710 additions and 1582 deletions

View File

@ -4,6 +4,7 @@ from .builder import (
ContextBuildInput,
ContextBuildResult,
ContextBuilder,
RuntimeContext,
SessionContext,
SkillContext,
)
@ -12,6 +13,7 @@ __all__ = [
"ContextBuildInput",
"ContextBuildResult",
"ContextBuilder",
"RuntimeContext",
"SessionContext",
"SkillContext",
]

View File

@ -80,6 +80,16 @@ class SessionContext:
parent_session_id: str | None = None
@dataclass(slots=True)
class RuntimeContext:
    """Per-run runtime facts that should be visible to the model.

    ``ContextBuilder._render_runtime_section`` turns these fields into the
    "Current Date and Time" section of the system prompt. Any field that is
    empty or ``None`` is left out of that section.
    """

    # Current time in UTC, already formatted as text by the caller.
    utc_datetime: str
    # Current time in the local timezone, already formatted as text.
    local_datetime: str
    # Display name of the local timezone; None when it is not known.
    timezone: str | None = None
    # Local offset from UTC as text; None when it is not known.
    utc_offset: str | None = None
@dataclass(slots=True)
class ContextBuildInput:
"""一次上下文构建所需的全部输入。
@ -103,6 +113,7 @@ class ContextBuildInput:
memory_snapshot: MemorySnapshot | None = None
activated_skills: list[SkillContext] = field(default_factory=list)
session_context: SessionContext | None = None
runtime_context: RuntimeContext | None = None
execution_context: str | None = None
extra_sections: list[str] = field(default_factory=list)
@ -143,9 +154,10 @@ class ContextBuilder:
1. Beaver user-facing assistant identity
2. base system prompt
3. session metadata
4. execution context
5. frozen memory snapshot
6. extra sections
4. runtime date/time
5. execution context
6. frozen memory snapshot
7. extra sections
这样设计的原因:
- 身份与总规则要最靠前
@ -164,6 +176,10 @@ class ContextBuilder:
if session_section:
sections.append(session_section)
runtime_section = self._render_runtime_section(build_input.runtime_context)
if runtime_section:
sections.append(runtime_section)
execution_context = (build_input.execution_context or "").strip()
if execution_context:
sections.append(f"# Execution Context\n\n{execution_context}")
@ -347,6 +363,31 @@ class ContextBuilder:
return None
return "# Current Session\n\n" + "\n".join(rows)
def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
    """Build the "Current Date and Time" prompt section.

    Returns ``None`` when no runtime context is supplied or when every
    field of it is empty, so the caller can skip the section entirely.
    """
    if runtime_context is None:
        return None

    # Each entry pairs the raw field with its rendered row; a falsy field
    # drops its row from the output.
    candidates = (
        (runtime_context.utc_datetime, f"Current UTC time: {runtime_context.utc_datetime}"),
        (runtime_context.local_datetime, f"Current local time: {runtime_context.local_datetime}"),
        (runtime_context.timezone, f"Local timezone: {runtime_context.timezone}"),
        (runtime_context.utc_offset, f"Local UTC offset: {runtime_context.utc_offset}"),
    )
    rows = [rendered for raw, rendered in candidates if raw]
    if not rows:
        return None

    heading = "# Current Date and Time\n\n"
    usage_note = (
        "\n\nUse this section as authoritative for relative date/time references such as "
        '"today", "tomorrow", "now", "this week", and "next month".'
    )
    return heading + "\n".join(rows) + usage_note
def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
"""把已激活 skill 转成显式消息。

View File

@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
from beaver.tasks import TaskExecutionPlanner, TaskService
from beaver.tasks.skill_resolver import TaskSkillResolver
from beaver.skills import SkillAssembler, SkillsLoader
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
@ -91,7 +91,6 @@ class EngineLoadResult:
task_skill_resolver: TaskSkillResolver | None = None
task_service: TaskService | None = None
task_execution_planner: TaskExecutionPlanner | None = None
validation_service: ValidationService | None = None
mcp_manager: MCPConnectionManager | None = None
mcp_report: dict[str, dict] = field(default_factory=dict)
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
@ -166,7 +165,6 @@ class EngineLoader:
task_skill_resolver: TaskSkillResolver | None = None,
task_service: TaskService | None = None,
task_execution_planner: TaskExecutionPlanner | None = None,
validation_service: ValidationService | None = None,
) -> None:
self.config = config or load_config(workspace=workspace, config_path=config_path)
configured_workspace = self.config.agents_defaults.workspace
@ -192,7 +190,6 @@ class EngineLoader:
self._task_skill_resolver = task_skill_resolver
self._task_service = task_service
self._task_execution_planner = task_execution_planner
self._validation_service = validation_service
def load(self) -> EngineLoadResult:
"""装配当前主链需要的最小 runtime 对象。"""
@ -276,7 +273,6 @@ class EngineLoader:
)
task_service = self._task_service or TaskService(workspace / "tasks")
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
validation_service = self._validation_service or ValidationService()
mcp_manager = MCPConnectionManager(
self.config.tools.mcp_servers,
authz_config=self.config.authz,
@ -311,7 +307,6 @@ class EngineLoader:
task_skill_resolver=task_skill_resolver,
task_service=task_service,
task_execution_planner=task_execution_planner,
validation_service=validation_service,
mcp_manager=mcp_manager,
)
if self._session_manager is None:

View File

@ -4,12 +4,15 @@ from __future__ import annotations
import asyncio
import json
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
from beaver.memory.runs import RunRecord, SkillEffectRecord
from beaver.skills.learning import RunReceiptContext
from beaver.skills.catalog.utils import strip_frontmatter
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
"Use available materials, state uncertainty clearly, and provide partial confirmed results."
)
RAW_TOOL_CALL_FALLBACK = (
"The run reached the configured tool-call limit before producing a reliable final answer. "
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
"Please request a revision to continue the task."
)
_RAW_TOOL_CALL_RE = re.compile(
r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
re.IGNORECASE,
)
@dataclass(slots=True)
class AgentProfile:
@ -35,8 +49,9 @@ class AgentProfile:
system_prompt: str = ""
default_model: str = "gpt-4.1-mini"
max_tokens: int = 4096
max_context_messages: int = 1000
temperature: float = 0.2
max_tool_iterations: int = 8
max_tool_iterations: int = 30
@dataclass(slots=True)
@ -446,7 +461,7 @@ class AgentLoop:
*(pinned_skill_contexts or []),
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
]
if not include_skill_assembly or thinking_enabled is False:
if not include_skill_assembly:
activated_skills = self._merge_skill_contexts(pinned_skills, [])
else:
skill_query = skill_selection_context or task
@ -512,8 +527,6 @@ class AgentLoop:
if not include_tools:
selected_tool_specs = []
elif thinking_enabled is False:
selected_tool_specs = tool_registry.list_specs()
else:
selected_tool_specs = await tool_assembler.assemble(
task_description=task,
@ -543,7 +556,10 @@ class AgentLoop:
build_input = ContextBuildInput(
base_system_prompt=self.profile.system_prompt,
history=session_manager.get_history(resolved_session_id),
history=session_manager.get_history(
resolved_session_id,
max_messages=max(1, self.profile.max_context_messages),
),
current_user_input=task,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
@ -554,6 +570,7 @@ class AgentLoop:
user_id=user_id,
parent_session_id=parent_session_id,
),
runtime_context=self._current_runtime_context(),
execution_context=execution_context,
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
)
@ -693,6 +710,7 @@ class AgentLoop:
tool_calls=assistant_tool_calls or None,
finish_reason=response.finish_reason,
reasoning=response.reasoning_content,
context_visible=not bool(assistant_tool_calls),
source=source,
title=title,
model=final_model,
@ -707,7 +725,11 @@ class AgentLoop:
if not response.has_tool_calls:
final_text = response.content or ""
final_finish_reason = response.finish_reason or "stop"
if self._looks_like_raw_tool_call(final_text):
final_text = RAW_TOOL_CALL_FALLBACK
final_finish_reason = "invalid_tool_call_text"
else:
final_finish_reason = response.finish_reason or "stop"
break
if iterations >= resolved_max_tool_iterations:
@ -719,10 +741,7 @@ class AgentLoop:
temperature=resolved_temperature,
thinking_enabled=thinking_enabled,
)
final_text = finalized or (
"Tool loop stopped after reaching the configured iteration limit, "
"and no final answer was produced."
)
final_text = finalized or RAW_TOOL_CALL_FALLBACK
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
session_manager.append_message(
resolved_session_id,
@ -877,17 +896,14 @@ class AgentLoop:
temperature: float,
thinking_enabled: bool | None,
) -> str:
final_messages = [
*messages,
{
"role": "system",
"content": (
"The configured tool iteration budget is exhausted. Do not call tools. "
"Produce the best final answer from the existing conversation and tool results. "
"State uncertainty explicitly."
),
},
]
final_messages = AgentLoop._with_system_guidance(
messages,
(
"The configured tool iteration budget is exhausted. Do not call tools. "
"Produce the best final answer from the existing conversation and tool results. "
"State uncertainty explicitly."
),
)
kwargs: dict[str, Any] = {
"messages": final_messages,
"tools": None,
@ -898,7 +914,27 @@ class AgentLoop:
if thinking_enabled is not None:
kwargs["thinking_enabled"] = thinking_enabled
response = await provider.chat(**kwargs)
return (response.content or "").strip()
if response.has_tool_calls:
return ""
content = (response.content or "").strip()
if AgentLoop._looks_like_raw_tool_call(content):
return ""
return content
@staticmethod
def _looks_like_raw_tool_call(content: str | None) -> bool:
    """Report whether ``content`` matches the module's raw tool-call pattern.

    Empty or missing content is never treated as a raw tool call.
    """
    return bool(content) and _RAW_TOOL_CALL_RE.match(content) is not None
@staticmethod
def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
    """Return a copy of ``messages`` that carries ``guidance`` as system text.

    When the first message is already a system message the guidance is
    appended to its content (separated by a blank line); otherwise a new
    system message is placed in front. The input list and its dicts are
    left untouched because every message is shallow-copied first.
    """
    cloned = [dict(entry) for entry in messages]
    leads_with_system = bool(cloned) and cloned[0].get("role") == "system"
    if not leads_with_system:
        return [{"role": "system", "content": guidance.strip()}, *cloned]

    head = cloned[0]
    current = str(head.get("content") or "").strip()
    # Skip empty pieces so no stray blank-line separator is emitted.
    head["content"] = "\n\n".join(piece for piece in (current, guidance.strip()) if piece)
    return cloned
@staticmethod
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
@ -1133,3 +1169,49 @@ class AgentLoop:
@staticmethod
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()
@staticmethod
def _current_runtime_context() -> RuntimeContext:
    """Capture the current date/time facts for one model run.

    The UTC and local timestamps are derived from a single clock reading so
    they always describe the same instant. If a timezone name is configured
    (see ``_configured_timezone_name``) and can be loaded, the local time is
    expressed in that zone; otherwise the system local timezone is used.

    Returns:
        A ``RuntimeContext`` with ISO 8601 timestamps, the timezone label
        and the formatted UTC offset.
    """
    utc_now = datetime.now(timezone.utc)
    # Convert the same instant instead of reading the clock a second time.
    local_now = utc_now.astimezone()
    rendered_timezone = local_now.tzname()

    timezone_name = AgentLoop._configured_timezone_name()
    if timezone_name:
        try:
            zone = ZoneInfo(timezone_name)
        except (ZoneInfoNotFoundError, ValueError, OSError):
            # ZoneInfo raises ValueError for absolute or non-normalized keys
            # (for example TZ=/usr/share/zoneinfo/...) and may surface OSError
            # for unreadable tz data. A bad setting must not abort the run,
            # so keep the system local time and fall back on its label.
            rendered_timezone = rendered_timezone or timezone_name
        else:
            local_now = utc_now.astimezone(zone)
            rendered_timezone = timezone_name

    return RuntimeContext(
        utc_datetime=utc_now.isoformat(),
        local_datetime=local_now.isoformat(),
        timezone=rendered_timezone,
        utc_offset=AgentLoop._format_utc_offset(local_now),
    )
@staticmethod
def _configured_timezone_name() -> str | None:
    """Look up the timezone name configured for this process.

    Sources are consulted in order: the ``BEAVER_RUNTIME_TIMEZONE``
    environment variable, the ``TZ`` environment variable, then the contents
    of ``/etc/timezone``. The first non-blank value wins; ``None`` is
    returned when nothing is configured or the file cannot be read.
    """
    for env_key in ("BEAVER_RUNTIME_TIMEZONE", "TZ"):
        candidate = (os.getenv(env_key) or "").strip()
        if candidate:
            return candidate

    timezone_file = "/etc/timezone"
    try:
        if not os.path.exists(timezone_file):
            return None
        with open(timezone_file, encoding="utf-8") as handle:
            from_file = handle.read().strip()
    except OSError:
        return None
    return from_file or None
@staticmethod
def _format_utc_offset(value: datetime) -> str | None:
    """Format the UTC offset of ``value`` as ``+HH:MM`` (or ``+HH:MM:SS``).

    Args:
        value: The datetime whose offset should be rendered.

    Returns:
        ``None`` for a naive datetime (no offset available); otherwise a
        signed offset string. A seconds component is appended only when the
        offset is not a whole number of minutes; sub-second parts are dropped.
    """
    offset = value.utcoffset()
    if offset is None:
        return None

    # Work from the numeric offset rather than slicing strftime("%z") output,
    # which grows extra digits when the offset carries seconds.
    total_seconds = int(abs(offset).total_seconds())
    sign = "-" if offset.total_seconds() < 0 else "+"
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    rendered = f"{sign}{hours:02d}:{minutes:02d}"
    if seconds:
        rendered += f":{seconds:02d}"
    return rendered

View File

@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
@staticmethod
def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Filter message keys and fold all system messages into one.

    Each message is reduced to the keys listed in ``_ALLOWED_MSG_KEYS``.
    System messages are removed from their original positions; their
    contents are joined with blank lines and emitted as a single system
    message at index 0. Assistant messages without a ``content`` key get
    ``content=None``, and list-valued ``tool_calls`` are passed through
    ``_sanitize_tool_calls``.

    Args:
        messages: Chat messages in provider-agnostic dict form.

    Returns:
        A new list of sanitized messages; the input dicts are not mutated.
    """
    sanitized: list[dict[str, Any]] = []
    system_contents: list[str] = []
    for message in messages:
        clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
        if clean.get("role") == "system":
            content = clean.get("content")
            if isinstance(content, str):
                # Blank or whitespace-only system text adds nothing; skip it
                # rather than letting it leak into the merged prompt.
                stripped = content.strip()
                if stripped:
                    system_contents.append(stripped)
            elif content is not None:
                # Non-string content is kept via its string form.
                system_contents.append(str(content))
            continue
        if clean.get("role") == "assistant" and "content" not in clean:
            clean["content"] = None
        if isinstance(clean.get("tool_calls"), list):
            clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
        sanitized.append(clean)
    if system_contents:
        sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
    return sanitized
@staticmethod

View File

@ -84,8 +84,10 @@ class MessageRecord:
payload["task_id"] = self.event_payload.get("task_id")
if self.event_payload.get("task_status"):
payload["task_status"] = self.event_payload.get("task_status")
if self.event_payload.get("validation_status"):
payload["validation_status"] = self.event_payload.get("validation_status")
if self.event_payload.get("evidence_status"):
payload["evidence_status"] = self.event_payload.get("evidence_status")
if self.event_payload.get("acceptance_state"):
payload["acceptance_state"] = self.event_payload.get("acceptance_state")
if self.event_payload.get("feedback_state"):
payload["feedback_state"] = self.event_payload.get("feedback_state")
if self.event_payload.get("feedback_error"):

View File

@ -86,6 +86,18 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
model=_string(defaults.get("model") or data.get("model")),
provider=_string(defaults.get("provider") or data.get("provider")),
embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
max_context_messages=_int(
defaults.get("maxContextMessages")
or defaults.get("max_context_messages")
or data.get("maxContextMessages")
or data.get("max_context_messages")
),
max_tool_iterations=_int(
defaults.get("maxToolIterations")
or defaults.get("max_tool_iterations")
or data.get("maxToolIterations")
or data.get("max_tool_iterations")
),
)
@ -217,6 +229,13 @@ def _float(value: Any) -> float | None:
return float(value)
def _int(value: Any) -> int | None:
    """Parse ``value`` as an integer by way of ``_float``.

    Returns ``None`` whenever ``_float`` yields ``None``; otherwise the
    float result is truncated with ``int()``.
    """
    as_float = _float(value)
    return None if as_float is None else int(as_float)
def _bool(value: Any, *, default: bool) -> bool:
if isinstance(value, bool):
return value

View File

@ -25,6 +25,8 @@ class AgentDefaultsConfig:
model: str | None = None
provider: str | None = None
embedding_model: str | None = None
max_context_messages: int | None = None
max_tool_iterations: int | None = None
@dataclass(slots=True)

View File

@ -44,6 +44,8 @@ from .files import (
workspace_file_path,
)
from .schemas import (
WebChatAcceptanceRequest,
WebChatAcceptanceResponse,
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
@ -155,6 +157,13 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
return decorator
RAW_TOOL_CALL_DISPLAY_FALLBACK = (
"The run reached the configured tool-call limit before producing a reliable final answer. "
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
"Please request a revision to continue the task."
)
@asynccontextmanager
async def _app_lifespan(
app: FastAPI,
@ -365,6 +374,7 @@ def create_app(
"workspace_exists": loaded.workspace.exists(),
"model": config.default_model or agent_service.profile.default_model,
"max_tokens": agent_service.profile.max_tokens,
"max_context_messages": agent_service.profile.max_context_messages,
"temperature": agent_service.profile.temperature,
"max_tool_iterations": agent_service.profile.max_tool_iterations,
"providers": providers_status,
@ -1719,7 +1729,8 @@ def create_app(
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
evidence_status="recorded" if result.task_id else None,
validation_result=None,
)
fallback_target = _model_dump(payload.fallback_target)
@ -1769,7 +1780,8 @@ def create_app(
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
evidence_status="recorded" if result.task_id else None,
validation_result=None,
)
@app.websocket("/ws/{session_id:path}")
@ -1882,6 +1894,30 @@ def create_app(
}
)
@app.post(
    "/api/chat/acceptance",
    response_model=WebChatAcceptanceResponse,
    responses={
        400: {"model": WebErrorResponse},
        404: {"model": WebErrorResponse},
    },
)
async def chat_acceptance(request: Request, payload: WebChatAcceptanceRequest) -> WebChatAcceptanceResponse:
    """Record the user's acceptance decision for a chat run.

    Forwards the payload fields to ``agent_service.submit_acceptance`` and
    wraps the returned mapping in a ``WebChatAcceptanceResponse``.

    Raises:
        HTTPException: 404 when the service's ``ValueError`` message contains
            "No internal task"; 400 for any other ``ValueError``.
    """
    agent_service = get_agent_service(request)
    try:
        result = await agent_service.submit_acceptance(
            session_id=payload.session_id,
            run_id=payload.run_id,
            acceptance_type=payload.acceptance_type,
            comment=payload.comment,
        )
    except ValueError as exc:
        detail = str(exc)
        # The service reports every failure as ValueError, so "not found" is
        # told apart from "bad request" by matching on the message text.
        status_code = 404 if "No internal task" in detail else 400
        raise HTTPException(status_code=status_code, detail=detail) from exc
    return WebChatAcceptanceResponse(**result)
@app.post(
"/api/chat/feedback",
response_model=WebChatFeedbackResponse,
@ -1893,10 +1929,10 @@ def create_app(
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
agent_service = get_agent_service(request)
try:
result = await agent_service.submit_feedback(
result = await agent_service.submit_acceptance(
session_id=payload.session_id,
run_id=payload.run_id,
feedback_type=payload.feedback_type,
acceptance_type=payload.feedback_type,
comment=payload.comment,
)
except ValueError as exc:
@ -1915,15 +1951,21 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
role = event.get("role")
if role not in {"user", "assistant"}:
continue
content = event.get("content") or ""
comparable_content = str(content).replace("\u200b", "").replace("\u200c", "").replace("\u200d", "").replace("\ufeff", "")
if role == "assistant" and not comparable_content.strip():
continue
content = _sanitize_user_visible_assistant_content(role=role, content=content)
messages.append(
{
"role": role,
"content": event.get("content") or "",
"content": content,
"timestamp": _iso_from_timestamp(event.get("timestamp")),
"run_id": event.get("run_id"),
"task_id": event.get("task_id"),
"task_status": event.get("task_status"),
"validation_status": event.get("validation_status"),
"evidence_status": event.get("evidence_status"),
"acceptance_state": event.get("acceptance_state"),
"feedback_state": event.get("feedback_state"),
"feedback_error": event.get("feedback_error"),
"message_type": event.get("message_type"),
@ -2142,6 +2184,7 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
content = (record.content or "").strip()
if not content:
continue
content = _sanitize_user_visible_assistant_content(role=record.role, content=content)
messages.append(
{
"role": record.role,
@ -2150,7 +2193,6 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
"tool_name": record.tool_name,
}
)
validation = run_record.validation_result if run_record is not None else None
views.append(
{
"run_id": run_id,
@ -2163,7 +2205,8 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
"attempt_index": run_record.attempt_index if run_record is not None else None,
"task_text": run_record.task_text if run_record is not None else "",
"messages": messages,
"validation_result": validation,
"evidence_status": "recorded",
"validation_result": None,
}
)
return views
@ -2428,12 +2471,6 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
return dict(value)
def _validation_status(validation_result: dict[str, Any] | None) -> str:
if validation_result is None:
return "unknown"
return "passed" if validation_result.get("accepted") is True else "failed"
def _websocket_input_metadata(payload: dict[str, Any]) -> dict[str, Any]:
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
result: dict[str, Any] = dict(metadata)
@ -2467,13 +2504,15 @@ def _int_or_none(value: Any) -> int | None:
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
validation_result = getattr(result, "validation_result", None)
task_id = getattr(result, "task_id", None)
task_status = getattr(result, "task_status", None)
return {
"type": "message",
"role": "assistant",
"content": getattr(result, "output_text", "") or "",
"content": _sanitize_user_visible_assistant_content(
role="assistant",
content=getattr(result, "output_text", "") or "",
),
"session_id": getattr(result, "session_id", None),
"run_id": getattr(result, "run_id", None),
"finish_reason": getattr(result, "finish_reason", None),
@ -2483,17 +2522,39 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
"usage": dict(getattr(result, "usage", {}) or {}),
"task_id": task_id,
"task_status": task_status,
"validation_result": validation_result,
"validation_status": _validation_status(validation_result),
"evidence_status": "recorded" if task_id else None,
"validation_result": None,
"metadata": {
"task_id": task_id,
"task_status": task_status,
"validation_result": validation_result,
"evidence_status": "recorded" if task_id else None,
"input_metadata": _websocket_input_metadata(input_payload),
},
}
def _sanitize_user_visible_assistant_content(*, role: str, content: str) -> str:
    """Hide raw tool-call markup in assistant messages shown to the user.

    Assistant content that looks like a raw tool call is replaced by
    ``RAW_TOOL_CALL_DISPLAY_FALLBACK``; everything else, including all
    non-assistant content, is returned unchanged.
    """
    suppress = role == "assistant" and _looks_like_raw_tool_call(content)
    return RAW_TOOL_CALL_DISPLAY_FALLBACK if suppress else content
def _looks_like_raw_tool_call(content: str | None) -> bool:
    """Report whether ``content`` consists solely of raw tool-call markup.

    The check is case-insensitive and ignores surrounding whitespace. It
    recognizes text wrapped in ``<tool_call ...>...</tool_call>`` or
    ``<function=...>...</function>``.

    Args:
        content: Message content. Stored events may carry non-string
            content, so any non-string value is treated as "not a raw tool
            call" instead of raising.

    Returns:
        ``True`` only for a non-empty string that matches one of the wrappers.
    """
    if not isinstance(content, str) or not content:
        return False
    lowered = content.strip().lower()
    wrappers = (
        ("<tool_call", "</tool_call>"),
        ("<function=", "</function>"),
    )
    return any(lowered.startswith(opener) and lowered.endswith(closer) for opener, closer in wrappers)
def _provider_enabled(provider_name: str, provider_cfg: Any) -> bool:
if provider_cfg is None or provider_name == "custom":
return False
@ -2980,6 +3041,7 @@ def _write_config_json(path: Path, data: dict[str, Any]) -> None:
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
config = load_config(config_path=config_path)
agent_service.loader.config = config
agent_service._apply_configured_profile_defaults() # noqa: SLF001
loop = getattr(agent_service, "_loop", None)
loaded = getattr(loop, "loaded", None) if loop is not None else None
if loaded is not None:

View File

@ -1,6 +1,8 @@
"""Web request and response schemas."""
from .chat import (
WebChatAcceptanceRequest,
WebChatAcceptanceResponse,
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
@ -13,6 +15,8 @@ from .chat import (
)
__all__ = [
"WebChatAcceptanceRequest",
"WebChatAcceptanceResponse",
"WebChatFeedbackRequest",
"WebChatFeedbackResponse",
"WebChatRequest",

View File

@ -82,11 +82,34 @@ class WebChatResponse(BaseModel):
usage: dict[str, Any] = Field(default_factory=dict)
task_id: str | None = None
task_status: str | None = None
evidence_status: str | None = None
acceptance_state: str | None = None
validation_result: dict[str, Any] | None = None
class WebChatAcceptanceRequest(BaseModel):
    """User acceptance on the latest assistant result in chat."""

    # Session that owns the run being judged.
    session_id: str
    # Run whose result the user is accepting or rejecting.
    run_id: str
    # The user's decision; the service validates and normalizes the value.
    acceptance_type: str
    # Optional free-text note from the user.
    comment: str | None = None
class WebChatAcceptanceResponse(BaseModel):
    """Acceptance recording result."""

    session_id: str
    run_id: str
    # Internal task linked to the run, and its status after recording.
    task_id: str
    task_status: str
    # Normalized acceptance decision.
    acceptance_type: str
    # NOTE(review): presumably the legacy feedback name for the same
    # decision, kept for older clients; confirm against the service.
    feedback_type: str
    # Learning candidates generated as a result of this decision, if any.
    learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebChatFeedbackRequest(BaseModel):
"""Feedback on the latest assistant result in chat."""
"""Backward-compatible feedback payload."""
session_id: str
run_id: str
@ -94,15 +117,8 @@ class WebChatFeedbackRequest(BaseModel):
comment: str | None = None
class WebChatFeedbackResponse(BaseModel):
"""Feedback recording result."""
session_id: str
run_id: str
task_id: str
task_status: str
feedback_type: str
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebChatFeedbackResponse(WebChatAcceptanceResponse):
"""Backward-compatible feedback response."""
class WebProviderConfigRequest(BaseModel):

View File

@ -29,9 +29,9 @@ from beaver.tasks import (
TaskEvidencePacket,
TaskExecutionPlan,
TaskRecord,
ValidationResult,
render_task_evidence,
)
from beaver.tasks.service import normalize_acceptance_type
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
@ -60,11 +60,19 @@ class AgentService:
) -> None:
self.profile = profile or AgentProfile()
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
self._apply_configured_profile_defaults()
self._loop: AgentLoop | None = None
self._run_task: asyncio.Task[None] | None = None
self._main_agent_router = MainAgentRouter()
self._runtime_services: dict[str, Any] = {}
def _apply_configured_profile_defaults(self) -> None:
    """Copy configured agent limits onto the active profile.

    Only values that are explicitly set (not ``None``) in the loader's
    ``agents_defaults`` override the profile. The context-message limit is
    clamped to at least 1 and the tool-iteration limit to at least 0.
    """
    configured = self.loader.config.agents_defaults
    context_limit = configured.max_context_messages
    iteration_limit = configured.max_tool_iterations
    if context_limit is not None:
        self.profile.max_context_messages = max(1, context_limit)
    if iteration_limit is not None:
        self.profile.max_tool_iterations = max(0, iteration_limit)
def create_loop(self) -> AgentLoop:
"""创建并缓存当前 service 使用的 AgentLoop。"""
@ -232,7 +240,7 @@ class AgentService:
Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
This entry bypasses the main-agent classifier and forces Task mode so
every trigger produces a TaskRecord, validation, feedback state, and a
every trigger produces a TaskRecord, evidence, acceptance state, and a
run_id that the scheduled-task history can link to.
"""
@ -280,9 +288,9 @@ class AgentService:
result.run_id,
{
"message_type": "scheduled_reply",
"scheduled_job_id": job.id,
"scheduled_run_id": run.scheduled_run_id,
"cron_job_name": job.name,
"scheduled_job_id": cron_job_id,
"scheduled_run_id": scheduled_run_id,
"cron_job_name": cron_job_name,
"mode": "notification",
},
)
@ -403,15 +411,15 @@ class AgentService:
},
)
async def submit_feedback(
async def submit_acceptance(
self,
*,
session_id: str,
run_id: str,
feedback_type: str,
acceptance_type: str,
comment: str | None = None,
) -> dict[str, Any]:
"""Record chat feedback for the internal task linked to a run."""
"""Record user acceptance for the internal task linked to a run."""
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
@ -419,32 +427,31 @@ class AgentService:
if task is None or task.session_id != session_id:
raise ValueError(f"No internal task found for run_id={run_id!r}")
normalized = feedback_type.strip().lower()
if normalized not in {"satisfied", "revise", "abandon"}:
raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
normalized = normalize_acceptance_type(acceptance_type)
legacy_feedback_type = "satisfied" if normalized == "accept" else normalized
already_recorded = any(
item.get("run_id") == run_id and item.get("feedback_type") == normalized
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_feedback = next(
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != normalized
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_feedback is not None:
if conflicting_acceptance is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not already_recorded:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
updated = task if already_recorded else task_service.add_feedback(
updated = task if already_recorded else task_service.add_acceptance(
task.task_id,
feedback_type=normalized,
acceptance_type=normalized,
comment=comment,
run_id=run_id,
)
@ -455,7 +462,8 @@ class AgentService:
{
"task_id": updated.task_id,
"task_status": updated.status,
"feedback_state": normalized,
"acceptance_state": normalized,
"feedback_state": legacy_feedback_type,
},
)
if not already_recorded:
@ -463,10 +471,11 @@ class AgentService:
session_id,
run_id=run_id,
role="system",
event_type="task_feedback_recorded",
event_type="task_acceptance_recorded",
event_payload={
"task_id": task.task_id,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment,
"task_status": updated.status,
},
@ -475,35 +484,36 @@ class AgentService:
)
generated_candidates = []
validation = ValidationResult.from_dict(updated.validation_result)
if not already_recorded:
run_memory_store = self._require_loaded(loaded, "run_memory_store")
feedback_payload = {
"feedback_type": normalized,
acceptance_payload = {
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment or "",
"task_status": updated.status,
"final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
}
run_memory_store.update_run_record(
run_id,
success=normalized == "satisfied",
feedback=feedback_payload,
success=normalized == "accept",
feedback=acceptance_payload,
)
run_memory_store.update_skill_effects_for_run(
run_id,
success=normalized == "satisfied",
feedback_score=self._feedback_score_for_learning(normalized, validation),
success=normalized == "accept",
feedback_score=self._acceptance_score_for_learning(normalized),
notes=(comment or normalized).strip(),
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
skill_learning_service.rescore_skill_versions()
if already_recorded:
generated_candidates = []
elif normalized == "satisfied" and validation is not None and validation.accepted:
elif normalized == "accept":
generated_candidates = [
item.to_dict()
for item in skill_learning_service.build_learning_candidates_for_task(
updated.task_id,
trigger_run_id=run_id,
final_accepted_run_id=run_id,
)
]
elif normalized == "abandon":
@ -514,7 +524,8 @@ class AgentService:
event_type="task_failure_evidence_recorded",
event_payload={
"task_id": updated.task_id,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment or "",
"task_status": updated.status,
"durable_memory_written": False,
@ -528,10 +539,28 @@ class AgentService:
"run_id": run_id,
"task_id": updated.task_id,
"task_status": updated.status,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"learning_candidates": generated_candidates,
}
async def submit_feedback(
    self,
    *,
    session_id: str,
    run_id: str,
    feedback_type: str,
    comment: str | None = None,
) -> dict[str, Any]:
    """Legacy entry point kept for older clients.

    Delegates to ``submit_acceptance``, passing ``feedback_type`` as the
    ``acceptance_type`` argument, and returns its result unchanged.
    """
    forwarded: dict[str, Any] = {
        "session_id": session_id,
        "run_id": run_id,
        "acceptance_type": feedback_type,
        "comment": comment,
    }
    return await self.submit_acceptance(**forwarded)
async def _process_with_main_agent(
self,
message: str,
@ -591,7 +620,7 @@ class AgentService:
else active_task
)
if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
task = self._record_revision_feedback_for_task(
task = self._record_revision_acceptance_for_task(
loaded,
task=task,
session_id=session_id,
@ -599,7 +628,7 @@ class AgentService:
)
return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
def _record_revision_feedback_for_task(
def _record_revision_acceptance_for_task(
self,
loaded: Any,
*,
@ -607,9 +636,9 @@ class AgentService:
session_id: str,
comment: str,
) -> TaskRecord:
"""Mark the latest feedback-eligible run as revised before continuing a task."""
"""Mark the latest acceptance-eligible run as revised before continuing a task."""
if task.status not in {"awaiting_feedback", "needs_revision"}:
if task.status not in {"awaiting_acceptance", "needs_revision"}:
return task
run_id = next((item for item in reversed(task.run_ids) if item), None)
if not run_id:
@ -617,15 +646,15 @@ class AgentService:
existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
if existing is not None:
if existing.get("feedback_type") != "revise":
if existing.get("acceptance_type") != "revise":
return task
updated = task
already_recorded = True
else:
task_service = self._require_loaded(loaded, "task_service")
updated = task_service.add_feedback(
updated = task_service.add_acceptance(
task.task_id,
feedback_type="revise",
acceptance_type="revise",
comment=comment,
run_id=run_id,
)
@ -638,6 +667,7 @@ class AgentService:
{
"task_id": updated.task_id,
"task_status": updated.status,
"acceptance_state": "revise",
"feedback_state": "revise",
},
)
@ -648,9 +678,10 @@ class AgentService:
session_id,
run_id=run_id,
role="system",
event_type="task_feedback_recorded",
event_type="task_acceptance_recorded",
event_payload={
"task_id": updated.task_id,
"acceptance_type": "revise",
"feedback_type": "revise",
"comment": comment,
"task_status": updated.status,
@ -659,12 +690,12 @@ class AgentService:
content=comment,
context_visible=False,
)
validation = ValidationResult.from_dict(updated.validation_result)
run_memory_store = self._require_loaded(loaded, "run_memory_store")
run_memory_store.update_run_record(
run_id,
success=False,
feedback={
"acceptance_type": "revise",
"feedback_type": "revise",
"comment": comment,
"task_status": updated.status,
@ -673,7 +704,7 @@ class AgentService:
run_memory_store.update_skill_effects_for_run(
run_id,
success=False,
feedback_score=self._feedback_score_for_learning("revise", validation),
feedback_score=self._acceptance_score_for_learning("revise"),
notes=comment.strip() or "revise",
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
@ -690,236 +721,185 @@ class AgentService:
) -> AgentRunResult:
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
validation_service = self._require_loaded(loaded, "validation_service")
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
session_manager = self._require_loaded(loaded, "session_manager")
run_memory_store = self._require_loaded(loaded, "run_memory_store")
last_result: AgentRunResult | None = None
latest_validation: ValidationResult | None = None
base_execution_context = kwargs.get("execution_context")
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
kwargs = dict(kwargs)
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
kwargs["provider_bundle"] = provider_bundle
for attempt_index in (1, 2):
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
plan = await task_execution_planner.plan(
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
plan = await task_execution_planner.plan(
task=task,
user_message=message,
attempt_index=attempt_index,
provider_bundle=provider_bundle,
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_execution_planned",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
provider_bundle=provider_bundle,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(loaded, kwargs),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_execution_planned",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(loaded, kwargs),
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
{
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"allow_candidate_generation": False,
}
)
if attempt_index == 2 and latest_validation is not None:
revision_context = latest_validation.recommended_revision_prompt.strip()
if revision_context:
attempt_kwargs["execution_context"] = self._join_context(
base_execution_context,
f"Task validation revision request:\n{revision_context}",
team_execution_context,
)
elif team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
plan=plan,
team_summaries=team_summaries,
)
result = await runner(message, **attempt_kwargs)
last_result = result
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_synthesis_completed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"main_run_id": result.run_id,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
},
)
task = task_service.append_run(
task.task_id,
result.run_id,
skill_names=self._skill_names_for_run(loaded, result.run_id),
)
evidence_packet = self._build_task_evidence_packet(
session_manager=session_manager,
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
validation = await validation_service.validate_task_result(
task=task,
user_message=message,
final_output=result.output_text,
evidence_packet=evidence_packet,
evidence_text=evidence_text,
transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
team_summaries=team_summaries,
provider_bundle=provider_bundle,
)
latest_validation = validation
has_usable_answer = bool(result.output_text.strip()) and (
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
)
task = task_service.record_validation(
task.task_id,
result.run_id,
validation,
final_attempt=(
attempt_index == 2
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
),
has_usable_answer=has_usable_answer,
)
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
session_manager.update_latest_assistant_event_payload(
result.session_id,
result.run_id,
{
"task_id": task.task_id,
"task_status": task.status,
"validation_status": "passed" if validation.accepted else "failed",
},
)
validation_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
{
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"allow_candidate_generation": False,
}
retry_scheduled = validation.status == "rejected" and attempt_index == 1
session_manager.append_message(
result.session_id,
run_id=result.run_id,
role="system",
event_type="task_validation_snapshotted",
event_payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"validation_result": validation.to_dict(),
"validation_debug": validation_debug,
"retry_scheduled": retry_scheduled,
},
content=validation.recommended_revision_prompt or None,
context_visible=False,
)
if retry_scheduled:
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = validation.to_dict()
if not retry_scheduled:
return result
)
if team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
plan=plan,
team_summaries=team_summaries,
)
if last_result is None: # pragma: no cover - defensive
raise RuntimeError("Task mode did not produce a run result")
return last_result
result = await runner(message, **attempt_kwargs)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_synthesis_completed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"main_run_id": result.run_id,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
},
)
task = task_service.append_run(
task.task_id,
result.run_id,
skill_names=self._skill_names_for_run(loaded, result.run_id),
)
evidence_packet = self._build_task_evidence_packet(
session_manager=session_manager,
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
evidence_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
}
session_manager.update_latest_assistant_event_payload(
result.session_id,
result.run_id,
{
"task_id": task.task_id,
"task_status": task.status,
"evidence_status": "recorded",
},
)
session_manager.append_message(
result.session_id,
run_id=result.run_id,
role="system",
event_type="task_evidence_recorded",
event_payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"evidence_debug": evidence_debug,
},
content=None,
context_visible=False,
)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = None
return result
async def _run_team_for_task(
self,
@ -986,12 +966,10 @@ class AgentService:
return []
@staticmethod
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
if feedback_type == "satisfied":
if validation is not None:
return max(0.0, min(1.0, float(validation.score)))
def _acceptance_score_for_learning(acceptance_type: str) -> float:
if acceptance_type == "accept":
return 1.0
if feedback_type == "revise":
if acceptance_type == "revise":
return 0.5
return 0.0
@ -1001,12 +979,11 @@ class AgentService:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
plan: TaskExecutionPlan | None = None,
team_summaries: list[str] | None = None,
) -> str:
phase = f"attempt_{attempt_index}"
if latest_validation is not None:
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
phase = f"revision_attempt_{attempt_index}"
elif plan is not None and plan.is_team:
phase = f"team_synthesis_attempt_{attempt_index}"
@ -1027,24 +1004,14 @@ class AgentService:
)
else:
sections.append("Previously activated skills:\nNone")
if latest_validation is not None:
validation_lines = [
f"accepted: {latest_validation.accepted}",
f"score: {latest_validation.score}",
]
if latest_validation.issues:
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
if latest_validation.missing_requirements:
validation_lines.append(
"missing requirements:\n"
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
)
if latest_validation.recommended_revision_prompt:
validation_lines.append(
"recommended revision:\n"
+ latest_validation.recommended_revision_prompt
)
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
if task.feedback:
history_lines = []
for item in task.feedback[-5:]:
kind = item.get("acceptance_type") or item.get("feedback_type")
comment = item.get("comment") or ""
run_id = item.get("run_id") or ""
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
if plan is not None:
plan_lines = [
f"mode: {plan.mode}",
@ -1313,7 +1280,8 @@ class AgentService:
"inbound_metadata": dict(inbound.metadata),
"task_id": getattr(result, "task_id", None),
"task_status": getattr(result, "task_status", None),
"validation_result": getattr(result, "validation_result", None),
"evidence_status": "recorded" if getattr(result, "task_id", None) else None,
"validation_result": None,
},
)

View File

@ -235,26 +235,45 @@ class SessionProcessProjector:
metadata=dict(payload),
)
elif record.event_type == "task_validation_snapshotted":
validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
accepted = bool(validation.get("accepted"))
root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
root["finished_at"] = created_at if root["status"] == "done" else None
elif record.event_type == "task_evidence_recorded":
root["status"] = "waiting"
root["finished_at"] = None
add_event(
event_id=_event_id(record, "validation"),
event_id=_event_id(record, "evidence"),
run_id=record.run_id or root_run_id,
parent_run_id=root_run_id if record.run_id else None,
kind="run_status",
actor_type="system",
actor_id="validator",
actor_name="Validator",
text=(
f"Validation {'passed' if accepted else 'failed'} "
f"(score={validation.get('score')})."
+ (" Retry scheduled." if payload.get("retry_scheduled") else "")
),
actor_id="evidence-recorder",
actor_name="Evidence",
text="Task evidence was recorded; waiting for user acceptance.",
created_at=created_at,
status="done" if accepted else "error",
status="done",
metadata=dict(payload),
)
elif record.event_type == "task_acceptance_recorded":
acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
if acceptance_type == "accept":
root["status"] = "done"
root["finished_at"] = created_at
elif acceptance_type == "abandon":
root["status"] = "cancelled"
root["finished_at"] = created_at
else:
root["status"] = "waiting"
root["finished_at"] = None
add_event(
event_id=_event_id(record, "acceptance"),
run_id=record.run_id or root_run_id,
parent_run_id=root_run_id if record.run_id else None,
kind="run_status",
actor_type="user",
actor_id="user-acceptance",
actor_name="User Acceptance",
text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
created_at=created_at,
status="done",
metadata=dict(payload),
)

View File

@ -69,15 +69,24 @@ class SkillLearningService:
existing_ids.add(candidate.candidate_id)
return candidates
def build_learning_candidates_for_task(self, task_id: str, *, trigger_run_id: str) -> list[SkillLearningCandidate]:
"""Build candidates scoped to a single validated and satisfied Task run."""
def build_learning_candidates_for_task(
self,
task_id: str,
*,
final_accepted_run_id: str | None = None,
trigger_run_id: str | None = None,
) -> list[SkillLearningCandidate]:
"""Build candidates from a user-accepted Task and all of its runs."""
final_accepted_run_id = final_accepted_run_id or trigger_run_id
if not final_accepted_run_id:
return []
runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
trigger_run = next((record for record in runs if record.run_id == trigger_run_id), None)
if trigger_run is None or not self._is_confirmed_positive_run(trigger_run):
final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
if final_run is None or not self._is_task_accepted_run(final_run):
return []
source_runs = [record for record in runs if self._is_confirmed_positive_run(record)]
source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
if not source_runs:
return []
@ -100,11 +109,16 @@ class SkillLearningService:
source_session_ids=source_session_ids,
related_skill_names=[],
reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
evidence={"task_id": task_id, "trigger_run_id": trigger_run_id, "theme": self._task_theme(trigger_run.task_text)},
evidence={
"task_id": task_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"theme": self._task_theme(final_run.task_text),
},
status="open",
priority=1,
confidence=0.8,
trigger_reason="validation_accepted_and_user_satisfied",
trigger_reason="task_accepted",
)
)
else:
@ -137,13 +151,14 @@ class SkillLearningService:
),
evidence={
"task_id": task_id,
"trigger_run_id": trigger_run_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"skill_version": receipt.skill_version,
},
status="open",
priority=1,
confidence=0.7,
trigger_reason="validation_accepted_and_user_satisfied",
trigger_reason="task_accepted",
)
)
@ -269,7 +284,7 @@ class SkillLearningService:
groups.setdefault(key, []).append(record)
candidates: list[SkillLearningCandidate] = []
for theme, runs in groups.items():
successful = [record for record in runs if self._is_confirmed_positive_run(record)]
successful = [record for record in runs if self._is_task_accepted_run(record)]
if len(successful) < 2:
continue
if any(record.activated_skills for record in successful):
@ -290,7 +305,7 @@ class SkillLearningService:
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
for record in self.run_store.list_runs():
if not self._is_confirmed_positive_run(record):
if not self._is_task_accepted_run(record):
continue
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
for pair in combinations(unique, 2):
@ -351,14 +366,15 @@ class SkillLearningService:
return effects
@staticmethod
def _is_confirmed_positive_run(record: RunRecord) -> bool:
validation = record.validation_result or {}
def _is_task_accepted_run(record: RunRecord) -> bool:
feedback = record.feedback or {}
acceptance_type = feedback.get("acceptance_type")
if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
acceptance_type = "accept"
return (
bool(record.success)
and bool(record.task_id)
and validation.get("accepted") is True
and feedback.get("feedback_type") == "satisfied"
and acceptance_type == "accept"
)
@staticmethod

View File

@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
from .router import MainAgentRouter
from .service import TaskService
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .validation import ValidationService
__all__ = [
"EvidenceBuilder",
@ -24,6 +23,5 @@ __all__ = [
"ToolEvidence",
"ValidationResult",
"ValidationStatus",
"ValidationService",
"render_task_evidence",
]

View File

@ -1,4 +1,4 @@
"""Models for internal task tracking and validation."""
"""Models for internal task tracking and user acceptance."""
from __future__ import annotations
@ -9,7 +9,12 @@ from typing import Any, Literal
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
LEGACY_STATUS_MAP = {
"validating": "running",
"awaiting_feedback": "awaiting_acceptance",
"needs_review": "awaiting_acceptance",
}
@dataclass(slots=True)
@ -113,11 +118,11 @@ class TaskRecord:
@property
def is_execution_active(self) -> bool:
return self.status in {"running", "validating"}
return self.status == "running"
@property
def requires_user_action(self) -> bool:
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
return self.status in {"awaiting_acceptance", "needs_revision"}
def to_dict(self) -> dict[str, Any]:
return {
@ -137,6 +142,7 @@ class TaskRecord:
"satisfaction": self.satisfaction,
"run_ids": list(self.run_ids),
"skill_names": list(self.skill_names),
"acceptance": list(self.feedback),
"feedback": list(self.feedback),
"validation_result": self.validation_result,
"metadata": dict(self.metadata),
@ -152,7 +158,7 @@ class TaskRecord:
goal=str(payload.get("goal") or payload.get("description") or ""),
constraints=[str(item) for item in payload.get("constraints") or []],
priority=int(payload.get("priority", 0) or 0),
status=str(payload.get("status") or "open"),
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
creator=str(payload.get("creator") or "main-agent"),
created_at=str(payload.get("created_at") or ""),
updated_at=str(payload.get("updated_at") or ""),
@ -161,7 +167,11 @@ class TaskRecord:
satisfaction=_optional_float(payload.get("satisfaction")),
run_ids=[str(item) for item in payload.get("run_ids") or []],
skill_names=[str(item) for item in payload.get("skill_names") or []],
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
feedback=[
_normalize_acceptance_entry(dict(item))
for item in (payload.get("acceptance") or payload.get("feedback") or [])
if isinstance(item, dict)
],
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
metadata=dict(payload.get("metadata") or {}),
)
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
    """Make sure an entry carries both the new and the legacy type key.

    When only ``feedback_type`` is set, ``acceptance_type`` is derived from it
    (``"satisfied"`` becomes ``"accept"``). When only ``acceptance_type`` is
    set, ``feedback_type`` is derived from it (``"accept"`` becomes
    ``"satisfied"``). Entries that have both keys, or neither, are left alone.

    The dict is updated in place and the same object is returned.
    """
    legacy_value = entry.get("feedback_type")
    current_value = entry.get("acceptance_type")

    if current_value is None and legacy_value is not None:
        legacy_text = str(legacy_value or "")
        if legacy_text == "satisfied":
            entry["acceptance_type"] = "accept"
        else:
            entry["acceptance_type"] = legacy_text
    elif legacy_value is None and current_value is not None:
        # Only one of the two branches can apply, so elif matches the
        # behavior of two independent checks.
        current_text = str(current_value or "")
        if current_text == "accept":
            entry["feedback_type"] = "satisfied"
        else:
            entry["feedback_type"] = current_text
    return entry

View File

@ -10,7 +10,7 @@ from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
from .models import TaskRecord
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
provider_bundle: ProviderBundle | None = None,
timeout_seconds: float = 30.0,
) -> TaskExecutionPlan:
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
task=task,
user_message=user_message,
attempt_index=attempt_index,
latest_validation=latest_validation,
),
},
],
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None,
) -> str:
validation_note = ""
if latest_validation is not None:
validation_note = (
"\nPrevious validation issues:\n"
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
)
history_note = ""
if task.feedback:
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
return (
"Decide execution mode for this internal Task attempt.\n"
"Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Attempt index: {attempt_index}\n"
f"{validation_note}"
f"{history_note}"
)
@staticmethod

View File

@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any
from uuid import uuid4
from .models import TaskEvent, TaskRecord, ValidationResult
from .models import TaskEvent, TaskRecord
from .store import TaskStore
@ -105,38 +105,70 @@ class TaskService:
for name in skill_names or []:
if name not in task.skill_names:
task.skill_names.append(name)
task.status = "awaiting_acceptance"
task.updated_at = self._now()
self.store.upsert_task(task)
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
return task
def record_validation(
def add_acceptance(
self,
task_id: str,
run_id: str,
validation: ValidationResult,
*,
final_attempt: bool = True,
has_usable_answer: bool = True,
acceptance_type: str,
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
if validation.status == "accepted":
task.status = "awaiting_feedback"
elif validation.status in {"insufficient_evidence", "validator_error"}:
task.status = "needs_review"
elif validation.status == "rejected" and not final_attempt:
normalized = normalize_acceptance_type(acceptance_type)
matching_acceptance = any(
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_acceptance is not None:
raise ValueError(
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_acceptance:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_acceptance:
return task
entry = {
"acceptance_type": normalized,
"feedback_type": "satisfied" if normalized == "accept" else normalized,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if normalized == "revise":
task.status = "needs_revision"
elif validation.status == "rejected" and has_usable_answer:
task.status = "needs_review"
else:
task.status = "failed"
elif normalized == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = "automatic validation rejected the final attempt"
task.close_reason = comment or "abandoned"
elif normalized == "accept":
task.status = "closed"
task.closed_at = now
task.close_reason = "accepted"
task.satisfaction = 1.0
if run_id:
task.metadata["final_accepted_run_id"] = run_id
task.updated_at = now
task.validation_result = validation.to_dict()
self.store.upsert_task(task)
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
return task
def add_feedback(
@ -147,52 +179,12 @@ class TaskService:
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
matching_feedback = any(
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
for item in task.feedback
return self.add_acceptance(
task_id,
acceptance_type=feedback_type,
comment=comment,
run_id=run_id,
)
conflicting_feedback = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
),
None,
)
if conflicting_feedback is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_feedback:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_feedback:
return task
entry = {
"feedback_type": feedback_type,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if feedback_type == "revise":
task.status = "needs_revision"
elif feedback_type == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = comment or "abandoned"
elif feedback_type == "satisfied":
task.status = "closed"
task.closed_at = now
task.close_reason = "satisfied"
task.satisfaction = 1.0
task.updated_at = now
self.store.upsert_task(task)
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
return task
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
task = self._require(task_id)
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
if len(words) <= 4:
return cleaned[:40]
return " ".join(words[:4])[:40]
def normalize_acceptance_type(value: str) -> str:
    """Return the canonical acceptance label for ``value``.

    The input is stripped and lower-cased (a falsy input is treated as an
    empty string). The feedback-style label ``"satisfied"`` is an alias of
    ``"accept"``.

    Raises:
        ValueError: if the cleaned label is not ``accept``, ``revise`` or
            ``abandon`` (or the ``satisfied`` alias).
    """
    label = (value or "").strip().lower()
    if label == "satisfied":
        return "accept"
    if label in {"accept", "revise", "abandon"}:
        return label
    raise ValueError("acceptance_type must be one of: accept, revise, abandon")

View File

@ -1,154 +0,0 @@
"""Automatic validation for internal Task mode."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
class ValidationService:
    """Judge whether a task run's final output satisfies the task.

    An LLM provider is used when one is supplied; otherwise the decision
    falls back to a small text heuristic.
    """

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        evidence_packet: Any | None = None,
        evidence_text: str = "",
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Validate ``final_output`` for ``task`` and return the verdict.

        The auxiliary provider/runtime of ``provider_bundle`` is preferred
        over the main one. Any exception raised on the provider path is
        converted into a ``validator_error`` result instead of propagating.
        When no provider is available the heuristic validator is used.

        ``evidence_packet`` is accepted for interface compatibility but is
        not read by this method.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)

        if provider is None:
            return self._heuristic_validate(final_output)

        try:
            return await self._validate_with_provider(
                provider=provider,
                model=model,
                task=task,
                user_message=user_message,
                final_output=final_output,
                evidence_text=evidence_text,
                transcript_excerpt=transcript_excerpt,
                tool_summaries=tool_summaries or [],
                team_summaries=team_summaries or [],
            )
        except Exception as exc:
            # Deliberate boundary: a broken validator must not fail the run,
            # it only downgrades the verdict to "needs user review".
            return ValidationResult(
                status="validator_error",
                score=0.0,
                issues=[f"Validator failed: {exc}"],
                evidence_gaps=["Automatic validation failed before producing a reliable decision."],
                missing_requirements=["User review is required because automatic validation failed."],
                recommended_revision_prompt=(
                    "Review the answer and evidence, then decide whether to revise or accept it."
                ),
                validator="llm_error",
            )

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        evidence_text: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask ``provider`` for a JSON verdict and convert it to a result.

        The transcript/tool/team context is only included in the prompt when
        ``evidence_text`` is empty. If the reply carries no recognised
        ``status``, one is derived: ``accepted`` when ``passed`` is truthy and
        the score is at least 0.75, otherwise ``rejected``.

        Raises whatever the provider call, JSON parsing or numeric conversion
        raises; the caller turns that into a ``validator_error`` result.
        """
        legacy_context = "" if evidence_text else (
            f"Transcript excerpt:\n{transcript_excerpt}\n\n"
            f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
            f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
        )
        prompt = (
            "Validate whether the assistant output satisfies the task. "
            "Return only compact JSON with keys: passed, score, issues, "
            "missing_requirements, recommended_revision_prompt.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Evidence packet:\n{evidence_text}\n\n"
            f"{legacy_context}"
            f"Assistant final output:\n{final_output}"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": "You are a strict task result validator."},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=4096,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")

        # Parse the score once; it feeds both the derived status and the result.
        score = float(payload.get("score", 0.0) or 0.0)

        status = payload.get("status")
        if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
            status = "accepted" if payload.get("passed") and score >= 0.75 else "rejected"

        return ValidationResult(
            status=status,
            score=max(0.0, min(1.0, score)),
            issues=self._string_list(payload.get("issues")),
            missing_requirements=self._string_list(payload.get("missing_requirements")),
            evidence_gaps=self._string_list(payload.get("evidence_gaps")),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @staticmethod
    def _string_list(value: Any) -> list[str]:
        """Coerce a JSON field into a list of strings.

        A bare string becomes a one-item list instead of being iterated
        character by character; falsy values become an empty list. Other
        values are iterated and each item is converted with ``str``.
        """
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return [str(item) for item in value]

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Validate without a provider, using only the output text.

        Empty output fails with score 0.0; output containing one of two known
        failure phrases fails with score 0.35; anything else passes with 0.85.
        """
        text = final_output.strip()
        if not text:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )
        lowered = text.lower()
        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )
        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and decode the JSON object embedded in ``text``.

        Surrounding backticks and a leading ``json`` language tag are removed,
        then the span from the first ``{`` to the last ``}`` is decoded.

        Raises:
            json.JSONDecodeError: if the remaining text is not valid JSON.
            ValueError: if the decoded value is not a JSON object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("validator response must be a JSON object")
        return payload

View File

@ -0,0 +1,28 @@
from __future__ import annotations
from beaver.engine.context import ContextBuildInput, ContextBuilder, RuntimeContext, SessionContext
def test_context_builder_injects_current_date_and_time() -> None:
    """The system prompt must expose every field of the supplied runtime context."""
    session = SessionContext(session_id="web:alpha", source="web", model="stub-model")
    runtime = RuntimeContext(
        utc_datetime="2026-05-26T01:10:00+00:00",
        local_datetime="2026-05-26T09:10:00+08:00",
        timezone="Asia/Shanghai",
        utc_offset="+08:00",
    )
    build_input = ContextBuildInput(
        base_system_prompt="Follow user requests.",
        current_user_input="今天几号?",
        session_context=session,
        runtime_context=runtime,
    )

    result = ContextBuilder().build_messages(build_input)

    system_prompt = result.messages[0]["content"]
    expected_fragments = (
        "# Current Date and Time",
        "Current UTC time: 2026-05-26T01:10:00+00:00",
        "Current local time: 2026-05-26T09:10:00+08:00",
        "Local timezone: Asia/Shanghai",
        "Local UTC offset: +08:00",
        '"today", "tomorrow", "now", "this week", and "next month"',
    )
    for fragment in expected_fragments:
        assert fragment in system_prompt
    # The user's turn must remain the last message, untouched.
    assert result.messages[-1] == {"role": "user", "content": "今天几号?"}

View File

@ -18,8 +18,8 @@ class FakeResult:
model: str | None = "fake-model"
usage: dict[str, Any] = field(default_factory=dict)
task_id: str | None = "task-1"
task_status: str | None = "awaiting_feedback"
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
task_status: str | None = "awaiting_acceptance"
validation_result: dict[str, Any] | None = None
class FakeService:
@ -79,8 +79,9 @@ def test_gateway_routes_memory_channel_roundtrip() -> None:
assert message.session_id == "s1"
assert message.finish_reason == "stop"
assert message.metadata["task_id"] == "task-1"
assert message.metadata["task_status"] == "awaiting_feedback"
assert message.metadata["validation_result"] == {"accepted": True}
assert message.metadata["task_status"] == "awaiting_acceptance"
assert message.metadata["evidence_status"] == "recorded"
assert message.metadata["validation_result"] is None
stop_event.set()
await asyncio.wait_for(task, timeout=2)

View File

@ -113,6 +113,19 @@ def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> N
assert LiteLLMProvider._sanitize_messages(messages)[0]["reasoning_content"] == "must be passed back"
def test_litellm_provider_merges_late_system_messages_to_front() -> None:
    """A system message that follows a user turn is merged into the leading one."""
    conversation = [
        {"role": "system", "content": "base"},
        {"role": "user", "content": "question"},
        {"role": "system", "content": "finalize without tools"},
    ]

    cleaned = LiteLLMProvider._sanitize_messages(conversation)

    roles = [entry["role"] for entry in cleaned]
    assert roles == ["system", "user"]
    assert cleaned[0]["content"] == "base\n\nfinalize without tools"
def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict = {}

View File

@ -79,7 +79,7 @@ def _task() -> TaskRecord:
goal="实现任务连续性",
constraints=[],
priority=0,
status="awaiting_feedback",
status="awaiting_acceptance",
creator="test",
created_at="now",
updated_at="now",

View File

@ -35,6 +35,7 @@ class StubProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
if not self._responses:
raise AssertionError("No stubbed provider responses left")
@ -47,11 +48,22 @@ class StubProvider(LLMProvider):
class StubSkillAssembler:
    """Skill assembler double: returns a fixed activation list and logs every call."""

    def __init__(self, activated_skills: list[SkillContext]) -> None:
        self.activated_skills = activated_skills
        # One entry per assemble() call, holding the keyword arguments received.
        self.calls: list[dict] = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Record ``kwargs`` and return a copy of the configured skills."""
        self.calls.append(kwargs)
        skills_copy = [*self.activated_skills]
        return SkillAssemblyResult(activated_skills=skills_copy)
class RecordingToolAssembler:
    """Tool assembler double: logs each call and always requests the ``memory`` specs."""

    def __init__(self) -> None:
        # One entry per assemble() call, holding the keyword arguments received.
        self.calls: list[dict] = []

    async def assemble(self, **kwargs):
        """Record ``kwargs`` and return ``registry.get_specs(["memory"])``."""
        self.calls.append(kwargs)
        registry = kwargs["registry"]
        return registry.get_specs(["memory"])
def _tool_call(*, name: str = "echo", arguments: dict | None = None, call_id: str = "call-1") -> SimpleNamespace:
return SimpleNamespace(
id=call_id,
@ -576,6 +588,48 @@ def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
assert effect_records[-1].run_id == result.run_id
def test_thinking_disabled_still_uses_skill_and_tool_assembly(tmp_path: Path) -> None:
    """With ``thinking_enabled=False`` both the skill and tool assemblers are still invoked."""
    docker_skill = SkillContext(
        name="docker-debug",
        content="Use docker logs before editing config.",
        version="v0007",
        content_hash="hash-v7",
        activation_reason="llm_selected",
        tool_hints=["terminal"],
    )
    skill_assembler = StubSkillAssembler([docker_skill])
    tool_assembler = RecordingToolAssembler()
    loop = AgentLoop(
        loader=EngineLoader(
            workspace=tmp_path,
            skill_assembler=skill_assembler,
            tool_assembler=tool_assembler,
        )
    )
    scripted_reply = LLMResponse(
        content="Done",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    bundle = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider([scripted_reply]),
    )

    result = asyncio.run(
        loop.process_direct(
            "Why is the Docker container crashing?",
            provider_bundle=bundle,
            thinking_enabled=False,
        )
    )

    session_manager = loop.boot().session_manager
    events = session_manager.get_run_event_records(result.session_id, result.run_id)
    tool_selection = next(
        record for record in events if record.event_type == "tool_selection_snapshotted"
    )

    # Skill assembly ran and was told that thinking is disabled.
    assert skill_assembler.calls
    assert skill_assembler.calls[0]["thinking_enabled"] is False
    # Tool assembly ran and received the activated skill.
    assert tool_assembler.calls
    activated_names = [item.name for item in tool_assembler.calls[0]["activated_skills"]]
    assert activated_names == ["docker-debug"]
    assert tool_selection.event_payload["tool_names"] == ["memory"]
def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path: Path) -> None:
skill = SkillContext(
name="docker-debug",
@ -635,6 +689,52 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
assert effect_records[-1].success is False
def test_agent_loop_suppresses_raw_tool_call_when_finalizing_after_tool_limit(tmp_path: Path) -> None:
    """Raw ``<tool_call>`` markup in the finalizing reply must not reach the output text."""
    raw_tool_call_markup = "\n".join(
        [
            "<tool_call>",
            "<function=mcp_local_web_mcp_web_fetch>",
            "<parameter=url>https://example.com</parameter>",
            "</function>",
            "</tool_call>",
        ]
    )
    scripted_replies = [
        # First reply asks for a tool, which the zero-iteration budget disallows.
        LLMResponse(
            content="Need a tool.",
            finish_reason="tool_calls",
            tool_calls=[_tool_call()],
            provider_name="stub",
            model="stub-model",
        ),
        # Second reply leaks tool-call markup as plain text.
        LLMResponse(
            content=raw_tool_call_markup,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        ),
    ]
    loop = AgentLoop(
        loader=EngineLoader(
            workspace=tmp_path,
            skill_assembler=StubSkillAssembler([]),
        )
    )
    bundle = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider(scripted_replies),
    )

    result = asyncio.run(
        loop.process_direct(
            "Fetch the latest result",
            provider_bundle=bundle,
            max_tool_iterations=0,
        )
    )

    assert result.finish_reason == "max_tool_iterations"
    assert "<tool_call>" not in result.output_text
    assert "raw tool call was suppressed" in result.output_text
def test_llm_request_snapshot_defaults_to_compact_payload(tmp_path: Path) -> None:
loop = AgentLoop(loader=EngineLoader(workspace=tmp_path, skill_assembler=StubSkillAssembler([])))
bundle = ProviderBundle(

View File

@ -101,12 +101,11 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
"web:test",
run_id="main-run",
role="system",
event_type="task_validation_snapshotted",
event_type="task_evidence_recorded",
event_payload={
"task_id": "task-1",
"attempt_index": 1,
"validation_result": {"accepted": True, "score": 0.9},
"retry_scheduled": False,
"evidence_status": "recorded",
},
context_visible=False,
)
@ -121,7 +120,7 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
assert sub_run["metadata"]["selected_skill_names"] == ["research-workflow"]
assert sub_run["metadata"]["skill_query"] == "research workflow"
assert sub_run["metadata"]["ephemeral_guidance_id"] is None
assert any(event["actor_name"] == "Validator" for event in projection["events"])
assert any(event["actor_name"] == "Evidence" for event in projection["events"])
assert any(run["session_id"] == "web:test" for run in projection["runs"])

View File

@ -4,23 +4,17 @@ import asyncio
from pathlib import Path
from types import SimpleNamespace
import pytest
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine import EngineLoader
from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.services.agent_service import AgentService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.tasks import TaskExecutionPlan, TaskRecord, TaskService, ValidationResult, ValidationService
from beaver.tasks import TaskExecutionPlan, TaskService
class StubProvider(LLMProvider):
def __init__(self, responses: list[LLMResponse]) -> None:
super().__init__()
self._responses = list(responses)
self.calls: list[dict[str, object]] = []
async def chat(
self,
@ -30,7 +24,6 @@ class StubProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
) -> LLMResponse:
self.calls.append({"messages": messages, "tools": tools, "model": model})
if not self._responses:
raise AssertionError("No stubbed provider responses left")
return self._responses.pop(0)
@ -39,30 +32,9 @@ class StubProvider(LLMProvider):
return "stub-model"
class StubValidationService:
    """Validation double that replays queued results in order and logs each call."""

    def __init__(self, results: list[ValidationResult]) -> None:
        # Copy so that popping does not mutate the caller's list.
        self.results = [*results]
        self.calls: list[dict] = []

    async def validate_task_result(self, **kwargs) -> ValidationResult:
        """Record ``kwargs`` and return the next queued result.

        Raises:
            AssertionError: if the queue is already empty.
        """
        self.calls.append(kwargs)
        if self.results:
            return self.results.pop(0)
        raise AssertionError("No stubbed validation results left")
class StubTaskExecutionPlanner:
def __init__(self, plans: list[TaskExecutionPlan] | None = None) -> None:
self.plans = list(plans or [TaskExecutionPlan.single("test-single")])
self.calls = []
async def plan(self, **kwargs) -> TaskExecutionPlan:
self.calls.append(kwargs)
if len(self.plans) == 1:
return self.plans[0]
if not self.plans:
raise AssertionError("No stubbed execution plans left")
return self.plans.pop(0)
return TaskExecutionPlan.single("test-single")
class FakeLearningCandidate:
@ -70,15 +42,6 @@ class FakeLearningCandidate:
return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
class RecordingSkillAssembler:
    """Skill assembler double that captures each ``task_description`` it is given."""

    def __init__(self) -> None:
        self.task_descriptions: list[str] = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Store ``kwargs["task_description"]`` and return an empty assembly result."""
        description = kwargs["task_description"]
        self.task_descriptions.append(description)
        return SkillAssemblyResult()
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
return LLMResponse(
content=f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}',
@ -107,828 +70,157 @@ def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
)
def _single_planner() -> StubTaskExecutionPlanner:
    """Return a planner stub preloaded with one single-mode plan."""
    single_plan = TaskExecutionPlan.single("test-single")
    return StubTaskExecutionPlanner([single_plan])
def _team_plan(strategy: str = "sequence") -> TaskExecutionPlan:
    """Build a team-mode plan whose graph holds one ``research`` node."""
    researcher = AgentDescriptor(name="researcher", role="research")
    research_node = ExecutionNode(
        node_id="research",
        task="research implementation options",
        agent=researcher,
    )
    graph = ExecutionGraph(
        strategy=strategy,  # type: ignore[arg-type]
        nodes=[research_node],
    )
    return TaskExecutionPlan(
        mode="team",
        reason="test-team",
        graph=graph,
        final_synthesis_instruction="Use the sub-agent result to produce the final answer.",
    )
def _provider_bundle(provider: StubProvider) -> ProviderBundle:
    """Wrap ``provider`` as the main provider, with an auxiliary stub that routes to ``new_task``."""
    # Two separate runtime objects, so main and auxiliary never share state.
    main_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    auxiliary_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    router = StubProvider([_route_response("new_task")])
    return ProviderBundle(
        main_runtime=main_runtime,
        main_provider=provider,
        auxiliary_runtime=auxiliary_runtime,
        auxiliary_provider=router,
    )
def _main_only_bundle(*responses: str) -> ProviderBundle:
    """Build a bundle with only a main provider that replays ``responses`` as final answers."""
    scripted: list[LLMResponse] = []
    for text in responses:
        scripted.append(
            LLMResponse(
                content=text,
                finish_reason="stop",
                provider_name="stub",
                model="stub-model",
            )
        )
    return ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider(scripted),
    )
def _task_record(status: str) -> TaskRecord:
    """Return a fixed ``TaskRecord`` fixture whose only varying field is ``status``."""
    timestamp = "2026-05-22T00:00:00+00:00"
    fields = {
        "task_id": "task-1",
        "session_id": "session-1",
        "description": "test task",
        "goal": "test task",
        "constraints": [],
        "priority": 0,
        "status": status,
        "creator": "main-agent",
        "created_at": timestamp,
        "updated_at": timestamp,
    }
    return TaskRecord(**fields)
def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService([]),
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"hello?",
session_id="web:simple",
provider_bundle=_bundle("hi", route_action="simple_chat"),
)
)
loaded = service.create_loop().boot()
assert result.task_id is None
assert loaded.task_service.store.list_tasks() == []
def test_complex_request_creates_task_and_records_validation(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=0.9, validator="test")]
),
"draft release notes",
session_id="web:test",
provider_bundle=_bundle("Done"),
)
)
result = asyncio.run(
service.process_direct(
"implement the new report workflow",
session_id="web:task",
provider_bundle=_bundle("implemented"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task_by_run_id(result.run_id)
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
run_record = loaded.run_memory_store.list_runs()[-1]
skill_effects = next(event for event in events if event.event_type == "skill_effects_snapshotted")
assert result.task_id is not None
task_service = service.create_loop().boot().task_service
assert task_service is not None
task = task_service.get_task(result.task_id or "")
assert task is not None
assert task.status == "awaiting_feedback"
assert any(event.event_type == "task_validation_snapshotted" for event in events)
assert run_record.task_id == result.task_id
assert run_record.validation_result["accepted"] is True
assert skill_effects.event_payload["candidate_generation_allowed"] is False
assert skill_effects.event_payload["learning_candidates"] == []
assert task.metadata["short_title"] == "Test task"
assert task.status == "awaiting_acceptance"
assert task.validation_result is None
assert result.validation_result is None
event_types = [event.event_type for event in task_service.list_events(task.task_id)]
assert "evidence_recorded" in event_types
assert "validated" not in event_types
def test_task_mode_uses_task_aware_skill_selection_context(tmp_path: Path) -> None:
skill_assembler = RecordingSkillAssembler()
def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=1.0, validator="test")]
),
skill_assembler=skill_assembler,
)
)
result = asyncio.run(
service.process_direct(
"继续按刚才的方案改",
session_id="web:task-skill-query",
provider_bundle=_bundle("done", route_action="new_task"),
)
)
assert result.task_id
assert skill_assembler.task_descriptions
query = skill_assembler.task_descriptions[0]
assert "Task goal:" in query
assert "Current user request:" in query
assert "Previously activated skills:" in query
assert "If no published skill matches, return []" in query
def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=True, score=0.9, validator="test"),
ValidationResult(passed=True, score=0.9, validator="test"),
]
),
)
)
first = asyncio.run(
service.process_direct(
"implement the search workflow",
session_id="web:continue",
provider_bundle=_bundle("first done", route_action="new_task"),
)
)
second = asyncio.run(
service.process_direct(
"also add tests for it",
session_id="web:continue",
provider_bundle=_bundle("tests added", route_action="continue_task"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(first.task_id)
assert task is not None
assert second.task_id == first.task_id
assert len(task.run_ids) == 2
closed = asyncio.run(
service.process_direct(
"这个任务结束了",
session_id="web:continue",
provider_bundle=_bundle("好的,已结束。", route_action="close_task"),
)
)
task = loaded.task_service.get_task(first.task_id)
assert closed.task_id is None
assert task is not None
assert task.status == "closed"
assert loaded.task_service.active_task_view("web:continue") is None
def test_active_task_revision_input_records_feedback_and_reruns(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=True, score=0.9, validator="test"),
ValidationResult(passed=True, score=0.95, validator="test"),
]
),
)
)
first = asyncio.run(
service.process_direct(
"查询珠海天气",
session_id="web:revise-direct",
provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
)
)
second = asyncio.run(
service.process_direct(
"再详细一点,并加上明后天穿衣建议",
session_id="web:revise-direct",
provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(first.task_id)
messages = loaded.session_manager.get_messages_as_conversation(first.session_id)
first_assistant = [
message
for message in messages
if message.get("role") == "assistant" and message.get("run_id") == first.run_id
][-1]
user_messages = [message.get("content") for message in messages if message.get("role") == "user"]
assert second.task_id == first.task_id
assert task is not None
assert task.status == "awaiting_feedback"
assert len(task.run_ids) == 2
assert task.feedback == [
{
"feedback_type": "revise",
"comment": "再详细一点,并加上明后天穿衣建议",
"run_id": first.run_id,
"created_at": task.feedback[0]["created_at"],
}
]
assert first_assistant["feedback_state"] == "revise"
assert "再详细一点,并加上明后天穿衣建议" in user_messages
def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=True, score=0.9, validator="test"),
ValidationResult(passed=True, score=0.95, validator="test"),
]
),
)
)
first = asyncio.run(
service.process_direct(
"查询珠海天气",
session_id="web:explicit-revise",
provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
)
)
feedback = asyncio.run(
service.submit_feedback(
session_id=first.session_id,
run_id=first.run_id,
feedback_type="revise",
comment="准备补充穿衣建议",
)
)
second = asyncio.run(
service.process_direct(
"加上明后天穿衣建议",
session_id="web:explicit-revise",
provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(first.task_id)
assert feedback["task_status"] == "needs_revision"
assert second.task_id == first.task_id
assert task is not None
assert task.status == "awaiting_feedback"
assert len(task.run_ids) == 2
assert len(task.feedback) == 1
assert task.feedback[0]["feedback_type"] == "revise"
assert task.feedback[0]["comment"] == "准备补充穿衣建议"
def test_validation_result_status_drives_accepted_and_passed() -> None:
    """Only the ``accepted`` status yields ``passed`` and ``accepted`` being True."""
    expectations = {
        "accepted": True,
        "insufficient_evidence": False,
        "rejected": False,
    }
    outcomes = {
        status: ValidationResult(status=status, score=0.9, validator="test")
        for status in expectations
    }

    for status, expected in expectations.items():
        assert outcomes[status].passed is expected
        assert outcomes[status].accepted is expected
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
    """Payloads carrying ``passed``/``score`` but no ``status`` get a status on load."""
    cases = [
        ({"passed": True, "score": 0.9, "validator": "legacy"}, "accepted"),
        ({"passed": True, "score": 0.7, "validator": "legacy"}, "rejected"),
        ({"passed": False, "score": 0.2, "validator": "legacy"}, "rejected"),
    ]
    restored = [ValidationResult.from_dict(payload) for payload, _ in cases]

    for result, (_, expected_status) in zip(restored, cases):
        assert result is not None
        assert result.status == expected_status
def test_validation_result_rejects_unknown_status() -> None:
    """Constructing a result with an unrecognised status raises ``ValueError``."""
    expected_error = pytest.raises(ValueError, match="unknown validation status")
    with expected_error:
        ValidationResult(status="pending", score=0.9, validator="test")  # type: ignore[arg-type]
def test_validation_result_from_dict_rejects_unknown_explicit_status() -> None:
    """An explicit unknown ``status`` is rejected even when ``passed`` is present."""
    payload = {"status": "pending", "passed": True, "score": 0.9}
    with pytest.raises(ValueError, match="unknown validation status"):
        ValidationResult.from_dict(payload)
def test_validation_result_evidence_gaps_round_trip() -> None:
    """``evidence_gaps`` and ``status`` survive a ``to_dict`` / ``from_dict`` round trip."""
    gaps = ["missing command output", "missing file reference"]
    original = ValidationResult(
        status="insufficient_evidence",
        score=0.4,
        evidence_gaps=["missing command output", "missing file reference"],
        validator="test",
    )

    restored = ValidationResult.from_dict(original.to_dict())

    assert restored is not None
    assert restored.status == "insufficient_evidence"
    assert restored.evidence_gaps == gaps
    assert restored.to_dict()["evidence_gaps"] == gaps
def test_task_record_status_helpers_distinguish_review_and_failed() -> None:
    """``needs_review`` is open and awaits the user; ``failed`` is neither."""
    needs_review = _task_record("needs_review")
    failed = _task_record("failed")

    review_flags = (
        needs_review.is_open,
        needs_review.is_execution_active,
        needs_review.requires_user_action,
    )
    assert review_flags[0] is True
    assert review_flags[1] is False
    assert review_flags[2] is True

    failed_flags = (
        failed.is_open,
        failed.is_execution_active,
        failed.requires_user_action,
    )
    for flag in failed_flags:
        assert flag is False
def test_task_service_api_payload_emits_status_helpers(tmp_path: Path) -> None:
    """The API payload for a ``needs_review`` task carries the three status helper flags."""
    payload = TaskService(tmp_path).to_api_dict(_task_record("needs_review"))

    expected_flags = {
        "is_open": True,
        "is_execution_active": False,
        "requires_user_action": True,
    }
    for key, expected in expected_flags.items():
        assert payload[key] is expected
def test_validation_failure_retries_once(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(
passed=False,
score=0.2,
issues=["missing tests"],
recommended_revision_prompt="Add tests before final response.",
validator="test",
),
ValidationResult(passed=True, score=0.88, validator="test"),
]
),
)
)
result = asyncio.run(
service.process_direct(
"implement and validate the task",
session_id="web:retry",
provider_bundle=_bundle("first draft", "revised draft"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
assert result.output_text == "revised draft"
assert result.validation_result["accepted"] is True
assert task is not None
assert len(task.run_ids) == 2
visible_messages = loaded.session_manager.get_messages_as_conversation(result.session_id)
visible_contents = [message.get("content") for message in visible_messages]
assert "first draft" not in visible_contents
assert "revised draft" in visible_contents
def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=0.9, validator="test")]
),
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"implement feedback handling",
session_id="web:feedback",
provider_bundle=_bundle("done"),
"write implementation plan",
session_id="web:acceptance",
provider_bundle=_bundle("Plan"),
)
)
loaded = service.create_loop().boot()
learning_calls = []
def build_learning_candidates_for_task(task_id: str, *, trigger_run_id: str) -> list[FakeLearningCandidate]:
learning_calls.append((task_id, trigger_run_id))
loaded = service.create_loop().boot()
generated: list[tuple[str, str]] = []
def build_learning_candidates_for_task(
task_id: str,
*,
final_accepted_run_id: str | None = None,
trigger_run_id: str | None = None,
) -> list[FakeLearningCandidate]:
generated.append((task_id, final_accepted_run_id or trigger_run_id or ""))
return [FakeLearningCandidate()]
loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task
feedback = asyncio.run(
service.submit_feedback(
session_id=result.session_id,
response = asyncio.run(
service.submit_acceptance(
session_id="web:acceptance",
run_id=result.run_id,
feedback_type="satisfied",
acceptance_type="accept",
)
)
assert feedback["task_status"] == "closed"
assert feedback["learning_candidates"] == [
assert response["task_status"] == "closed"
assert response["acceptance_type"] == "accept"
assert response["learning_candidates"] == [
{"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
]
assert learning_calls == [(result.task_id, result.run_id)]
assert generated == [(result.task_id, result.run_id)]
service2 = AgentService(
loader=EngineLoader(
workspace=tmp_path / "abandon",
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=False, score=0.3, validator="test"),
ValidationResult(passed=False, score=0.3, validator="test"),
]
),
)
)
abandoned = asyncio.run(
service2.process_direct(
"implement another workflow",
session_id="web:abandon",
provider_bundle=_bundle("not enough", "still not enough"),
)
)
abandon_feedback = asyncio.run(
service2.submit_feedback(
session_id=abandoned.session_id,
run_id=abandoned.run_id,
feedback_type="abandon",
comment="too costly",
)
)
assert abandon_feedback["task_status"] == "abandoned"
assert abandon_feedback["learning_candidates"] == []
loaded2 = service2.create_loop().boot()
failure_events = [
event
for event in loaded2.session_manager.get_run_event_records(abandoned.session_id, abandoned.run_id)
if event.event_type == "task_failure_evidence_recorded"
]
assert len(failure_events) == 1
assert loaded2.memory_service.get_store().memory_entries == []
task_service = loaded.task_service
assert task_service is not None
task = task_service.get_task(result.task_id or "")
assert task is not None
assert task.metadata["final_accepted_run_id"] == result.run_id
def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=0.9, validator="test")]
),
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"implement feedback projection",
session_id="web:feedback-projection",
provider_bundle=_bundle("done"),
"summarize notes",
session_id="web:revise",
provider_bundle=_bundle("Summary"),
)
)
loaded = service.create_loop().boot()
first = asyncio.run(
service.submit_feedback(
session_id=result.session_id,
response = asyncio.run(
service.submit_acceptance(
session_id="web:revise",
run_id=result.run_id,
feedback_type="satisfied",
acceptance_type="revise",
comment="Add decisions",
)
)
second = asyncio.run(
assert response["task_status"] == "needs_revision"
assert response["learning_candidates"] == []
task_service = service.create_loop().boot().task_service
assert task_service is not None
task = task_service.get_task(result.task_id or "")
assert task is not None
assert task.feedback[0]["acceptance_type"] == "revise"
def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"prepare checklist",
session_id="web:legacy",
provider_bundle=_bundle("Checklist"),
)
)
response = asyncio.run(
service.submit_feedback(
session_id=result.session_id,
session_id="web:legacy",
run_id=result.run_id,
feedback_type="satisfied",
)
)
feedback_events = [
event
for event in loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
if event.event_type == "task_feedback_recorded"
]
assistant = [
message
for message in loaded.session_manager.get_messages_as_conversation(result.session_id)
if message.get("role") == "assistant" and message.get("run_id") == result.run_id
][-1]
assert first["task_status"] == "closed"
assert second["task_status"] == "closed"
assert len(feedback_events) == 1
assert assistant["feedback_state"] == "satisfied"
assert assistant["task_status"] == "closed"
assert assistant["validation_status"] == "passed"
with pytest.raises(ValueError, match="already recorded"):
asyncio.run(
service.submit_feedback(
session_id=result.session_id,
run_id=result.run_id,
feedback_type="abandon",
)
)
task = loaded.task_service.get_task(result.task_id)
assert task is not None
assert task.status == "closed"
assert response["acceptance_type"] == "accept"
assert response["feedback_type"] == "satisfied"
assert response["task_status"] == "closed"
def test_task_mode_team_plan_runs_subagent_then_main_synthesis(tmp_path: Path) -> None:
main_provider = StubProvider(
[
LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
sub_provider = StubProvider(
[
LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
)
)
def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
service = TaskService(tmp_path)
task = service.create_task(session_id="s", description="legacy")
task.status = "awaiting_feedback"
task.feedback.append({"feedback_type": "satisfied", "run_id": "run-1"})
service.store.upsert_task(task)
result = asyncio.run(
service.process_direct(
"implement team-backed workflow",
session_id="web:team",
provider_bundle=_provider_bundle(main_provider),
team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
events = loaded.session_manager.get_event_records(result.session_id)
loaded = service.get_task(task.task_id)
assert result.output_text == "final synthesized answer"
assert task is not None
assert len(task.run_ids) == 2
assert result.run_id == task.run_ids[-1]
assert any(event.event_type == "task_execution_planned" for event in events)
assert any(event.event_type == "task_team_run_completed" for event in events)
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
assert "sub-agent evidence" != result.output_text
def test_task_mode_team_synthesis_runs_without_tools_and_receives_evidence(tmp_path: Path) -> None:
    """Check the main synthesis call after a team run.

    Asserts that the main provider is called with ``tools`` set to ``None``,
    that the sub-agent output appears in its first message, and that the
    validator receives text containing "Task evidence packet".
    """

    def _stub_reply(text: str) -> LLMResponse:
        return LLMResponse(content=text, finish_reason="stop", provider_name="stub", model="stub-model")

    main_provider = StubProvider([_stub_reply("final synthesized answer")])
    sub_provider = StubProvider([_stub_reply("sub-agent evidence")])
    validation = StubValidationService([ValidationResult(status="accepted", score=0.9, validator="test")])

    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
        validation_service=validation,
    )
    service = AgentService(loader=engine_loader)

    def _team_bundle(node):
        # Every team node is served by the same stubbed sub-agent provider.
        return _provider_bundle(sub_provider)

    result = asyncio.run(
        service.process_direct(
            "implement team-backed workflow",
            session_id="web:team-no-tools",
            provider_bundle=_provider_bundle(main_provider),
            team_provider_bundle_factory=_team_bundle,
        )
    )

    assert result.output_text == "final synthesized answer"
    synthesis_call = main_provider.calls[0]
    assert synthesis_call["tools"] is None
    assert "sub-agent evidence" in synthesis_call["messages"][0]["content"]
    assert "Task evidence packet" in validation.calls[0]["evidence_text"]
def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> None:
    """Check that a failing team bundle factory does not prevent a final answer.

    The factory raises ``RuntimeError``; the test asserts that a
    ``task_team_run_failed`` event is recorded and that the main provider's
    first message carries the failure text plus the fallback guidance phrases.
    """
    main_provider = StubProvider(
        [
            LLMResponse(
                content="fallback synthesized answer",
                finish_reason="stop",
                provider_name="stub",
                model="stub-model",
            )
        ]
    )
    planner = StubTaskExecutionPlanner([_team_plan()])
    validator = StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")])
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=planner,
            validation_service=validator,
        )
    )

    def _failing_bundle_factory(node):
        # Simulates the sub-agent provider being unavailable for every node.
        raise RuntimeError("sub-agent unavailable")

    result = asyncio.run(
        service.process_direct(
            "implement workflow despite team failure",
            session_id="web:team-failure",
            provider_bundle=_provider_bundle(main_provider),
            team_provider_bundle_factory=_failing_bundle_factory,
        )
    )

    loaded = service.create_loop().boot()
    events = loaded.session_manager.get_event_records(result.session_id)

    assert result.output_text == "fallback synthesized answer"
    recorded_types = [event.event_type for event in events]
    assert "task_team_run_failed" in recorded_types

    synthesis_prompt = main_provider.calls[0]["messages"][0]["content"]
    assert "sub-agent unavailable" in synthesis_prompt
    assert "same class of tools fails repeatedly" in synthesis_prompt
    assert "user-visible fallback answer" in synthesis_prompt
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
    """Check the task state after an ``insufficient_evidence`` validation.

    Asserts that the task ends in ``needs_review`` requiring user action, that
    execution is no longer active, and that the validation snapshot event
    records the status without scheduling a retry.
    """
    planner = _single_planner()
    verdict = ValidationResult(
        status="insufficient_evidence",
        score=0.4,
        evidence_gaps=["source missing"],
        validator="test",
    )
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=planner,
            validation_service=StubValidationService([verdict]),
        )
    )

    result = asyncio.run(
        service.process_direct(
            "answer with uncertain evidence",
            session_id="web:needs-review",
            provider_bundle=_bundle("possible answer"),
        )
    )

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(result.task_id)
    run_events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
    snapshot = next(item for item in run_events if item.event_type == "task_validation_snapshotted")

    assert task is not None
    assert task.status == "needs_review"
    assert task.requires_user_action is True
    assert task.is_execution_active is False

    payload = snapshot.event_payload
    assert payload["validation_result"]["status"] == "insufficient_evidence"
    assert payload["retry_scheduled"] is False
    assert payload["validation_debug"]["tool_result_count"] >= 0
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
    """Check a team-backed task that is rejected once and then accepted.

    Asserts that four runs are recorded on the task, that only the revised
    synthesis is visible in the conversation, and that every run's latest
    ``skill_effects_snapshotted`` event disallows candidate generation.
    """

    def _stub_reply(text: str) -> LLMResponse:
        return LLMResponse(content=text, finish_reason="stop", provider_name="stub", model="stub-model")

    main_provider = StubProvider(
        [
            _stub_reply("first synthesized answer"),
            _stub_reply("revised synthesized answer"),
        ]
    )
    # One sub-agent provider per team run, consumed in order by the factory.
    sub_providers = [
        StubProvider([_stub_reply("first evidence")]),
        StubProvider([_stub_reply("second evidence")]),
    ]

    planner = StubTaskExecutionPlanner([_team_plan(), _team_plan()])
    verdicts = [
        ValidationResult(passed=False, score=0.2, recommended_revision_prompt="revise", validator="test"),
        ValidationResult(passed=True, score=0.9, validator="test"),
    ]
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=planner,
            validation_service=StubValidationService(verdicts),
        )
    )

    def _next_team_bundle(node):
        return _provider_bundle(sub_providers.pop(0))

    result = asyncio.run(
        service.process_direct(
            "implement and validate with team",
            session_id="web:team-retry",
            provider_bundle=_provider_bundle(main_provider),
            team_provider_bundle_factory=_next_team_bundle,
        )
    )

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(result.task_id)
    conversation = loaded.session_manager.get_messages_as_conversation(result.session_id)
    shown = [entry.get("content") for entry in conversation]
    runs_by_id = {entry.run_id: entry for entry in loaded.run_memory_store.list_runs()}

    assert result.output_text == "revised synthesized answer"
    assert task is not None
    assert len(task.run_ids) == 4
    assert "first synthesized answer" not in shown
    assert "revised synthesized answer" in shown

    for run_id in task.run_ids:
        run_record = runs_by_id[run_id]
        run_events = loaded.session_manager.get_run_event_records(run_record.session_id, run_id)
        snapshots = [item for item in run_events if item.event_type == "skill_effects_snapshotted"]
        assert snapshots
        latest_snapshot = snapshots[-1]
        assert latest_snapshot.event_payload["candidate_generation_allowed"] is False
def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
    """Check that only ``role`` and ``content`` survive for a history message.

    The history entry carries extra run/task/validation/feedback keys; the
    built message is expected to contain none of them.
    """
    builder = ContextBuilder()
    persisted_message = {
        "role": "assistant",
        "content": "done",
        "run_id": "run-1",
        "task_id": "task-1",
        "task_status": "closed",
        "validation_status": "passed",
        "feedback_state": "satisfied",
    }
    built = builder.build_messages(ContextBuildInput(history=[persisted_message]))

    last_message = built.messages[-1]
    assert last_message == {"role": "assistant", "content": "done"}
def test_context_builder_normalizes_persisted_tool_arguments() -> None:
    """Check that dict-valued tool-call arguments come back as a JSON string."""
    builder = ContextBuilder()
    persisted_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "cron",
            "arguments": {"action": "add", "mode": "notification"},
        },
    }
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [persisted_call],
    }
    built = builder.build_messages(ContextBuildInput(history=[assistant_turn]))

    normalized_call = built.messages[-1]["tool_calls"][0]
    assert normalized_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
    """Check the validation result when the stubbed provider replies "not json".

    Asserts that the result is not accepted, has status ``validator_error``,
    names ``llm_error`` as the validator, and reports at least one issue.
    """
    task_service = TaskService(tmp_path / "tasks")
    task = task_service.create_task(session_id="web:validator", description="implement validator handling")

    validator = ValidationService()
    pending = validator.validate_task_result(
        task=task,
        user_message="implement validator handling",
        final_output="done",
        provider_bundle=_main_only_bundle("not json"),
    )
    validation = asyncio.run(pending)

    assert validation.accepted is False
    assert validation.status == "validator_error"
    assert validation.validator == "llm_error"
    assert validation.issues
assert loaded is not None
assert loaded.status == "awaiting_acceptance"
assert loaded.feedback[0]["acceptance_type"] == "accept"

View File

@ -20,8 +20,8 @@ class StubRunResult:
model: str | None = "stub-model"
usage: dict[str, Any] = field(default_factory=lambda: {"total_tokens": 3})
task_id: str | None = "task-1"
task_status: str | None = "awaiting_feedback"
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
task_status: str | None = "awaiting_acceptance"
validation_result: dict[str, Any] | None = None
class StubAgentService(AgentService):
@ -101,9 +101,10 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
assert message["session_id"] == "web:alpha"
assert message["run_id"] == "run-1"
assert message["task_id"] == "task-1"
assert message["task_status"] == "awaiting_feedback"
assert message["validation_result"] == {"accepted": True}
assert message["validation_status"] == "passed"
assert message["task_status"] == "awaiting_acceptance"
assert message["evidence_status"] == "recorded"
assert message["validation_result"] is None
assert "validation_status" not in message
assert message["metadata"]["input_metadata"] == {
"source": "test",
"attachments": [{"file_id": "file-1", "name": "a.txt"}],

View File

@ -19,7 +19,7 @@ import {
uploadFile,
wsManager,
} from '@/lib/api';
import { mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
import { mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
import { pickAppText } from '@/lib/i18n/core';
import { useAppI18n } from '@/lib/i18n/provider';
import { buildSessionProgressView } from '@/lib/session-progress';
@ -32,7 +32,7 @@ function isSessionUpdatedEvent(data: WsEvent | Record<string, unknown>): data is
function activeTaskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
if (status === 'needs_revision') return pickAppText(locale, '待修改', 'Needs revision');
if (status === 'awaiting_feedback') return pickAppText(locale, '待反馈', 'Awaiting feedback');
if (status === 'awaiting_acceptance') return pickAppText(locale, '待验收', 'Awaiting acceptance');
if (status === 'running') return pickAppText(locale, '进行中', 'Running');
return pickAppText(locale, '进行中', 'Active');
}
@ -157,10 +157,11 @@ export default function ChatPage() {
setSessionProcess(key, process);
}
void loadActiveTask(key);
const shouldMergePending = shouldMergePendingUsers(detail.messages, localSnapshot, waitingForReply);
const displayMessages = detail.messages.filter(shouldDisplayChatMessage);
const shouldMergePending = shouldMergePendingUsers(displayMessages, localSnapshot, waitingForReply);
const nextMessages = shouldMergePending
? mergeServerWithPendingUsers(detail.messages, localSnapshot)
: detail.messages;
? mergeServerWithPendingUsers(displayMessages, localSnapshot)
: displayMessages;
setMessages(nextMessages);
shouldSnapToLatestRef.current = true;
const last = nextMessages[nextMessages.length - 1];
@ -217,15 +218,11 @@ export default function ChatPage() {
if (data.type === 'status' && data.status === 'thinking') {
setIsThinking(true);
} else if (data.type === 'message' && data.role === 'assistant') {
const validationResult = data.validation_result ?? data.metadata?.validation_result;
const validationStatus = data.validation_status
? data.validation_status
: validationResult
? ((validationResult as Record<string, unknown>).accepted === true ? 'passed' : 'failed')
: 'unknown';
setIsThinking(false);
setIsLoading(false);
addMessage({
const rawEvidenceStatus = data.evidence_status ?? data.metadata?.evidence_status;
const evidenceStatus = rawEvidenceStatus === 'recorded' ? 'recorded' : undefined;
const assistantMessage = {
role: 'assistant',
content: typeof data.content === 'string' ? data.content : '',
timestamp: new Date().toISOString(),
@ -233,8 +230,11 @@ export default function ChatPage() {
run_id: typeof data.run_id === 'string' ? data.run_id : undefined,
task_id: data.task_id ?? data.metadata?.task_id ?? null,
task_status: data.task_status ?? data.metadata?.task_status ?? null,
validation_status: validationStatus,
});
evidence_status: evidenceStatus,
} as const;
if (shouldDisplayChatMessage(assistantMessage)) {
addMessage(assistantMessage);
}
void loadSessionMessages(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
void loadActiveTask(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
loadSessions();
@ -359,17 +359,18 @@ export default function ChatPage() {
await loadSessions();
return;
}
addMessage({
const assistantMessage = {
role: 'assistant',
content: result.response,
timestamp: new Date().toISOString(),
run_id: result.run_id,
task_id: result.task_id,
task_status: result.task_status,
validation_status: result.validation_result
? (result.validation_result.accepted === true ? 'passed' : 'failed')
: 'unknown',
});
evidence_status: result.evidence_status === 'recorded' ? 'recorded' : undefined,
} as const;
if (shouldDisplayChatMessage(assistantMessage)) {
addMessage(assistantMessage);
}
void getSessionProcess(sessionId).then((process) => setSessionProcess(sessionId, process)).catch(() => null);
void loadActiveTask(sessionId);
loadSessions();
@ -393,7 +394,7 @@ export default function ChatPage() {
}
}, [addMessage, clearInputDraft, input, isLoading, loadActiveTask, loadSessionMessages, loadSessions, locale, pendingFiles, revisionTargetRunId, sessionId, setIsLoading, setIsThinking, setSessionProcess, thinkingModeEnabled, updateMessageFeedback]);
const handleFeedback = useCallback(async (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => {
const handleFeedback = useCallback(async (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => {
updateMessageFeedback(runId, feedbackType);
try {
await submitChatFeedback({

View File

@ -1238,7 +1238,7 @@ function riskLabel(risk: string, t: (zh: string, en: string) => string): string
function triggerReasonLabel(reason: string, t: (zh: string, en: string) => string): string {
const labels: Record<string, string> = {
validation_accepted_and_user_satisfied: t('任务验证通过且用户满意', 'Validation accepted and user satisfied'),
task_accepted: t('任务已接受', 'Task accepted'),
};
return labels[reason] || reason;
}

View File

@ -3,7 +3,7 @@
import Link from 'next/link';
import { useParams, useRouter } from 'next/navigation';
import React, { useMemo, useState } from 'react';
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, HelpCircle, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime, progressPercent } from '@/components/task-runtime/TaskRuntimeShared';
import { Badge } from '@/components/ui/badge';
@ -17,8 +17,9 @@ import { buildTaskRuntimeView, type TaskRuntimeNodeView } from '@/lib/task-runti
import { useChatStore } from '@/lib/store';
import type { BackendTask, BackendTaskRun, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
type TaskFeedbackType = 'satisfied' | 'revise' | 'abandon';
type TaskFeedbackType = 'accept' | 'revise' | 'abandon';
type TaskFeedbackItem = {
acceptance_type?: unknown;
feedback_type?: unknown;
comment?: unknown;
created_at?: unknown;
@ -151,12 +152,6 @@ export default function TaskDetailPage() {
const backendFeedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null;
if (!task && backendTask) {
const validation = backendTask.validation_result;
const accepted = Boolean(validation?.accepted);
const validationIssues = [
...arrayOfStrings(validation?.issues),
...arrayOfStrings(validation?.missing_requirements),
];
const feedbackItems = backendTask.feedback || [];
return (
<div className="mx-auto max-w-5xl space-y-6 p-6">
@ -232,57 +227,6 @@ export default function TaskDetailPage() {
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, '验证和反馈', 'Validation and feedback')}</CardTitle>
</CardHeader>
<CardContent className="space-y-4 text-sm">
<div className="rounded-lg border border-border bg-muted/25 p-4">
<div className="flex items-center gap-2">
{validation ? (
accepted ? <CheckCircle2 className="h-5 w-5 text-[#657162]" /> : <XCircle className="h-5 w-5 text-destructive" />
) : (
<HelpCircle className="h-5 w-5 text-muted-foreground" />
)}
<div className="font-medium">
{validation
? accepted
? pickAppText(locale, '验证通过', 'Validation passed')
: pickAppText(locale, '需要继续修改', 'Needs revision')
: pickAppText(locale, '尚未验证', 'Not validated yet')}
</div>
</div>
{validation ? (
<div className="mt-2 text-muted-foreground">
{pickAppText(locale, '评分', 'Score')}: {String(validation.score ?? '-')} · {pickAppText(locale, '验证器', 'Validator')}: {String(validation.validator ?? '-')}
</div>
) : null}
{validationIssues.length > 0 && (
<ul className="mt-3 list-disc space-y-1 pl-5 text-muted-foreground">
{validationIssues.map((item, index) => <li key={`${item}:${index}`}>{item}</li>)}
</ul>
)}
{typeof validation?.recommended_revision_prompt === 'string' && validation.recommended_revision_prompt && (
<p className="mt-3 rounded-md bg-background p-3 text-muted-foreground">{validation.recommended_revision_prompt}</p>
)}
</div>
<div className="space-y-2">
<div className="font-medium">{pickAppText(locale, '用户反馈', 'User feedback')}</div>
{feedbackItems.length === 0 ? (
<p className="text-muted-foreground">{pickAppText(locale, '还没有用户反馈。', 'No user feedback yet.')}</p>
) : (
feedbackItems.map((item, index) => (
<div key={index} className="rounded-md border border-border p-3">
<div className="font-medium">{humanFeedback(String(item.feedback_type || ''), locale)}</div>
{item.comment ? <p className="mt-1 text-muted-foreground">{String(item.comment)}</p> : null}
{item.created_at ? <p className="mt-1 text-xs text-muted-foreground">{formatTaskRuntimeTime(String(item.created_at), locale)}</p> : null}
</div>
))
)}
</div>
</CardContent>
</Card>
</div>
);
}
@ -476,6 +420,7 @@ export default function TaskDetailPage() {
comment,
});
setRuntimeFeedback({
acceptance_type: feedbackType,
feedback_type: feedbackType,
comment: comment || '',
created_at: new Date().toISOString(),
@ -660,14 +605,14 @@ function TaskFeedbackPanel({
return (
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, '任务反馈', 'Task feedback')}</CardTitle>
<CardTitle className="text-base">{pickAppText(locale, '任务验收', 'Task acceptance')}</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
{recordedFeedback ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm">
<div className="flex items-center gap-2 font-medium">
<CheckCircle2 className="h-4 w-4 text-[#657162]" />
{pickAppText(locale, '已提交反馈', 'Feedback submitted')}: {humanFeedback(String(recordedFeedback.feedback_type || ''), locale)}
{pickAppText(locale, '已提交验收', 'Acceptance submitted')}: {humanFeedback(String(recordedFeedback.acceptance_type || recordedFeedback.feedback_type || ''), locale)}
</div>
{recordedFeedback.comment ? (
<p className="mt-2 text-muted-foreground">{String(recordedFeedback.comment)}</p>
@ -678,22 +623,22 @@ function TaskFeedbackPanel({
</div>
) : isFinalized ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
{pickAppText(locale, '任务已结束,不能再提交新的反馈。', 'This task is finalized and cannot accept new feedback.')}
{pickAppText(locale, '任务已结束,不能再提交新的验收。', 'This task is finalized and cannot accept new acceptance.')}
</div>
) : !runId ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
{pickAppText(locale, '暂无可反馈的运行记录。', 'No run is available for feedback yet.')}
{pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.')}
</div>
) : null}
<div className="grid gap-2 sm:grid-cols-3">
<FeedbackButton
type="satisfied"
type="accept"
icon={<ThumbsUp className="mr-2 h-4 w-4" />}
label={pickAppText(locale, '满意', 'Satisfied')}
label={pickAppText(locale, '接受', 'Accept')}
actionBusy={actionBusy}
disabled={!canSubmit}
onClick={() => submit('satisfied', comment.trim() || undefined)}
onClick={() => submit('accept', comment.trim() || undefined)}
/>
<FeedbackButton
type="revise"
@ -717,10 +662,10 @@ function TaskFeedbackPanel({
value={comment}
onChange={(event) => setComment(event.target.value)}
disabled={Boolean(recordedFeedback) || isFinalized || Boolean(actionBusy)}
placeholder={pickAppText(locale, '需要修改时写下具体要求;满意或放弃可选填说明。', 'Describe requested changes; notes are optional for satisfied or abandon.')}
placeholder={pickAppText(locale, '需要修改时写下具体要求;接受或放弃可选填说明。', 'Describe requested changes; notes are optional for accept or abandon.')}
/>
<div className="text-xs text-muted-foreground">
{pickAppText(locale, '反馈将记录到当前任务运行:', 'Feedback will be recorded on run: ')}
{pickAppText(locale, '验收将记录到当前任务运行:', 'Acceptance will be recorded on run: ')}
<span className="font-mono">{runId || '-'}</span>
<span className="mx-1">·</span>
{pickAppText(locale, '会话:', 'Session: ')}
@ -807,8 +752,7 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
const map: Record<string, [string, string]> = {
open: ['已创建', 'Open'],
running: ['执行中', 'Running'],
validating: ['验证中', 'Validating'],
awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
needs_revision: ['需要修改', 'Needs revision'],
closed: ['已完成', 'Closed'],
abandoned: ['已放弃', 'Abandoned'],
@ -818,10 +762,10 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
}
function humanFeedback(type: string, locale: 'zh-CN' | 'en-US') {
if (type === 'satisfied') return pickAppText(locale, '满意', 'Satisfied');
if (type === 'accept' || type === 'satisfied') return pickAppText(locale, '接受', 'Accepted');
if (type === 'revise') return pickAppText(locale, '请求修改', 'Revision requested');
if (type === 'abandon') return pickAppText(locale, '放弃任务', 'Abandoned');
return type || pickAppText(locale, '反馈', 'Feedback');
return type || pickAppText(locale, '验收', 'Acceptance');
}
function humanFinishReason(reason: string, locale: 'zh-CN' | 'en-US') {
@ -848,7 +792,3 @@ function feedbackForRun(items: TaskFeedbackItem[], runId: string | null): TaskFe
/** Return the last entry of `items`, or `null` when there is none. */
function latestFeedback(items: TaskFeedbackItem[]): TaskFeedbackItem | null {
  const lastIndex = items.length - 1;
  const candidate = items[lastIndex];
  return candidate ?? null;
}
/**
 * Coerce an unknown value into a list of non-empty strings.
 * Non-array input yields an empty list; entries whose string form is empty are dropped.
 */
function arrayOfStrings(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const collected: string[] = [];
  // forEach skips holes in sparse arrays, as the map/filter chain it replaces does.
  value.forEach((entry) => {
    const text = String(entry);
    if (text) {
      collected.push(text);
    }
  });
  return collected;
}

View File

@ -142,7 +142,7 @@ function OrdinaryTasks() {
</div>
</TableCell>
<TableCell>
<Badge variant={task.status === 'awaiting_feedback' || task.status === 'closed' ? 'default' : 'secondary'}>
<Badge variant={task.status === 'awaiting_acceptance' || task.status === 'closed' ? 'default' : 'secondary'}>
{taskStatusLabel(task.status, locale)}
</Badge>
</TableCell>
@ -185,8 +185,7 @@ function taskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
const labels: Record<string, [string, string]> = {
open: ['已创建', 'Open'],
running: ['执行中', 'Running'],
validating: ['验证中', 'Validating'],
awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
needs_revision: ['需要修改', 'Needs revision'],
closed: ['已完成', 'Closed'],
abandoned: ['已放弃', 'Abandoned'],

View File

@ -27,7 +27,7 @@ export function ChatWorkbench({
processArtifacts: ProcessArtifact[];
selectedRunId: string | null;
onSelectRun: (runId: string) => void;
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
onRequestRevision: (runId: string) => void;
}) {
return (

View File

@ -6,7 +6,7 @@ import { Bot, CheckCircle2, ChevronRight, Loader2, Paperclip, RefreshCcw, Thumbs
import type { ChatMessage, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
import { getAccessToken, getFileUrl } from '@/lib/api';
import { getTaskCardMessageIndexes } from '@/lib/chat-messages';
import { getTaskCardMessageIndexes, hasVisibleChatContent, normalizedMessageText, shouldDisplayChatMessage } from '@/lib/chat-messages';
import { AgentTeamBlock } from '@/components/chat-workbench/AgentTeamBlock';
import { MarkdownContent } from '@/components/chat-workbench/MarkdownContent';
import { ScrollArea } from '@/components/ui/scroll-area';
@ -49,19 +49,14 @@ function MessageBubble({
message: ChatMessage;
showTaskCard: boolean;
canSendFeedback: boolean;
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
onRequestRevision: (runId: string) => void;
}) {
const { locale } = useAppI18n();
const isUser = message.role === 'user';
const textContent = typeof message.content === 'string' ? message.content : String(message.content || '');
const [feedbackMode, setFeedbackMode] = React.useState<'satisfied' | null>(null);
const textContent = normalizedMessageText(message.content);
const [feedbackMode, setFeedbackMode] = React.useState<'accept' | null>(null);
const [feedbackComment, setFeedbackComment] = React.useState('');
const validationFailed = message.validation_status === 'failed';
const validationDetails =
validationFailed
? pickAppText(locale, '详细原因会在任务验证区展示;展开任务可查看验证报告。', 'Detailed reasons are shown in the task validation area. Open the task to inspect the validation report.')
: '';
return (
<div className={`flex gap-3 ${isUser ? 'justify-end' : ''}`}>
@ -142,22 +137,14 @@ function MessageBubble({
</div>
</div>
)}
{!isUser && validationFailed && (
<details className="mt-3 rounded-md border border-destructive/30 bg-destructive/5 p-3">
<summary className="cursor-pointer text-base font-semibold text-destructive">
{pickAppText(locale, '验证失败', 'Validation failed')}
</summary>
<p className="mt-2 text-xs leading-5 text-muted-foreground">{validationDetails}</p>
</details>
)}
{!isUser && (canSendFeedback || message.feedback_state) && message.run_id && (
<div className="mt-3 space-y-2 border-t border-border/70 pt-3">
{message.feedback_state ? (
<div className="flex items-center gap-2 text-xs text-muted-foreground">
<CheckCircle2 className="h-3.5 w-3.5" />
<span>
{message.feedback_state === 'satisfied'
? pickAppText(locale, '已标记满意', 'Marked satisfied')
{message.feedback_state === 'accept' || message.feedback_state === 'satisfied'
? pickAppText(locale, '已接受', 'Accepted')
: message.feedback_state === 'revise'
? pickAppText(locale, '已请求修改', 'Revision requested')
: pickAppText(locale, '已放弃任务', 'Task abandoned')}
@ -168,11 +155,11 @@ function MessageBubble({
<div className="flex flex-wrap items-center gap-2">
<button
type="button"
onClick={() => setFeedbackMode('satisfied')}
onClick={() => setFeedbackMode('accept')}
className="inline-flex h-8 items-center gap-1 rounded-md border border-border px-3 text-xs text-muted-foreground hover:bg-accent hover:text-foreground"
>
<ThumbsUp className="h-3.5 w-3.5" />
{pickAppText(locale, '满意', 'Satisfied')}
{pickAppText(locale, '接受', 'Accept')}
</button>
<button
type="button"
@ -222,13 +209,6 @@ function MessageBubble({
)}
</>
)}
{message.validation_status && message.validation_status !== 'unknown' && (
<span className="text-xs text-muted-foreground">
{message.validation_status === 'passed'
? pickAppText(locale, '验证通过', 'Validated')
: pickAppText(locale, '验证未通过', 'Validation failed')}
</span>
)}
{message.feedback_error && (
<span className="text-xs text-destructive">{message.feedback_error}</span>
)}
@ -264,6 +244,17 @@ function shouldHideSystemAgentMessage(message: ChatMessage): boolean {
);
}
/**
 * Report whether a message has something to render.
 *
 * Thin local alias over `hasVisibleChatContent` from '@/lib/chat-messages';
 * it adds no logic of its own and simply forwards the message.
 */
function hasRenderableMessageContent(message: ChatMessage): boolean {
return hasVisibleChatContent(message);
}
/**
 * Decide whether a message is left out of the rendered list.
 *
 * A message is hidden when `shouldHideSystemAgentMessage` flags it, or when
 * `shouldDisplayChatMessage` rejects it. The system-agent check runs first
 * and short-circuits the second check.
 */
function shouldHideMessage(message: ChatMessage): boolean {
  return shouldHideSystemAgentMessage(message) || !shouldDisplayChatMessage(message);
}
function parseTimelineTime(value?: string | null): number | null {
if (!value) return null;
const parsed = new Date(value).getTime();
@ -342,12 +333,12 @@ export function MessageList({
processArtifacts: ProcessArtifact[];
selectedRunId: string | null;
onSelectRun: (runId: string) => void;
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
onRequestRevision: (runId: string) => void;
}) {
const { locale } = useAppI18n();
const visibleMessages = React.useMemo(
() => messages.filter((message) => !shouldHideSystemAgentMessage(message)),
() => messages.filter((message) => !shouldHideMessage(message)),
[messages]
);
const teamGroups = React.useMemo(
@ -385,14 +376,21 @@ export function MessageList({
() => getTaskCardMessageIndexes(visibleMessages),
[visibleMessages]
);
const latestFeedbackRunId = [...visibleMessages]
.reverse()
.find((message) =>
message.role === 'assistant'
&& message.run_id
&& message.task_id
&& message.task_status === 'awaiting_feedback'
)?.run_id;
const latestFeedbackMessageIndex = (() => {
for (let index = visibleMessages.length - 1; index >= 0; index -= 1) {
const message = visibleMessages[index];
if (
message.role === 'assistant'
&& message.run_id
&& message.task_id
&& message.task_status === 'awaiting_acceptance'
&& hasRenderableMessageContent(message)
) {
return index;
}
}
return -1;
})();
return (
<ScrollArea className="h-full px-8" viewportRef={viewportRef}>
@ -411,7 +409,7 @@ export function MessageList({
key={item.key}
message={item.message}
showTaskCard={taskCardMessageIndexes.has(item.messageIndex)}
canSendFeedback={Boolean(latestFeedbackRunId && item.message.run_id === latestFeedbackRunId)}
canSendFeedback={item.messageIndex === latestFeedbackMessageIndex}
onFeedback={onFeedback}
onRequestRevision={onRequestRevision}
/>

View File

@ -271,7 +271,7 @@ export async function sendMessage(
run_id?: string;
task_id?: string | null;
task_status?: string | null;
validation_result?: Record<string, unknown> | null;
evidence_status?: string | null;
}> {
const body: Record<string, unknown> = { message, session_id: sessionId };
if (attachments && attachments.length > 0) {
@ -293,7 +293,7 @@ export async function sendMessage(
finish_reason?: string;
task_id?: string | null;
task_status?: string | null;
validation_result?: Record<string, unknown> | null;
evidence_status?: string | null;
}>('/api/chat', {
method: 'POST',
body: JSON.stringify(body),
@ -305,28 +305,29 @@ export async function sendMessage(
run_id: result.run_id,
task_id: result.task_id,
task_status: result.task_status,
validation_result: result.validation_result,
evidence_status: result.evidence_status,
};
}
export async function submitChatFeedback(params: {
sessionId: string;
runId: string;
feedbackType: 'satisfied' | 'revise' | 'abandon';
feedbackType: 'accept' | 'revise' | 'abandon';
comment?: string;
}): Promise<{
session_id: string;
run_id: string;
task_id: string;
task_status: string;
acceptance_type: string;
feedback_type: string;
}> {
return fetchJSON('/api/chat/feedback', {
return fetchJSON('/api/chat/acceptance', {
method: 'POST',
body: JSON.stringify({
session_id: params.sessionId,
run_id: params.runId,
feedback_type: params.feedbackType,
acceptance_type: params.feedbackType,
comment: params.comment,
}),
});

View File

@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest';
import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
import type { ChatMessage } from '@/types';
describe('chat message helpers', () => {
@ -85,10 +85,17 @@ describe('chat message helpers', () => {
content: 'Final answer.',
run_id: 'run-1',
task_id: 'task-1',
task_status: 'awaiting_feedback',
task_status: 'awaiting_acceptance',
},
];
expect(Array.from(getTaskCardMessageIndexes(messages))).toEqual([2]);
});
it('hides empty assistant records from session history', () => {
expect(shouldDisplayChatMessage({ role: 'assistant', content: '', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
expect(shouldDisplayChatMessage({ role: 'assistant', content: '\u200B\uFEFF', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
expect(shouldDisplayChatMessage({ role: 'assistant', content: 'Final answer.', task_id: 'task-1', run_id: 'run-1' })).toBe(true);
expect(shouldDisplayChatMessage({ role: 'user', content: '' })).toBe(true);
});
});

View File

@ -1,5 +1,28 @@
import type { ChatMessage } from '@/types';
// Zero-width characters (U+200B..U+200D) and the BOM / zero-width no-break
// space (U+FEFF): they occupy no visible space, so they are stripped before
// deciding whether a message has any text.
const INVISIBLE_CONTENT_CHARS = /[\u200B-\u200D\uFEFF]/g;

/**
 * Convert arbitrary message content into trimmed display text.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are used as-is; any other value is coerced with `String()`.
 * - Invisible characters are removed first, then surrounding whitespace is
 *   trimmed.
 *
 * @param content Raw message content of unknown shape.
 * @returns The visible text, or '' when nothing visible remains.
 */
export function normalizedMessageText(content: unknown): string {
  // `== null` deliberately matches both null and undefined.
  const raw = content == null ? '' : typeof content === 'string' ? content : String(content);
  return raw.replace(INVISIBLE_CONTENT_CHARS, '').trim();
}
/**
 * Report whether a chat message carries anything a user could see.
 *
 * A message counts as visible when its normalized text is non-empty or when
 * it has at least one attachment.
 */
export function hasVisibleChatContent(msg: ChatMessage): boolean {
  const hasText = normalizedMessageText(msg.content) !== '';
  const attachmentCount = msg.attachments?.length ?? 0;
  return hasText || attachmentCount > 0;
}
/**
 * Decide whether a message belongs in the displayed history.
 *
 * Non-assistant messages are always displayed. Assistant messages are
 * displayed only when they have visible content (text or attachments).
 */
export function shouldDisplayChatMessage(msg: ChatMessage): boolean {
  if (msg.role !== 'assistant') {
    return true;
  }
  return hasVisibleChatContent(msg);
}
export function messageFingerprint(msg: ChatMessage): string {
const attachmentKey = (msg.attachments ?? [])
.map((a) => `${a.file_id ?? ''}:${a.name}:${a.content_type}:${a.size ?? ''}`)

View File

@ -48,8 +48,9 @@ export interface ChatMessage {
run_id?: string;
task_id?: string | null;
task_status?: string | null;
validation_status?: 'passed' | 'failed' | 'unknown';
feedback_state?: 'satisfied' | 'revise' | 'abandon';
evidence_status?: 'recorded';
acceptance_state?: 'accept' | 'revise' | 'abandon';
feedback_state?: 'accept' | 'satisfied' | 'revise' | 'abandon';
feedback_error?: string;
message_type?: string | null;
scheduled_job_id?: string | null;
@ -153,6 +154,7 @@ export interface SystemStatus {
workspace_exists: boolean;
model: string;
max_tokens: number;
max_context_messages?: number;
temperature: number;
max_tool_iterations: number;
providers: ProviderStatus[];
@ -315,6 +317,7 @@ export interface BackendTaskRun {
attempt_index?: number | null;
task_text?: string;
messages: BackendTaskRunMessage[];
evidence_status?: string | null;
validation_result?: Record<string, unknown> | null;
}
@ -972,12 +975,12 @@ export interface ChatAssistantEvent {
run_id?: string;
task_id?: string | null;
task_status?: string | null;
validation_status?: 'passed' | 'failed' | 'unknown';
evidence_status?: 'recorded';
validation_result?: Record<string, unknown> | null;
metadata?: {
task_id?: string | null;
task_status?: string | null;
validation_result?: Record<string, unknown> | null;
evidence_status?: string | null;
[key: string]: unknown;
};
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,224 @@
* {
box-sizing: border-box;
}
/* Shared palette for the blueprint pages, exposed as custom properties. */
:root {
/* Primary text colour; also the strong borders (topbar rule, flow steps). */
--ink: #111827;
/* Secondary text: intro paragraphs, .lead, .meta, flow arrows. */
--muted: #4b5563;
/* Default border / divider colour. */
--line: #9ca3af;
/* Light fill for the nav bar and table headers. */
--soft: #f3f4f6;
/* Background of the main .page column. */
--paper: #ffffff;
/* Link colour and the left rule of .subflow items. */
--accent: #0f766e;
/* Left rule of .callout boxes. */
--warn: #92400e;
}
body {
margin: 0;
background: #e5e7eb;
color: var(--ink);
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
line-height: 1.55;
}
a {
color: var(--accent);
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
.page {
max-width: 1240px;
margin: 0 auto;
background: var(--paper);
min-height: 100vh;
border-left: 1px solid var(--line);
border-right: 1px solid var(--line);
}
.topbar {
border-bottom: 2px solid var(--ink);
padding: 28px 36px 22px;
}
.topbar h1 {
margin: 0 0 8px;
font-size: 30px;
letter-spacing: 0;
}
.topbar p {
margin: 0;
max-width: 920px;
color: var(--muted);
}
.nav {
display: flex;
flex-wrap: wrap;
gap: 8px;
padding: 14px 36px;
border-bottom: 1px solid var(--line);
background: var(--soft);
}
.nav a {
display: inline-block;
border: 1px solid var(--line);
background: #fff;
color: var(--ink);
padding: 6px 10px;
font-size: 13px;
}
.content {
padding: 32px 36px 52px;
}
h2 {
margin: 34px 0 12px;
padding-bottom: 6px;
border-bottom: 1px solid var(--line);
font-size: 22px;
}
h3 {
margin: 24px 0 10px;
font-size: 17px;
}
.lead {
max-width: 980px;
color: var(--muted);
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
gap: 14px;
}
.card,
.module {
border: 1px solid var(--line);
background: #fff;
padding: 14px;
}
.module h3,
.card h3 {
margin-top: 0;
}
.meta {
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
font-size: 12px;
color: var(--muted);
}
.flow {
display: flex;
flex-wrap: wrap;
align-items: stretch;
gap: 10px;
margin: 16px 0 22px;
padding: 14px;
border: 1px solid var(--line);
background: #f9fafb;
}
.step {
border: 1px solid var(--ink);
background: #fff;
padding: 10px 12px;
min-width: 150px;
flex: 1 1 150px;
}
.step strong {
display: block;
margin-bottom: 4px;
}
.arrow {
align-self: center;
color: var(--muted);
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
}
.subflow {
display: grid;
grid-template-columns: 1fr;
gap: 8px;
margin: 10px 0;
}
.subflow div {
border-left: 4px solid var(--accent);
background: #f9fafb;
padding: 9px 11px;
}
.table {
width: 100%;
border-collapse: collapse;
margin: 14px 0;
}
.table th,
.table td {
border: 1px solid var(--line);
padding: 9px 10px;
text-align: left;
vertical-align: top;
}
.table th {
background: var(--soft);
}
.code,
pre {
border: 1px solid var(--line);
background: #f9fafb;
padding: 12px;
overflow: auto;
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
font-size: 13px;
}
.callout {
border-left: 4px solid var(--warn);
background: #fffbeb;
padding: 12px 14px;
margin: 16px 0;
}
.toc {
columns: 2;
column-gap: 32px;
}
.toc li {
break-inside: avoid;
margin: 6px 0;
}
/*
 * Narrow viewports (<= 760px): halve the 36px horizontal padding of the
 * top bar, nav and content, hide the flow arrows, and collapse the table of
 * contents from two columns to one.
 */
@media (max-width: 760px) {
.topbar,
.content,
.nav {
padding-left: 18px;
padding-right: 18px;
}
.arrow {
display: none;
}
.toc {
columns: 1;
}
}

View File

@ -0,0 +1,60 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Coordinator 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Coordinator</h1><p>Coordinator 负责把 planner 输出的 team graph 落成可执行节点。它不自己造模型调用,而是把每个节点包装成 DelegationEnvelope,交给 LocalAgentRunner 复用 AgentLoop。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="tasks.html">Tasks</a><a href="engine.html">Engine</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>ExecutionGraph</strong>sequence / parallel / dag</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Scheduler</strong>找可运行节点</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Envelope</strong>注入 parent/session/deps/skills</div><div class="arrow">-&gt;</div>
<div class="step"><strong>LocalAgentRunner</strong>启动子 AgentLoop run</div><div class="arrow">-&gt;</div>
<div class="step"><strong>NodeRunResult</strong>输出、证据、错误</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>models</h3>
<p>定义 <code>AgentDescriptor</code>、<code>ExecutionNode</code>、<code>ExecutionGraph</code>、<code>DelegationEnvelope</code>、<code>NodeRunResult</code>、<code>TeamRunResult</code>。</p>
<div class="subflow">
<div>ExecutionNode 包含 node_id、task、agent、depends_on、constraints、expected_output、pinned skills。</div>
<div>ExecutionGraph.validate() 检查 strategy、节点 id、依赖合法性。</div>
<div>DelegationEnvelope 是真正传给 LocalAgentRunner 的运行包。</div>
</div>
</article>
<article class="module">
<h3>TeamGraphScheduler</h3>
<p>按照 sequence、parallel 或 dag 策略运行节点。依赖节点完成后,其 output 会进入后续节点的 dependency_outputs。</p>
<div class="subflow">
<div>sequence:按 nodes 顺序逐个运行。</div>
<div>parallel:无依赖节点可并行运行。</div>
<div>dag:每轮选出依赖已满足的节点,运行后更新完成集。</div>
<div>汇总 success、node_results、run_ids、errors。</div>
</div>
</article>
<article class="module">
<h3>LocalAgentRunner</h3>
<p>本地委派执行器。它给每个 node 生成 child session id,并把 node task 作为 user input 交给 AgentLoop。子 agent 的 execution_context 和 skill_selection_context 都在这里拼装。</p>
<div class="subflow">
<div>child session id = parent_session:team:node:随机后缀。</div>
<div>execution_context 写入 parent task/run、delegated worker、constraints、expected output、dependency outputs、pinned skills、ephemeral guidance。</div>
<div>skill_selection_context 写入 node task、phase=team_node、skill query、required capabilities、dependency outputs 和选择指令。</div>
<div>AgentLoop 完成后用 EvidenceBuilder 建 node evidence。</div>
</div>
<p>详细字段见 <a href="prompt-atlas.html#delegated-contexts">Prompt Atlas</a></p>
</article>
<article class="module">
<h3>registry / resolver</h3>
<p>AgentRegistry 和 TargetResolver 是后续多 agent/多运行目标扩展点。当前主要路径用 generic_skill_worker 和本地 AgentLoop。</p>
<div class="subflow">
<div>注册 agent descriptor。</div>
<div>解析目标 provider/model 或本地 runner。</div>
<div>为将来远程/专用 agent 留边界。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,63 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Engine 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Engine</h1><p>Engine 是执行内核:加载 runtime 依赖、捕获 memory、选择 skill/tool、组装 prompt、调用 provider、执行 tool loop,并把全过程写回 session。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="providers.html">Providers</a><a href="tools.html">Tools</a><a href="skills.html">Skills</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>boot</strong>EngineLoader 加载配置/registry/stores/services</div><div class="arrow">-&gt;</div>
<div class="step"><strong>run start</strong>ensure session + frozen memory</div><div class="arrow">-&gt;</div>
<div class="step"><strong>能力装配</strong>skills + tools</div><div class="arrow">-&gt;</div>
<div class="step"><strong>上下文构建</strong>ContextBuilder messages</div><div class="arrow">-&gt;</div>
<div class="step"><strong>循环</strong>provider chat -> tool calls -> tool results</div><div class="arrow">-&gt;</div>
<div class="step"><strong>收尾</strong>run_completed + receipts</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>loader</h3>
<p>EngineLoader 是依赖装配根。它创建 config、session manager、memory service、tool registry/executor/assembler、skills loader/assembler、task services、MCP manager 等。</p>
<div class="subflow">
<div>读取 workspace/config。</div>
<div>初始化 stores 与 registries。</div>
<div>注册内置工具与 MCP 工具。</div>
<div>返回 EngineLoadResult 供 AgentLoop 复用。</div>
</div>
</article>
<article class="module">
<h3>AgentLoop</h3>
<p>AgentLoop 是所有 root agent 和 delegated agent 共用的执行 kernel。它支持直接 process,也支持 run queue 的 submit_direct 串行消费。</p>
<div class="subflow">
<div>解析 provider/model/max_tokens/temperature/tool iteration。</div>
<div>确保 session,记录 run_started、intent decision、skill/tool selection snapshot。</div>
<div>调用 provider;若返回 tool_calls,则逐个 ToolExecutor 执行并追加 tool message。</div>
<div>到达工具迭代上限时,追加 finalizer system message 再要求模型无工具收尾。</div>
<div>记录 run_completed、usage、skill effect receipts。</div>
</div>
<p>Prompt 相关:主链 context、tool failure guidance、tool limit finalizer 都在 <a href="prompt-atlas.html#agent-loop">Prompt Atlas</a> 展开。</p>
</article>
<article class="module">
<h3>ContextBuilder</h3>
<p>唯一负责 provider messages 形状的组件。它把身份、base system prompt、session、runtime date/time、execution context、memory、extra sections 拼成 system prompt,再把 skill 正文作为 user-role activation message 插入。</p>
<div class="subflow">
<div>build_system_prompt:固定顺序拼 section,用 <code>---</code> 分隔。</div>
<div>build_skill_activation_messages:每个 SkillContext 变成一条 user 消息。</div>
<div>build_messages:system -> skill messages -> history(跳过 system) -> current user input。</div>
<div>add_assistant_message/add_tool_result:tool loop 中追加 provider 兼容消息。</div>
</div>
</article>
<article class="module">
<h3>session</h3>
<p>session store/manager/search 负责把运行过程落成可追溯事件。可见历史和审计事件分离,避免 internal snapshots 进入下一轮 prompt。</p>
<div class="subflow">
<div>append_message 记录 role/event_type/content/tool_calls/context_visible。</div>
<div>get_history 给 ContextBuilder 提供裁剪后的可见上下文。</div>
<div>search/FTS 支持 session_search 工具和 UI 检索。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,60 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Foundation 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Foundation</h1><p>底座模块提供配置、事件、cron 数据模型和 embedding 检索。它不直接处理业务请求,而是让上层 runtime 有稳定的配置来源、轻量事件通道和通用相似度检索能力。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="services.html">Services</a><a href="engine.html">Engine</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>环境/文件配置</strong><code>Config.load()</code></div><div class="arrow">-&gt;</div>
<div class="step"><strong>运行目标解析</strong>main / auxiliary / embedding target</div><div class="arrow">-&gt;</div>
<div class="step"><strong>能力底座</strong>MessageBus / EmbeddingRetriever / Cron models</div><div class="arrow">-&gt;</div>
<div class="step"><strong>上层消费</strong>EngineLoader、SkillAssembler、ToolAssembler、CronService</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>config</h3>
<p>负责把环境变量和配置文件收敛为 runtime 可用的 provider、embedding、workspace、数据库、MCP、cron 等配置。上层不会直接散读环境变量,而是通过配置对象解析目标。</p>
<div class="subflow">
<div>读取环境/默认值,形成配置对象。</div>
<div>入口传入 model/provider_name 时,覆盖或补充默认 provider target。</div>
<div><code>resolve_provider_target()</code> 和 <code>resolve_embedding_target()</code> 输出 provider factory 能消费的字典。</div>
</div>
<p class="meta">关键文件:<code>beaver/foundation/config/*</code></p>
</article>
<article class="module">
<h3>events</h3>
<p>轻量消息总线,给内部组件做事件发布/订阅。当前核心链路更多依赖 session event 记录;MessageBus 是后续异步投影、通知、可观察性的扩展点。</p>
<div class="subflow">
<div>组件注册 handler。</div>
<div>业务动作发布消息。</div>
<div>订阅者异步消费,不反向污染核心执行逻辑。</div>
</div>
</article>
<article class="module">
<h3>cron models</h3>
<p>定义 cron job、run record、payload、状态等结构。Services 层用这些模型创建计划任务、记录触发历史,并把结果映射成 notification 或 Task。</p>
<div class="subflow">
<div>CronJob 保存用户配置、调度表达式和 payload。</div>
<div>CronRunRecord 保存单次触发、输出、错误、关联 task/run。</div>
<div>CronService 负责状态迁移,AgentService 负责真正执行。</div>
</div>
</article>
<article class="module">
<h3>embedding</h3>
<p>通用 embedding retriever 被 skill 和 tool 装配共用。它输入 query 和候选项,输出 top-k 候选;当远程 embedding 不可用时,有 fallback top-k 机制保证链路不完全中断。</p>
<div class="subflow">
<div>候选项统一渲染为 name/description/text。</div>
<div>调用 embedding runtime 计算相似度。</div>
<div>按 top-k 输出候选,供 LLM 二次选择或直接加入工具集。</div>
</div>
</article>
<h2>核对点</h2>
<p>这个模块主要是“稳定输入”。后续如果修改 provider、embedding 或 cron 行为,优先核对配置解析是否继续保持单一入口,避免在 Service 或 Engine 中散落环境变量读取。</p>
</section></main></body></html>

View File

@ -0,0 +1,87 @@
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Beaver Backend 多页模块蓝图</title>
<link rel="stylesheet" href="blueprint.css">
</head>
<body>
<main class="page">
<header class="topbar">
<h1>Beaver Backend 多页模块蓝图</h1>
<p>基于 <code>app-instance/backend/beaver</code> 源码拆出的模块级审阅文档。每个大模块独立成页,页内继续拆小模块、执行流程、数据流和 prompt 组装点。</p>
</header>
<nav class="nav">
<a href="index.html">索引</a>
<a href="foundation.html">Foundation</a>
<a href="interfaces.html">Interfaces</a>
<a href="services.html">Services</a>
<a href="engine.html">Engine</a>
<a href="providers.html">Providers</a>
<a href="tasks.html">Tasks</a>
<a href="coordinator.html">Coordinator</a>
<a href="tools.html">Tools</a>
<a href="skills.html">Skills</a>
<a href="memory.html">Memory</a>
<a href="integrations.html">Integrations</a>
<a href="permissions.html">Permissions</a>
<a href="prompt-atlas.html">Prompt Atlas</a>
</nav>
<section class="content">
<h2>项目在做什么</h2>
<p class="lead">这个后端是 Beaver 的本地/服务端 agent runtime。它把 Web、CLI、MCP、定时任务等入口统一成 session run,再通过 Intent Agent 判断是否进入内部 Task 模式;Task 模式会计划单 agent 或 team graph,装配 skills/tools/memory,调用 provider,执行工具循环,收集 evidence,并把结果交给用户验收。只有用户接受后的 Task evidence 才进入 skill learning,生成候选、草稿、审核和发布链路。</p>
<div class="flow">
<div class="step"><strong>入口</strong>Web / CLI / Gateway / Cron</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>服务编排</strong>AgentService / CronService / TeamService</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>运行内核</strong>AgentLoop + ContextBuilder</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>能力装配</strong>Skills / Tools / Memory</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>模型与工具</strong>Providers + ToolExecutor</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>任务验收</strong>Evidence / User Acceptance</div>
</div>
<h2>大模块页</h2>
<div class="grid">
<article class="module"><h3><a href="foundation.html">Foundation</a></h3><p>配置、事件总线、cron 数据结构、embedding 检索基础设施。</p><p class="meta">beaver/foundation</p></article>
<article class="module"><h3><a href="interfaces.html">Interfaces</a></h3><p>Web API、静态 UI、CLI、Gateway、MCP server 和外部通道入口。</p><p class="meta">beaver/interfaces</p></article>
<article class="module"><h3><a href="services.html">Services</a></h3><p>产品级用例层,负责把入口请求变成 agent run、task run、cron run、team run。</p><p class="meta">beaver/services</p></article>
<article class="module"><h3><a href="engine.html">Engine</a></h3><p>运行内核,负责加载依赖、构造上下文、执行 LLM/tool loop、落 session 事件。</p><p class="meta">beaver/engine</p></article>
<article class="module"><h3><a href="providers.html">Providers</a></h3><p>统一 provider 协议和 OpenAI/LiteLLM/Anthropic/Codex/Custom 的适配转换。</p><p class="meta">beaver/engine/providers</p></article>
<article class="module"><h3><a href="tasks.html">Tasks</a></h3><p>内部 Task 模式路由、计划、team 技能解析、事实证据和用户验收。</p><p class="meta">beaver/tasks</p></article>
<article class="module"><h3><a href="coordinator.html">Coordinator</a></h3><p>把 team graph 节点调度到本地子 agent管理依赖输出和节点证据。</p><p class="meta">beaver/coordinator</p></article>
<article class="module"><h3><a href="tools.html">Tools</a></h3><p>工具规格、注册表、按 run 选择工具、执行器、内置工具和 MCP 工具包装。</p><p class="meta">beaver/tools</p></article>
<article class="module"><h3><a href="skills.html">Skills</a></h3><p>Skill 目录、LLM 选择、注入、草稿、审核、发布、学习流水线。</p><p class="meta">beaver/skills</p></article>
<article class="module"><h3><a href="memory.html">Memory</a></h3><p>curated memory、session SQLite、run receipt、skill learning store。</p><p class="meta">beaver/memory</p></article>
<article class="module"><h3><a href="integrations.html">Integrations</a></h3><p>MCP、AuthZ、Outlook 等外部集成边界。</p><p class="meta">beaver/integrations</p></article>
<article class="module"><h3><a href="permissions.html">Permissions</a></h3><p>当前是预留骨架,用来承载后续权限策略。</p><p class="meta">beaver/permissions</p></article>
</div>
<h2>Prompt 组装总图</h2>
<p>所有直接构造 LLM messages 的位置集中在 <a href="prompt-atlas.html">Prompt Atlas</a>。模块页中只保留与该模块有关的 prompt 流程;详细字段顺序、system/user message 内容结构、fallback 逻辑都在 Prompt Atlas 展开。</p>
<div class="flow">
<div class="step"><strong>Intent Agent</strong>判断 simple/task/revise/new/close</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>Planner</strong>判断 single/team,生成 graph</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>Skill Select</strong>选择 published skill 或临时 guidance</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>ContextBuilder</strong>system prompt + skill activation + history + user</div>
<div class="arrow">-&gt;</div>
<div class="step"><strong>User Acceptance</strong>用户接受、修改或放弃</div>
</div>
<h2>已知核对点</h2>
<div class="callout">
<strong>定时任务路径有一处疑似运行时错误:</strong>
<code>services/agent_service.py</code> 的 <code>run_scheduled_task()</code> 末尾写 event payload 时引用了未定义的 <code>job</code>、<code>run</code>,应改用函数参数 <code>cron_job_id</code>、<code>scheduled_run_id</code>、<code>cron_job_name</code>。
</div>
</section>
</main>
</body>
</html>

View File

@ -0,0 +1,47 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Integrations 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Integrations</h1><p>Integrations 是外部系统边界。当前主要包括 MCP client 连接、AuthZ 客户端、Outlook,以及 A2A/WhatsApp 的占位接入。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="tools.html">Tools</a><a href="services.html">Services</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>配置</strong>外部服务 target/token</div><div class="arrow">-&gt;</div>
<div class="step"><strong>连接</strong>MCP/AuthZ/Outlook client</div><div class="arrow">-&gt;</div>
<div class="step"><strong>适配</strong>转成内部 tool/client API</div><div class="arrow">-&gt;</div>
<div class="step"><strong>消费</strong>ToolRegistry 或 Services 调用</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>MCP client</h3>
<p>Engine boot 时 MCP manager 连接配置中的 MCP servers,把远程 tool 描述包装为 Beaver ToolSpec 并注册到 ToolRegistry。</p>
<div class="subflow">
<div>读取 MCP server 配置。</div>
<div>connect_all 建立连接并拉取工具列表。</div>
<div>MCP wrapper 转换工具 schema 和 callable。</div>
<div>ToolAssembler 后续按任务选择这些工具。</div>
</div>
</article>
<article class="module">
<h3>AuthZ</h3>
<p>权限授权系统的外部客户端边界。当前实际权限模块仍是骨架,AuthZ client 是后续接入策略判断的位置。</p>
<div class="subflow">
<div>Service 或 Tool 发起授权检查。</div>
<div>AuthZ client 调外部服务。</div>
<div>返回 allow/deny/context。</div>
</div>
</article>
<article class="module">
<h3>Outlook / A2A / WhatsApp</h3>
<p>这些是面向外部渠道或平台的适配边界。成熟路径应保持“集成层只做协议适配,业务语义进入 Services”。</p>
<div class="subflow">
<div>外部事件或 API 请求进入 client。</div>
<div>规范化成内部消息、工具结果或服务参数。</div>
<div>交给 AgentService/ToolExecutor,不在集成层直接拼业务 prompt。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,60 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Interfaces 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Interfaces</h1><p>接口层把外部输入转成内部服务调用。它包含 Web API/静态文件、CLI、Gateway、渠道适配和 MCP server。核心原则是:入口负责协议转换,业务决策交给 Services 和 Engine。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="services.html">Services</a><a href="engine.html">Engine</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>外部请求</strong>HTTP / CLI / MCP / Channel</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Schema/参数整理</strong>session、model、thinking、routing</div><div class="arrow">-&gt;</div>
<div class="step"><strong>调用 Services</strong>AgentService / CronService / SkillHubService</div><div class="arrow">-&gt;</div>
<div class="step"><strong>返回响应</strong>chat result、task state、skill artifacts、cron history</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>web</h3>
<p>Web 应用是主要产品入口,集中提供 chat、task acceptance、cron、session、skill draft/review/publish 等 API,并服务前端静态资源。</p>
<div class="subflow">
<div>请求进入 FastAPI route,解析 body/query/path。</div>
<div>根据功能调用 AgentService、CronService、SkillHubService 或 SessionProcessProjector。</div>
<div>把内部 dataclass/model 转成 JSON payload,并在必要时补充 session/run/task 元数据。</div>
</div>
<p class="meta">关键文件:<code>beaver/interfaces/web/app.py</code>、<code>beaver/interfaces/web/files.py</code></p>
</article>
<article class="module">
<h3>cli</h3>
<p>CLI 入口用于本地命令行运行 Beaver。它复用同一套 AgentService/AgentLoop,因此 CLI 不是第二套 runtime,只是更薄的协议层。</p>
<div class="subflow">
<div>读取命令行参数。</div>
<div>构造 session/source/model 参数。</div>
<div>调用 agent run,打印最终输出。</div>
</div>
</article>
<article class="module">
<h3>gateway 与 channels</h3>
<p>Gateway 和 channels 是多渠道接入边界,把渠道消息抽象为统一的内部消息。当前代码中通道能力较轻,主要服务未来接入不同聊天来源。</p>
<div class="subflow">
<div>渠道事件进入 adapter。</div>
<div>规范化 user_id、channel、chat_id、content。</div>
<div>转给 AgentService,最终仍落入同一个 session/run 体系。</div>
</div>
</article>
<article class="module">
<h3>mcp_servers</h3>
<p>对外暴露 MCP server 能力,让 Beaver 的部分能力可以被其他 MCP client 调用;这和 Beaver 作为 MCP client 使用外部工具是两条边界。</p>
<div class="subflow">
<div>MCP client 调用 server tool。</div>
<div>接口层解析 MCP 参数。</div>
<div>转调内部服务或数据访问层。</div>
</div>
</article>
<h2>修改影响点</h2>
<p>入口层新增字段时,要同步核对 Services 层是否需要进入 prompt,例如 <code>thinking_enabled</code> 会影响 router、skill assembler、provider chat kwargs;<code>execution_context</code> 会进入 ContextBuilder 的 system prompt。</p>
</section></main></body></html>

View File

@ -0,0 +1,58 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Memory 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Memory</h1><p>Memory 模块提供三类持久上下文:curated memory、session/run memory、skill learning memory。主链 prompt 使用 frozen curated snapshot;审计和学习使用 session/run evidence;Task evidence 只作为事实记录。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="engine.html">Engine</a><a href="skills.html">Skills</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>Curated</strong>MEMORY.md / USER.md</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Snapshot</strong>run 开始前冻结</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Prompt</strong>ContextBuilder 注入 system sections</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Session</strong>运行事件和可见历史</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Learning</strong>run receipts / skill effects</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>curated</h3>
<p>人工或工具维护的长期记忆文件。MemoryService 在每个 run 开始前捕获 snapshot,ContextBuilder 只读 snapshot,避免运行中变化影响同一轮 prompt。</p>
<div class="subflow">
<div>读取 workspace 下的 memory 文件。</div>
<div>生成 MemorySnapshot。</div>
<div><code>as_prompt_sections()</code> 渲染为 system prompt sections。</div>
</div>
</article>
<article class="module">
<h3>sessions</h3>
<p>SQLite session store 保存 message/event 流、usage、system prompt snapshot、tool call、tool result、task evidence 与 acceptance 事件等,并支持 FTS 检索。</p>
<div class="subflow">
<div>AgentLoop append_message 记录每一步。</div>
<div>context_visible=false 的内部事件不进入普通对话历史。</div>
<div>get_history 给 ContextBuilder 取可见历史。</div>
<div>session_search 工具使用检索能力找历史证据。</div>
</div>
</article>
<article class="module">
<h3>runs</h3>
<p>run memory 记录一次 run 的 receipts、acceptance metadata、skill effects。它是 skill learning 的主要证据来源,但 learning 入口由 task accepted 触发,而不是单个 run 触发。</p>
<div class="subflow">
<div>run_completed 后记录 run receipt。</div>
<div>用户 acceptance(accept/revise/abandon)更新 task 状态,并为 skill learning 标记 final accepted run。</div>
<div>skill effects 根据成功率重算版本分数。</div>
</div>
</article>
<article class="module">
<h3>skills learning store</h3>
<p>保存 learning candidates、drafts、review 状态和发布记录。它把一次运行经验变成可审核的长期能力改动。</p>
<div class="subflow">
<div>从 accepted task evidence 建 candidate,包含 task 的所有 runs 和 final_accepted_run_id。</div>
<div>candidate 进入 synthesis。</div>
<div>生成 draft 后由人审阅发布。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,30 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Permissions 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Permissions</h1><p>当前 <code>beaver/permissions</code> 更像预留边界,还没有形成完整策略执行链路。它应该承接工具执行、文件访问、外部集成、用户身份和审计之间的权限判断。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="tools.html">Tools</a><a href="integrations.html">Integrations</a></nav>
<section class="content">
<h2>预期大模块流程</h2>
<div class="flow">
<div class="step"><strong>请求动作</strong>tool / integration / file / cron</div><div class="arrow">-&gt;</div>
<div class="step"><strong>上下文</strong>user/session/source/workspace</div><div class="arrow">-&gt;</div>
<div class="step"><strong>策略判断</strong>本地 policy 或 AuthZ</div><div class="arrow">-&gt;</div>
<div class="step"><strong>执行/拒绝</strong>ToolResult 或异常</div><div class="arrow">-&gt;</div>
<div class="step"><strong>审计</strong>session event/run evidence</div>
</div>
<h2>当前状态</h2>
<article class="module">
<h3>骨架模块</h3>
<p>现有代码没有在核心路径中统一调用 permissions policy。工具执行主要依赖 ToolExecutor 和各工具自身约束;外部授权能力放在 integrations/authz 边界。</p>
<div class="subflow">
<div>权限模块可作为未来统一策略层。</div>
<div>ToolContext 已携带 user/session/workspace/services,可作为策略输入。</div>
<div>session event 已经具备审计载体,可记录 allow/deny。</div>
</div>
</article>
<h2>修改建议核对点</h2>
<p>如果后续补权限,建议从 ToolExecutor 前置检查切入:它是所有 provider tool_call 的统一收口点。第二个切入点是 web route 层的管理类 API,例如 skill 发布、cron 修改、文件系统工具。</p>
</section></main></body></html>

View File

@ -0,0 +1,217 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Prompt Atlas</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Prompt Atlas</h1><p>这里集中记录后端所有直接组装 LLM messages 的位置。每段都按“谁调用、输入怎么来、system/user prompt 怎么拼、模型输出怎么解析、失败怎么 fallback”展开。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="services.html">Services</a><a href="engine.html">Engine</a><a href="tasks.html">Tasks</a><a href="skills.html">Skills</a><a href="coordinator.html">Coordinator</a></nav>
<section class="content">
<h2>总览流程</h2>
<div class="flow">
<div class="step"><strong>Intent Router</strong>是否 Task</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Planner</strong>single/team</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Skill 选择</strong>主 run / team node</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Main Context</strong>身份/会话/记忆/技能/历史</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Evidence</strong>事实记录</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Learning</strong>accepted task evidence -&gt; 草稿合成</div>
</div>
<h2 id="intent-router">1. MainAgentRouter</h2>
<p class="meta">文件:<code>beaver/tasks/router.py</code>;调用方:<code>AgentService._process_with_main_agent()</code></p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>作用</td><td>只决定当前用户消息进入 simple chat 还是 internal Task mode,不回答用户。</td></tr>
<tr><td>system message</td><td>声明自己是 Beaver 的 Intent Agent,唯一职责是路由;只返回紧凑 JSON,不要回答用户,不要解释。</td></tr>
<tr><td>user prompt 组成</td><td>固定说明“Decide how to route”;可选 intent-agent-router skill guidance;Actions 列表;Critical policy;返回 JSON keys;Active task JSON;最近 8 条 user/assistant 对话;Current user message。</td></tr>
<tr><td>输出</td><td><code>{ action, reason, short_title }</code>。action 映射为 simple_chat、continue_task、revise_task、new_task、close_task、abandon_task。</td></tr>
<tr><td>fallback</td><td>provider 不可用或两次超时失败:有 active task 则 continue_task,否则 simple_chat。</td></tr>
</table>
<div class="flow">
<div class="step">message + active_task + recent_messages</div><div class="arrow">-&gt;</div>
<div class="step">拼 router prompt</div><div class="arrow">-&gt;</div>
<div class="step">aux/main provider chat</div><div class="arrow">-&gt;</div>
<div class="step">parse JSON object</div><div class="arrow">-&gt;</div>
<div class="step">MainAgentDecision</div>
</div>
<pre>user prompt =
"Decide how to route..."
+ optional "Intent Agent skill guidance:\n{intent_skill}"
+ Actions 列表
+ Critical policy 列表
+ "Return JSON only with keys: action, reason, short_title."
+ "Active task:\n{json(active_task_payload)}"
+ "Recent conversation:\n{json(recent[-8:])}"
+ "Current user message:\n{message}"</pre>
<h2>2. TaskExecutionPlanner</h2>
<p class="meta">文件:<code>beaver/tasks/planner.py</code>;调用方:<code>AgentService._run_task_mode()</code></p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>作用</td><td>决定本次 Task attempt 直接单 agent 执行,还是先创建 sub-agent team。</td></tr>
<tr><td>system message</td><td>选择 internal Beaver Task attempt 的执行模式;返回紧凑 JSON。</td></tr>
<tr><td>user prompt 组成</td><td>执行模式说明;team 使用条件;JSON schema;Task goal;Current user request;Attempt index;必要的 task history(若有)。</td></tr>
<tr><td>输出</td><td><code>mode</code> 为 single 或 team;team 时还需 <code>strategy</code>、<code>nodes</code>、<code>final_synthesis_instruction</code>。</td></tr>
<tr><td>fallback</td><td>provider 不可用、JSON 解析失败、graph validate 失败、skill resolver 失败都会回退 single。</td></tr>
</table>
<pre>planner user prompt =
"Decide execution mode for this internal Task attempt."
+ "Use mode=team only when ..."
+ JSON schema(mode/reason/strategy/nodes/final_synthesis_instruction)
+ "Task goal:\n{task.goal}"
+ "Current user request:\n{user_message}"
+ "Attempt index: {attempt_index}"
+ optional "Relevant task history:\n{task_history}"</pre>
<h2>3. TaskSkillResolver</h2>
<p class="meta">文件:<code>beaver/tasks/skill_resolver.py</code>;调用方:planner 解析 team graph 后</p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>query 组装</td><td><code>skill_query</code>、<code>node.task</code>、required capabilities、task.goal、user_message 用换行拼接。</td></tr>
<tr><td>召回</td><td>SkillsLoader 构建候选,EmbeddingRetriever top-8。</td></tr>
<tr><td>system message</td><td>为一个 generic sub-agent node 选择 published Beaver skills;只返回 JSON array;不能编造名字;不匹配返回 []。</td></tr>
<tr><td>user prompt</td><td>Node skill query;Candidate skills 列表;要求返回 JSON,例如 <code>["skill-a"]</code> 或 <code>[]</code>。</td></tr>
<tr><td>输出过滤</td><td>只保留候选集中真实存在的 skill name,并保持模型输出顺序。</td></tr>
<tr><td>fallback</td><td>LLM 失败或返回空时,进入 EphemeralGuidanceSynthesizer。</td></tr>
</table>
<pre>node skill query =
join_non_empty(skill_query, node.task, " ".join(required_capabilities), task.goal, user_message)</pre>
<h2>4. EphemeralGuidanceSynthesizer</h2>
<p class="meta">文件:<code>beaver/skills/learning/missing_skill.py</code>;调用方:TaskSkillResolver</p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>作用</td><td>team node 没有 published skill 可用时,生成当前委派子任务专用的一次性 guidance。</td></tr>
<tr><td>system message</td><td>创建 concise Beaver ephemeral guidance;只返回 JSON keys:guidance_name、description、content、tags。</td></tr>
<tr><td>user prompt</td><td>说明“Create procedural guidance”;Task goal;Current user request;Node id;Node task;Skill query;Required capabilities;要求 content 是临时 sub-agent 可执行指导,不包含实现声明、review metadata 或 publish metadata。</td></tr>
<tr><td>输出</td><td>生成 <code>SkillContext(name="ephemeral:{guidance_name}", version="ephemeral:{guidance_id}")</code></td></tr>
<tr><td>fallback</td><td>失败时本地生成基础 payload:Objective、Capabilities to apply、Output。</td></tr>
</table>
<h2 id="skill-assembler">5. SkillAssembler</h2>
<p class="meta">文件:<code>beaver/skills/assembler/task_assembler.py</code>;调用方:<code>AgentLoop._process_direct_impl()</code></p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>query 来源</td><td>默认是 task;Task 模式会传入 AgentService 组装的 skill_selection_context;team node 会传 LocalAgentRunner 组装的 delegated skill_selection_context。</td></tr>
<tr><td>召回</td><td>SkillsLoader candidates -&gt; EmbeddingRetriever top-k。</td></tr>
<tr><td>shortlist</td><td>候选数超过 max_detailed_candidates 时,LLM 先基于摘要返回最多 N 个 skill names。</td></tr>
<tr><td>final</td><td>把 shortlist 对应候选补充 skill 正文,再让 LLM 返回最终要激活的 skill names。</td></tr>
<tr><td>system message</td><td>选择 Beaver skills;输入 task description 和 candidate skill information;只返回 JSON array;不能编造名字;无匹配返回 [];包含 selection stage 和返回数量指令。</td></tr>
<tr><td>user prompt</td><td>Task description;Candidate skills;返回 JSON 示例。</td></tr>
<tr><td>注入</td><td>加载 skill 正文并 strip frontmatter,生成 SkillContext,稍后由 ContextBuilder 转成 activation message。</td></tr>
</table>
<pre>skill selector messages =
system: "You select Beaver skills for a single run..." + "Selection stage: {shortlist|final}..."
user: "Task description:\n{task_description}\n\nCandidate skills:\n{candidate_summary}\n\nReturn only JSON..."</pre>
<h2 id="agent-service-contexts">6. AgentService 组装的上下文片段</h2>
<p class="meta">文件:<code>beaver/services/agent_service.py</code></p>
<article class="module">
<h3>Task skill selection context</h3>
<p>传给 SkillAssembler 的 task_description。它不是 provider chat 的最终 prompt但会影响选中哪些 skills。</p>
<div class="subflow">
<div>Task goal / description / current user request。</div>
<div>Execution phase、task status、attempt index。</div>
<div>constraints、prior skills、必要的 task history。</div>
<div>planner reason、strategy、nodes、team summaries、final synthesis instruction。</div>
<div>明确要求优先选择 existing published skills。</div>
</div>
</article>
<article class="module">
<h3>Team execution context</h3>
<p>如果 team 先跑,主 agent 的 execution_context 会包含 planner reason、team strategy、team success、node results、rendered team evidence、final synthesis instruction,以及避免重复失败工具调用的提醒。</p>
</article>
<article class="module">
<h3>Scheduled execution context</h3>
<p>cron task/notification 会把 Cron Job ID、Name、Scheduled Run ID 和“不向用户确认,直接执行/生成通知”的约束放进 execution_context。</p>
</article>
<h2 id="delegated-contexts">7. LocalAgentRunner delegated contexts</h2>
<p class="meta">文件:<code>beaver/coordinator/local.py</code></p>
<table class="table">
<tr><th>上下文</th><th>拼装内容</th></tr>
<tr><td>execution_context</td><td>Parent task ID、Parent run ID、delegated worker 说明、agent.system_prompt、constraints、expected output、dependency outputs、pinned inherited skills、ephemeral pinned guidance。</td></tr>
<tr><td>skill_selection_context</td><td>Parent task ID、Node task、Execution phase=team_node、Agent role、Skill query、Required capabilities、constraints、expected output、pinned skills、dependency outputs 前 800 字、Skill selection instruction。</td></tr>
</table>
<div class="flow">
<div class="step">DelegationEnvelope</div><div class="arrow">-&gt;</div>
<div class="step">child session id</div><div class="arrow">-&gt;</div>
<div class="step">execution_context</div><div class="arrow">+</div>
<div class="step">skill_selection_context</div><div class="arrow">-&gt;</div>
<div class="step">AgentLoop.process_direct</div>
</div>
<h2 id="agent-loop">8. AgentLoop 主运行 prompt</h2>
<p class="meta">文件:<code>beaver/engine/loop.py</code>、<code>beaver/engine/context/builder.py</code></p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>base identity</td><td>固定 Beaver 身份:海狸(Beaver),博维资讯系统有限公司研发的 AI 助手。</td></tr>
<tr><td>base system prompt</td><td>AgentProfile.system_prompt。</td></tr>
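<tr><td>session section</td><td>Session ID、Source、Model、User ID、Channel、Chat ID、Parent Session ID。</td></tr>
<tr><td>runtime section</td><td>RuntimeContext 提供的 UTC 时间、本地时间、时区和 UTC offset,位于 session section 之后、execution context 之前。</td></tr>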
<tr><td>execution context</td><td>Service 或 LocalAgentRunner 传入,标题为 <code># Execution Context</code></td></tr>
<tr><td>memory snapshot</td><td>MemorySnapshot.as_prompt_sections() 的 frozen sections。</td></tr>
<tr><td>extra sections</td><td>当前固定加入 Tool Failure Guidance。</td></tr>
<tr><td>skill activation</td><td>每个 SkillContext 变成一条 user message位于 system prompt 之后、历史消息之前。</td></tr>
<tr><td>history</td><td>session_manager.get_history();ContextBuilder 跳过 role=system 的历史。</td></tr>
<tr><td>current user</td><td>本轮 task/message 作为最后一条 user message。</td></tr>
</table>
<pre>messages =
[
{"role": "system", "content": build_system_prompt(...)},
...build_skill_activation_messages(activated_skills),
...visible_history_without_system,
{"role": "user", "content": current_user_input}
]</pre>
<h3>Tool Failure Guidance</h3>
<p>AgentLoop 把一段额外 section 放进 system prompt:如果同类工具反复失败,不要继续换 query 重试;使用已有材料,明确不确定性,给出部分已确认结果。</p>
<h3>Tool iteration finalizer</h3>
<p>到达最大工具迭代数后,AgentLoop 追加一条新的 system message,说明工具预算已耗尽,要求不要再调用工具、基于现有对话和工具结果给出最终答案并明确不确定性。该 finalizer 只用于最后一次无工具收尾调用。</p>
<h2>9. Tool loop 消息追加</h2>
<p class="meta">文件:<code>beaver/engine/context/builder.py</code>、<code>beaver/engine/loop.py</code></p>
<div class="flow">
<div class="step">provider response</div><div class="arrow">-&gt;</div>
<div class="step">assistant message(content + tool_calls)</div><div class="arrow">-&gt;</div>
<div class="step">ToolExecutor</div><div class="arrow">-&gt;</div>
<div class="step">tool message(tool_call_id/name/content)</div><div class="arrow">-&gt;</div>
<div class="step">下一轮 provider chat</div>
</div>
<p>assistant 消息始终显式包含 <code>content</code>,即使工具调用时为空;tool_calls 会被规范化为 OpenAI-compatible 结构,arguments 非字符串时转 JSON 字符串。</p>
<h2>10. Task Evidence</h2>
<p class="meta">文件:<code>beaver/tasks/evidence.py</code></p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>作用</td><td>记录 Task run 的事实证据,不判断、不打分、不 gate,也不生成 revision prompt。</td></tr>
<tr><td>输入</td><td>Task goal、attempt index、main run、team runs、tool results、team node results、assistant final output。</td></tr>
<tr><td>输出</td><td>TaskEvidencePacket / rendered evidence text,供审计、过程展示和 skill learning 使用。</td></tr>
<tr><td>边界</td><td>Task 是否完成只由 User Acceptance 决定;accepted task evidence 包含 task 的所有 runs,并标记 final_accepted_run_id。</td></tr>
</table>
<h2>11. SkillDraftSynthesizer</h2>
<p class="meta">文件:<code>beaver/skills/learning/synthesizer.py</code></p>
<table class="table">
<tr><th>阶段</th><th>内容</th></tr>
<tr><td>作用</td><td>从 accepted task evidence 合成 skill 草稿,支持 new/revise/merge。</td></tr>
<tr><td>system message</td><td>从 accepted task evidence 合成 Beaver skill drafts;只返回 JSON keys:frontmatter、content、change_reason。</td></tr>
<tr><td>user prompt</td><td>Action;Candidate kind;Reason;Related skills;Called tool names;Run-selected tool names;Task summaries;Session excerpts;final_accepted_run_id;frontmatter 必须包含 description 和 tools 数组。</td></tr>
<tr><td>输出 normalize</td><td>frontmatter.tools 为空时用 evidence_packet.metadata.tool_names 补齐。</td></tr>
<tr><td>fallback</td><td>JSON 不合格时本地生成基础 frontmatter 和 Evidence 内容。</td></tr>
</table>
<h2 id="provider-conversion">12. Provider conversion</h2>
<p class="meta">文件:<code>beaver/engine/providers/*</code></p>
<p>Provider 层不负责创造业务 prompt,但会改变 messages 的传输形式。Anthropic/Codex 类 provider 会把第一条 system message 拆出来传给 SDK 的 <code>system</code> 或 <code>instructions</code> 字段,剩余 messages 作为对话;OpenAI/LiteLLM 风格 provider 基本保留 messages + tools。</p>
<div class="subflow">
<div>ContextBuilder 输出统一 messages。</div>
<div>Provider adapter 根据目标 SDK 转换 system、tools、tool calls、usage、reasoning。</div>
<div>响应统一回 LLMResponse,AgentLoop 不感知 SDK 差异。</div>
</div>
<h2>13. 无 LLM prompt 但影响 prompt 的组件</h2>
<table class="table">
<tr><th>组件</th><th>影响</th></tr>
<tr><td>ToolAssembler</td><td>不调用 LLM,但决定哪些 tool schema 暴露给主模型。</td></tr>
<tr><td>MemoryService</td><td>不调用 LLM,但 frozen snapshot 会进入 system prompt。</td></tr>
<tr><td>SessionManager</td><td>不调用 LLM,但可见历史决定 ContextBuilder 的 history。</td></tr>
<tr><td>SkillDraftSafetyChecker</td><td>不调用 LLM,是 deterministic safety gate,影响草稿是否能进入后续审核。</td></tr>
</table>
</section></main></body></html>

View File

@ -0,0 +1,68 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Providers 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Providers</h1><p>Providers 把不同模型服务统一成 Beaver 内部的 <code>chat(messages, tools, model, ...)</code> 协议,屏蔽 OpenAI/LiteLLM/Anthropic/Codex/Custom 的差异。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="engine.html">Engine</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>配置目标</strong>provider_name/model/api_base/key</div><div class="arrow">-&gt;</div>
<div class="step"><strong>ProviderBundle</strong>main/auxiliary/embedding</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Provider.chat</strong>统一入参</div><div class="arrow">-&gt;</div>
<div class="step"><strong>协议转换</strong>system/tool/reasoning/usage</div><div class="arrow">-&gt;</div>
<div class="step"><strong>LLMResponse</strong>content/tool_calls/finish_reason</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>base models</h3>
<p><code>LLMProvider</code> 定义 chat 协议;<code>LLMResponse</code> 统一模型输出;<code>ToolCallRequest</code> 统一工具调用 id/name/arguments。</p>
<div class="subflow">
<div>AgentLoop 只消费 LLMResponse,不关心具体 SDK。</div>
<div>Tool call arguments 被规范化为字符串或 dict 后交给 ToolExecutor。</div>
<div>usage 被映射回 session usage。</div>
</div>
</article>
<article class="module">
<h3>factory / runtime / registry</h3>
<p>根据配置创建 provider runtime,并组织 main provider、auxiliary provider 和 embedding runtime。Router、Planner 和 Skill 选择通常使用 auxiliary provider,主回答使用 main provider。</p>
<div class="subflow">
<div>resolve provider target。</div>
<div>创建 runtime,带 api key/base/header/timeout/model。</div>
<div>创建 ProviderBundle,供一次 Task attempt 复用。</div>
</div>
</article>
<article class="module">
<h3>LiteLLM / OpenAI-like</h3>
<p>主要走 OpenAI 风格 messages + tools。它最接近 Beaver 内部协议,转换成本最低。</p>
<div class="subflow">
<div>接收 ContextBuilder messages。</div>
<div>附加 tools、max_tokens、temperature、thinking_enabled。</div>
<div>解析 content、tool_calls、usage、finish_reason。</div>
</div>
</article>
<article class="module">
<h3>Anthropic 与 Codex</h3>
<p>这两类 provider 对 system prompt 有特殊要求,会把第一条 system message 提取到独立字段,剩余 messages 作为对话输入。Codex 还会使用 prompt cache key。</p>
<div class="subflow">
<div>扫描 messages,提取第一条 system。</div>
<div>其余 system 或非 system message 按 provider 能接受的格式转换。</div>
<div>调用 provider SDK,并把响应转回 LLMResponse。</div>
</div>
<p>Prompt 相关:provider 本身不创造业务 prompt,但会改变 system message 的传输位置,详见 <a href="prompt-atlas.html#provider-conversion">Prompt Atlas</a>。</p>
</article>
<article class="module">
<h3>fallback chain</h3>
<p>当主 provider 失败时,fallback chain 可以尝试备用目标。上层仍看到统一的 LLMResponse。</p>
<div class="subflow">
<div>主 provider 调用失败。</div>
<div>记录异常并尝试 fallback target。</div>
<div>成功则返回 fallback response;全部失败则 finish_reason/error。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,71 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Services 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Services</h1><p>Services 是产品用例编排层。它不实现底层 tool loop,但决定什么时候进入 Task、什么时候跑通知、什么时候创建 team、什么时候记录用户验收和学习候选。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="tasks.html">Tasks</a><a href="engine.html">Engine</a><a href="coordinator.html">Coordinator</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>接口调用</strong>chat / cron / acceptance / skill admin</div><div class="arrow">-&gt;</div>
<div class="step"><strong>AgentService</strong>路由 simple vs Task</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Task 编排</strong>planner、team、main run、evidence</div><div class="arrow">-&gt;</div>
<div class="step"><strong>状态落盘</strong>session events、task store、run memory、skill learning</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>AgentService</h3>
<p>核心服务。普通消息先由 MainAgentRouter 分类:simple chat 关闭 skill/tool 装配直接跑;Task 模式创建或复用 TaskRecord,计划 single/team,运行主 agent,记录 evidence,并等待用户验收。</p>
<div class="subflow">
<div>输入 message + session kwargs,创建 provider bundle。</div>
<div>调用 Intent Agent,得到 simple_chat / continue_task / revise_task / new_task / close_task / abandon_task。</div>
<div>Task 模式中调用 TaskExecutionPlanner,必要时先跑 TeamGraphScheduler。</div>
<div>主 AgentLoop 执行后构建 evidence packet;evidence 只记录事实,不判断结果质量。</div>
<div>run 完成后进入 awaiting_acceptance,用户 accept/revise/abandon 决定后续状态。</div>
</div>
<p>Prompt 相关:AgentService 组装 Task skill selection context、team execution context、scheduled execution context,详细见 <a href="prompt-atlas.html#agent-service-contexts">Prompt Atlas</a>。</p>
</article>
<article class="module">
<h3>CronService</h3>
<p>管理定时任务的创建、启停、触发、run history。CronService 负责调度状态;真正生成内容时调用 AgentService 的 scheduled task 或 notification 路径。</p>
<div class="subflow">
<div>保存 CronJob 配置和 payload。</div>
<div>到点创建 CronRunRecord。</div>
<div>调用 AgentService 执行 notification 或 Task。</div>
<div>回写 output/error/task_id/run_id。</div>
</div>
<div class="callout">核对点:<code>run_scheduled_task()</code> 当前末尾引用未定义 <code>job</code>/<code>run</code>,应改为函数入参。</div>
</article>
<article class="module">
<h3>TeamService</h3>
<p>提供 team graph 的产品级封装。底层执行在 coordinator,Service 层关心的是把执行结果转成 API 友好的结构。</p>
<div class="subflow">
<div>接收 graph / nodes / strategy。</div>
<div>创建 TeamGraphScheduler 和 LocalAgentRunner。</div>
<div>返回 node results、run ids、success/error。</div>
</div>
</article>
<article class="module">
<h3>MemoryService 与 SessionProcessProjector</h3>
<p>MemoryService 给每个 run 捕获 frozen curated memory snapshot;SessionProcessProjector 把 session 事件投影成 UI 所需的过程视图。</p>
<div class="subflow">
<div>run 开始前捕获 memory snapshot,避免运行中写入导致 prompt 漂移。</div>
<div>session event 记录完整过程。</div>
<div>projector 从事件流还原步骤、工具、evidence 和用户验收状态。</div>
</div>
</article>
<article class="module">
<h3>SkillHubService</h3>
<p>面向产品的 skill 草稿、审核、发布、学习候选接口。底层由 skills/drafts/reviews/publisher/learning 实现。</p>
<div class="subflow">
<div>列出 learning candidates 或 drafts。</div>
<div>触发 draft synthesis / review。</div>
<div>人工确认后发布为 published skill。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,82 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Skills 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Skills</h1><p>Skills 模块负责加载、选择、注入、学习和发布 Beaver 技能。技能不是普通文档摘要,而是会被 ContextBuilder 作为显式 user 消息注入当前 run 的操作指导。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="engine.html">Engine</a><a href="tasks.html">Tasks</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>目录加载</strong>published/builtin/drafts/specs</div><div class="arrow">-&gt;</div>
<div class="step"><strong>候选召回</strong>embedding retrieve</div><div class="arrow">-&gt;</div>
<div class="step"><strong>LLM 选择</strong>shortlist/final 或 node skill selection</div><div class="arrow">-&gt;</div>
<div class="step"><strong>注入</strong>SkillContext -&gt; activation message</div><div class="arrow">-&gt;</div>
<div class="step"><strong>学习</strong>accepted task evidence -&gt; candidate -&gt; draft -&gt; review -&gt; publish</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>catalog loader</h3>
<p>扫描和加载 published skills、builtin skills,并构建供 embedding/LLM 选择的候选摘要。它也提供 load_published_skill、get_skill_record、get_skill_tool_hints。</p>
<div class="subflow">
<div>读取 SKILL.md 和 frontmatter。</div>
<div>记录 name、description、version、content_hash、tool hints。</div>
<div>输出 selection candidates。</div>
</div>
</article>
<article class="module">
<h3>SkillAssembler</h3>
<p>主 agent 每个 run 的 skill 选择器。先用 embedding 召回候选;候选太多时先 LLM shortlist,再把完整 skill 正文截断后交给 LLM final selection。</p>
<div class="subflow">
<div>query = task_description 或 AgentService 提供的 skill_selection_context。</div>
<div>embedding top-k 召回 selection candidates。</div>
<div>shortlist 阶段只看摘要,返回最多 N 个 skill names。</div>
<div>final 阶段看候选正文,返回最终激活 skill names。</div>
<div>加载正文,strip frontmatter,生成 SkillContext。</div>
</div>
<p>详细 prompt 见 <a href="prompt-atlas.html#skill-assembler">Prompt Atlas</a></p>
</article>
<article class="module">
<h3>activation injection</h3>
<p>ContextBuilder 不把 skill 正文塞进 system prompt,而是每个 skill 生成一条 user-role activation message。这样 skills 的正文和主 system prompt 分层清晰。</p>
<pre>[SYSTEM: The "{skill.name}" skill (version {skill.version}) is active for this run.
Follow its instructions as active guidance unless the user overrides them.]
{skill.content}</pre>
</article>
<article class="module">
<h3>drafts / reviews / publisher</h3>
<p>草稿、审核和发布构成 skill 的人工治理链路。Learning 只生成候选和草稿,不直接把新能力静默注入 published 目录。</p>
<div class="subflow">
<div>DraftService 保存草稿内容和 metadata。</div>
<div>ReviewService 记录审核意见、状态、决策。</div>
<div>SkillPublisher 把通过审核的草稿写成正式 skill spec。</div>
</div>
</article>
<article class="module">
<h3>learning</h3>
<p>从用户接受后的 Task evidence 中提取学习候选,合成新 skill 或修订草稿,再经过 safety/eval/review。Safety/Eval 只评估 skill draft,不评估 task result。</p>
<div class="subflow">
<div>Task accepted 后触发 learning,一个 task 的所有 runs 都进入证据包,并标记 final_accepted_run_id。</div>
<div>LearningService 构建 learning candidates。</div>
<div>EvidencePacket 收集 task summary、session excerpts、tool names、user acceptance event 和 revision history。</div>
<div>SkillDraftSynthesizer 用 LLM 生成 frontmatter/content/change_reason JSON。</div>
<div>SafetyChecker 做确定性扫描,Eval 评估后进入 draft/review。</div>
</div>
</article>
<article class="module">
<h3>missing skill guidance</h3>
<p>当 team node 没有匹配 published skill 时,TaskSkillResolver 生成一次性 ephemeral guidance。它以 SkillContext 形式进入 delegated agent,但不会发布为正式 skill。</p>
<div class="subflow">
<div>输入 task goal、user request、node id/task、skill query、required capabilities。</div>
<div>LLM 返回 guidance_name、description、content、tags。</div>
<div>若失败,用 fallback payload 生成基础指导。</div>
<div>SkillContext name = <code>ephemeral:{guidance_name}</code>,version = <code>ephemeral:{guidance_id}</code>。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,74 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Tasks 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Tasks</h1><p>Tasks 模块把“需要执行和跟踪”的用户请求从普通聊天中拆出来,形成有状态的内部任务。它包含路由、计划、技能解析、事实证据、用户验收和 task store。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="services.html">Services</a><a href="coordinator.html">Coordinator</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>Router</strong>simple/task/revise/new/close</div><div class="arrow">-&gt;</div>
<div class="step"><strong>TaskRecord</strong>创建或复用 open task</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Planner</strong>single 或 team graph</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Run</strong>team + main AgentLoop</div><div class="arrow">-&gt;</div>
<div class="step"><strong>Evidence</strong>事实记录,不判断</div><div class="arrow">-&gt;</div>
<div class="step"><strong>User Acceptance</strong>accept/revise/abandon</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>models / store / service</h3>
<p><code>TaskRecord</code> 保存 task_id、session_id、description、goal、status、run_ids、acceptance history、metadata。TaskService 提供 create/start/complete/acceptance/close/abandon 等状态操作。</p>
<div class="subflow">
<div>router 决定进入 Task 后创建或复用 active task。</div>
<div>每次 attempt 调用 start_run 记录 user_message 和 attempt_index。</div>
<div>run 完成后构建 TaskEvidencePacket,状态进入 awaiting_acceptance。</div>
<div>用户验收后进入 closed(accepted)、needs_revision 或 abandoned。</div>
</div>
</article>
<article class="module">
<h3>MainAgentRouter</h3>
<p>独立 LLM 调用,只负责路由,不回答用户。输入 active task 摘要、最近对话、当前 message 和 intent-agent skill guidance,输出紧凑 JSON。</p>
<div class="subflow">
<div>provider 不可用时 fallback:有 active task 则 continue_task,否则 simple_chat。</div>
<div>system message 约束“只路由、不解释、只 JSON”。</div>
<div>user prompt 包含 actions、critical policy、active task、recent conversation、current message。</div>
<div>解析 action映射成 MainAgentDecision。</div>
</div>
<p>详细 prompt 字段见 <a href="prompt-atlas.html#intent-router">Prompt Atlas</a></p>
</article>
<article class="module">
<h3>TaskExecutionPlanner</h3>
<p>决定当前 attempt 是单 agent 还是先跑小 team。它要求模型返回 JSON schema:mode、reason、strategy、nodes、final_synthesis_instruction。</p>
<div class="subflow">
<div>选择 auxiliary/main provider。</div>
<div>输入 task goal、当前用户请求、attempt index 和必要的 task history。</div>
<div>如果 team,解析 nodes 为 ExecutionGraph 并 validate。</div>
<div>调用 TaskSkillResolver 为 team node 绑定 published skills 或 ephemeral guidance。</div>
</div>
</article>
<article class="module">
<h3>TaskSkillResolver</h3>
<p>给 planner 生成的泛型 team node 解析能力来源:先 embedding 召回 published skills,再让 LLM 从候选中选;没有匹配则生成一次性 ephemeral guidance。</p>
<div class="subflow">
<div>构造 query:skill_query、node.task、required_capabilities、task.goal、user_message。</div>
<div>embedding top-8 召回候选 skill。</div>
<div>LLM 返回候选 skill name JSON array。</div>
<div>若为空,EphemeralGuidanceSynthesizer 生成临时 SkillContext 注入 node。</div>
</div>
</article>
<article class="module">
<h3>EvidenceBuilder / User Acceptance</h3>
<p>EvidenceBuilder 从 session 中提取 run evidence、tool summaries、team node results。Evidence 只记录事实,不判断、不打分、不 gate,也不生成 revision prompt;Task 是否完成只由用户验收决定。</p>
<div class="subflow">
<div>主 run 完成后收集 assistant output、tool result、team result。</div>
<div>渲染 evidence packet 文本,用于审计、展示和后续 skill learning。</div>
<div>用户 accept 后关闭 task,并把整个 task 的所有 runs 标记为 Accepted Task Evidence。</div>
<div>用户 revise 时只把 revision message 和必要 task history 带入下一轮执行,不把整包 evidence 自动塞进 prompt。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,79 @@
<!doctype html>
<html lang="zh-CN">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Tools 模块蓝图</title><link rel="stylesheet" href="blueprint.css"></head>
<body><main class="page">
<header class="topbar"><h1>Tools</h1><p>Tools 模块定义 Beaver 可调用工具的规格、注册、选择和执行。它把内置工具与 MCP 工具统一成 provider tool schema,再把模型返回的 tool_calls 安全地执行成 tool result。</p></header>
<nav class="nav"><a href="index.html">索引</a><a href="engine.html">Engine</a><a href="skills.html">Skills</a><a href="prompt-atlas.html">Prompt Atlas</a></nav>
<section class="content">
<h2>大模块流程</h2>
<div class="flow">
<div class="step"><strong>注册</strong>内置工具 + MCP wrapper</div><div class="arrow">-&gt;</div>
<div class="step"><strong>选择</strong>always + skill hints + embedding top-k</div><div class="arrow">-&gt;</div>
<div class="step"><strong>导出 schema</strong>provider function/tool schema</div><div class="arrow">-&gt;</div>
<div class="step"><strong>执行</strong>ToolExecutor 执行 tool_call</div><div class="arrow">-&gt;</div>
<div class="step"><strong>回填</strong>tool message + session event</div>
</div>
<h2>小模块拆分</h2>
<article class="module">
<h3>base</h3>
<p><code>ToolSpec</code> 描述 name、description、input schema、always flag;<code>ToolContext</code> 携带 workspace、session、user、services;<code>ToolResult</code> 统一 success/content/error。</p>
<div class="subflow">
<div>工具实现暴露成 BaseTool 或 ObjectBackedTool。</div>
<div>ToolSpec 同时可转 MCP descriptor、provider schema、embedding candidate。</div>
<div>执行结果必须是 ToolResult,AgentLoop 再转成 provider tool message。</div>
</div>
</article>
<article class="module">
<h3>registry</h3>
<p>ToolRegistry 是工具目录。EngineLoader 会注册内置工具,MCP manager connect_all 后也把外部 MCP 工具注册进来。</p>
<div class="subflow">
<div>register tool spec 与 callable。</div>
<div>list_specs/list_always_specs/get_specs 供 ToolAssembler 使用。</div>
<div>export_selected_provider_schemas 输出本轮模型可见工具。</div>
</div>
</article>
<article class="module">
<h3>ToolAssembler</h3>
<p>按 run 选择工具,不做 LLM prompt。选择顺序固定:always tools、配置里的默认 always names、activated skills 的 tool hints、embedding top-k。</p>
<div class="subflow">
<div>先加入 registry.list_always_specs。</div>
<div>再加入默认 <code>memory</code>、<code>session_search</code>。</div>
<div>收集 activated skill 的 <code>tool_hints</code> 或 loader 中的 skill tool hints。</div>
<div>对剩余工具做 embedding retrieve,补充 top-k。</div>
</div>
</article>
<article class="module">
<h3>runtime executor</h3>
<p>ToolExecutor 接收 provider 返回的 ToolCallRequest,查 registry,解析 arguments,传入 ToolContext,捕获异常并返回 ToolResult。</p>
<div class="subflow">
<div>模型返回 tool_calls。</div>
<div>AgentLoop 序列化并记录 assistant tool call。</div>
<div>ToolExecutor 执行对应工具。</div>
<div>AgentLoop 把 result.content 追加为 role=tool message。</div>
</div>
</article>
<article class="module">
<h3>builtins</h3>
<p>内置工具包括 terminal、filesystem、web、memory、session_search、skill_view、skills_admin、cron、utility、echo。它们覆盖本地开发、记忆、会话检索、skill 管理和定时任务。</p>
<div class="subflow">
<div>工具从 ToolContext 取 workspace/services。</div>
<div>执行具体 IO 或服务动作。</div>
<div>返回文本结果,进入下一轮模型上下文。</div>
</div>
</article>
<article class="module">
<h3>mcp wrapper</h3>
<p>把外部 MCP tool 适配成 Beaver ToolSpec/callable,使外部工具和内置工具对 AgentLoop 呈现同一种接口。</p>
<div class="subflow">
<div>MCP manager connect_all。</div>
<div>为每个外部 tool 建 wrapper。</div>
<div>注册进 ToolRegistry,后续由 ToolAssembler 选择。</div>
</div>
</article>
</section></main></body></html>

View File

@ -0,0 +1,954 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Beaver Backend Module Blueprint</title>
<style>
:root {
--c-bg: #f8fafc;
--c-canvas: #ffffff;
--c-border: #cbd5e1;
--c-border-strong: #94a3b8;
--c-text-main: #0f172a;
--c-text-sub: #64748b;
--c-text-soft: #475569;
--c-accent: #111827;
--c-risk: #b91c1c;
--c-ok: #166534;
--font-ui: Inter, Helvetica, Arial, sans-serif;
--font-mono: "JetBrains Mono", Consolas, "Liberation Mono", monospace;
}
* {
box-sizing: border-box;
}
body {
margin: 0;
min-height: 100vh;
background: var(--c-bg);
color: var(--c-text-main);
font-family: var(--font-ui);
line-height: 1.55;
}
a {
color: inherit;
text-decoration: underline;
text-decoration-thickness: 1px;
text-underline-offset: 2px;
}
.page {
width: min(1500px, 100%);
margin: 0 auto;
padding: 32px;
}
.diagram-canvas {
background: var(--c-canvas);
border: 1px solid var(--c-border);
padding: 32px;
}
.diagram-header {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
gap: 24px;
align-items: start;
border-bottom: 1px solid var(--c-border);
padding-bottom: 18px;
margin-bottom: 24px;
}
.diagram-title {
margin: 0 0 6px;
font-size: 24px;
font-weight: 700;
letter-spacing: 0;
}
.diagram-subtitle,
.meta-line,
.kicker {
font-family: var(--font-mono);
font-size: 11px;
color: var(--c-text-sub);
text-transform: uppercase;
letter-spacing: 0.05em;
}
.meta-box {
border: 1px solid var(--c-border);
padding: 10px 12px;
min-width: 280px;
font-family: var(--font-mono);
font-size: 12px;
color: var(--c-text-soft);
}
.summary {
display: grid;
grid-template-columns: 1.15fr 0.85fr;
gap: 16px;
margin-bottom: 18px;
}
.panel,
.module,
.flow-box,
.note,
.table-wrap {
border: 1px solid var(--c-border);
background: var(--c-canvas);
}
.panel {
padding: 16px;
}
.panel h2,
.section h2 {
margin: 0 0 10px;
font-size: 17px;
letter-spacing: 0;
}
.panel p,
.module p,
.note p {
margin: 0;
color: var(--c-text-soft);
font-size: 13px;
}
.badge-row {
display: flex;
flex-wrap: wrap;
gap: 6px;
margin-top: 12px;
}
.badge {
display: inline-block;
border: 1px solid var(--c-border);
padding: 2px 6px;
font-family: var(--font-mono);
font-size: 10px;
color: var(--c-text-sub);
white-space: nowrap;
}
.badge-solid {
border-color: var(--c-accent);
background: var(--c-accent);
color: var(--c-canvas);
}
.section {
margin-top: 24px;
border-top: 1px solid var(--c-border);
padding-top: 24px;
}
.section-head {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
gap: 16px;
align-items: end;
margin-bottom: 14px;
}
.section-head p {
margin: 4px 0 0;
max-width: 980px;
color: var(--c-text-sub);
font-size: 13px;
}
.module-grid {
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr));
gap: 12px;
}
.module {
padding: 14px;
display: flex;
flex-direction: column;
gap: 10px;
min-height: 260px;
}
.module h3 {
margin: 0;
font-size: 15px;
letter-spacing: 0;
}
.module-label {
font-family: var(--font-mono);
font-size: 11px;
color: var(--c-text-sub);
}
.file-list,
.bullets,
.checks {
margin: 0;
padding-left: 18px;
color: var(--c-text-soft);
font-size: 13px;
}
.file-list {
font-family: var(--font-mono);
font-size: 11px;
line-height: 1.55;
}
.flow {
display: grid;
grid-template-columns: repeat(7, minmax(0, 1fr));
gap: 10px;
align-items: stretch;
}
.flow-box {
min-height: 118px;
padding: 12px;
position: relative;
}
.flow-box::after {
content: "";
position: absolute;
top: 50%;
right: -10px;
width: 10px;
border-top: 1px solid var(--c-border-strong);
}
.flow-box:last-child::after {
display: none;
}
.flow-box h3 {
margin: 0 0 8px;
font-size: 13px;
}
.flow-box p {
margin: 0;
color: var(--c-text-soft);
font-size: 12px;
}
.matrix {
display: grid;
grid-template-columns: 280px minmax(0, 1fr);
border-top: 1px solid var(--c-border);
border-left: 1px solid var(--c-border);
}
.matrix > div {
border-right: 1px solid var(--c-border);
border-bottom: 1px solid var(--c-border);
padding: 10px 12px;
font-size: 13px;
}
.matrix .key {
font-family: var(--font-mono);
color: var(--c-text-main);
background: #f8fafc;
}
.table-wrap {
overflow-x: auto;
}
table {
width: 100%;
min-width: 980px;
border-collapse: collapse;
font-size: 13px;
}
th,
td {
border-bottom: 1px solid var(--c-border);
border-right: 1px solid var(--c-border);
padding: 10px 12px;
vertical-align: top;
text-align: left;
}
th {
font-family: var(--font-mono);
font-size: 11px;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--c-text-sub);
background: #f8fafc;
}
tr:last-child td {
border-bottom: 0;
}
th:last-child,
td:last-child {
border-right: 0;
}
code,
.mono {
font-family: var(--font-mono);
font-size: 0.92em;
color: var(--c-text-main);
}
.risk {
border-color: #fecaca;
}
.risk h3,
.risk .module-label {
color: var(--c-risk);
}
.ok {
color: var(--c-ok);
font-weight: 600;
}
.cols-2 {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 12px;
}
.cols-4 {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 12px;
}
.note {
padding: 14px;
}
@media (max-width: 1180px) {
.module-grid,
.summary,
.cols-4 {
grid-template-columns: 1fr 1fr;
}
.flow {
grid-template-columns: 1fr 1fr;
}
.flow-box::after {
display: none;
}
}
@media (max-width: 760px) {
.page {
padding: 12px;
}
.diagram-canvas {
padding: 16px;
}
.diagram-header,
.summary,
.section-head,
.module-grid,
.cols-2,
.cols-4,
.flow,
.matrix {
grid-template-columns: 1fr;
}
.meta-box {
min-width: 0;
}
}
</style>
</head>
<body>
<main class="page">
<article class="diagram-canvas">
<header class="diagram-header">
<div>
<div class="diagram-title">Beaver Backend Module Blueprint</div>
<div class="diagram-subtitle">Flat Engineering Blueprint / app-instance/backend / 2026-05-25</div>
</div>
<div class="meta-box">
SOURCE: <span class="mono">app-instance/backend</span><br>
STYLE: <span class="mono">projcet_review/blueprinter.md</span><br>
SCOPE: <span class="mono">backend code + tests + architecture docs</span><br>
MULTI-PAGE: <a href="backend_blueprint/index.html">backend_blueprint/index.html</a>
</div>
</header>
<section class="summary">
<div class="panel">
<h2>项目是干嘛的</h2>
<p>
Beaver 后端是一个面向用户任务的 agent runtime。它接收来自 Web、WebSocket、CLI、Gateway、Cron 或 MCP 的请求,
用 Main Agent 判断这是不是一个需要跟踪的 Task:简单问题直接回复,复杂任务进入 Task mode。Task mode 会规划单 agent
或 team 执行,运行统一的 <code>AgentLoop</code>,选择技能和工具,调用模型,记录事实证据,并等待用户接受、修改或放弃。
只有用户接受后的 Task evidence 才会沉淀为可学习的 skill 候选。
</p>
<div class="badge-row">
<span class="badge-solid badge">UNIFIED ENGINE</span>
<span class="badge">TASK MODE</span>
<span class="badge">TEAM COORDINATOR</span>
<span class="badge">SKILL LEARNING</span>
<span class="badge">MCP TOOLS</span>
<span class="badge">SCHEDULED TASKS</span>
</div>
</div>
<div class="panel">
<h2>最关键的架构判断</h2>
<p>
主 agent、team node、sub-agent 都不各自实现一套 runtime,它们最后都回到同一个 <code>beaver.engine.AgentLoop</code>。
因此后续修改时要优先确认:入口层是不是薄的,服务层是不是只编排,真正 tool loop / prompt / provider / session 逻辑是不是仍在 engine 内收口。
</p>
<div class="badge-row">
<span class="badge">interfaces -> services</span>
<span class="badge">services -> engine</span>
<span class="badge">engine -> skills/tools/memory</span>
</div>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>主执行流</h2>
<p>这是后端最重要的一条路径,后续逐模块修改文档应该先对齐这条链路。</p>
</div>
<div class="kicker">CHAT / TASK / ACCEPTANCE / LEARNING</div>
</div>
<div class="flow">
<div class="flow-box">
<h3>1. 入口接收</h3>
<p><code>/api/chat</code>、WebSocket、CLI、Gateway 或 Cron 把用户消息转给 <code>AgentService</code>。</p>
</div>
<div class="flow-box">
<h3>2. 意图路由</h3>
<p><code>MainAgentRouter</code> 结合 active task 和近期会话,判断 simple / new_task / continue / revise / close / abandon。</p>
</div>
<div class="flow-box">
<h3>3. Task 建模</h3>
<p><code>TaskService</code> 写入 <code>tasks.json</code> 和 <code>events.jsonl</code>,维护 open/running/awaiting_acceptance/closed 状态。</p>
</div>
<div class="flow-box">
<h3>4. 执行规划</h3>
<p><code>TaskExecutionPlanner</code> 让辅助模型选择 single 或 team,并为 team 生成 sequence / parallel / DAG 节点。</p>
</div>
<div class="flow-box">
<h3>5. 统一运行</h3>
<p><code>AgentLoop</code> 冻结 memory,选 skill,选 tool,构建 prompt,调用 provider,执行 tool loop。</p>
</div>
<div class="flow-box">
<h3>6. 事实证据</h3>
<p><code>EvidenceBuilder</code> 汇总 run/team/tool 证据。Evidence 只记录事实,不判断、不打分、不 gate。</p>
</div>
<div class="flow-box">
<h3>7. 验收学习</h3>
<p>用户接受 Task 后生成 accepted task evidence 和 learning candidates;worker 可生成 draft,但不会自动 approve/publish。</p>
</div>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>模块总览</h2>
<p>每个模块下面都写明责任、逻辑、具体怎么做,以及关键文件。</p>
</div>
<div class="kicker">MODULE RESPONSIBILITY MAP</div>
</div>
<div class="module-grid">
<section class="module">
<div class="module-label">foundation</div>
<h3>底层配置、事件和通用模型</h3>
<p>负责加载实例级配置、定义 provider/MCP/AuthZ/backend identity schema、提供 message bus 和 cron 数据模型。</p>
<ul class="bullets">
<li>配置来源优先级:<code>BEAVER_CONFIG_PATH</code>、<code>BEAVER_HOME/config.json</code>、workspace 下 <code>.beaver/config.json</code>。</li>
<li><code>BeaverConfig.resolve_provider_target()</code> 从默认模型、显式 provider 和已配置凭据推导运行目标。</li>
<li><code>MessageBus</code> 用 async queue 承接 gateway inbound/outbound。</li>
<li><code>CronSchedule/CronJob/CronRunRecord</code> 是定时任务持久化模型。</li>
</ul>
<ul class="file-list">
<li>beaver/foundation/config/schema.py</li>
<li>beaver/foundation/config/loader.py</li>
<li>beaver/foundation/events/message_bus.py</li>
<li>beaver/foundation/models/cron.py</li>
<li>beaver/foundation/embedding.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">interfaces</div>
<h3>薄入口层</h3>
<p>负责把 HTTP、WebSocket、CLI、Gateway、MCP server 的输入转换成服务层调用,不应保存核心执行逻辑。</p>
<ul class="bullets">
<li>Web app lifespan 启动 <code>AgentService</code> running mode、<code>CronService</code> 和可选 skill learning worker。</li>
<li><code>/api/chat</code> 和 <code>/ws/{session_id}</code> 都委托给 <code>_run_web_direct()</code> / <code>AgentService</code>。</li>
<li>文件 API 分两类:聊天附件 <code>workspace/files/&lt;id&gt;</code> 与 workspace 浏览/上传/预览。</li>
<li>MCP interface 暴露 memory/tools server;Gateway 用 <code>MessageBus</code> 桥接渠道。</li>
</ul>
<ul class="file-list">
<li>beaver/interfaces/web/app.py</li>
<li>beaver/interfaces/web/files.py</li>
<li>beaver/interfaces/cli/main.py</li>
<li>beaver/interfaces/gateway/main.py</li>
<li>beaver/interfaces/mcp/*.py</li>
<li>beaver/interfaces/channels/*.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">services</div>
<h3>应用服务编排层</h3>
<p>负责把入口请求转成系统内部流程:agent 运行、task mode、cron、team、memory、skill hub、process projection。</p>
<ul class="bullets">
<li><code>AgentService</code> 是主入口,区分 direct mode 和 running mode。</li>
<li><code>_process_with_main_agent()</code> 先做意图分类,再决定是否进入 Task。</li>
<li><code>_run_task_mode()</code> 管理 task planning、team 执行、主 agent synthesis、evidence 记录和用户验收状态。</li>
<li><code>CronService</code> 负责持久化定时任务、计算下一次运行、记录 history。</li>
<li><code>SessionProcessProjector</code> 把隐藏 task/team 事件投影给前端过程视图。</li>
</ul>
<ul class="file-list">
<li>beaver/services/agent_service.py</li>
<li>beaver/services/team_service.py</li>
<li>beaver/services/cron_service.py</li>
<li>beaver/services/process_service.py</li>
<li>beaver/services/skillhub_service.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">engine</div>
<h3>统一 agent 运行内核</h3>
<p>这是主 agent 和 delegated agent 共用的核心。它装配 runtime,构建上下文,选择技能和工具,驱动 provider/tool loop,并记录所有运行事件。</p>
<ul class="bullets">
<li><code>EngineLoader</code> 装配 session、memory、run store、skill store、tool registry、MCP manager、task/evidence 服务。</li>
<li><code>AgentLoop.process_direct()</code> 是单次运行主链;running mode 下只能通过 queue <code>submit_direct()</code>。</li>
<li>每个 run 独立捕获 frozen memory snapshot,避免 parallel team runs 共享快照互相污染。</li>
<li>运行时写入 <code>run_started</code>、skill activation、tool selection、LLM request、tool result、run completed/failed 等事件。</li>
</ul>
<ul class="file-list">
<li>beaver/engine/loader.py</li>
<li>beaver/engine/loop.py</li>
<li>beaver/engine/context/builder.py</li>
<li>beaver/engine/providers/*.py</li>
<li>beaver/engine/session/*.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">providers</div>
<h3>模型 provider 抽象与选路</h3>
<p>把不同模型网关统一成 <code>LLMProvider.chat()</code>,返回统一 <code>LLMResponse</code> 和 <code>ToolCallRequest</code>。</p>
<ul class="bullets">
<li><code>ProviderRuntime</code> 描述解析后的 provider、model、api mode、凭据、headers、routing。</li>
<li><code>ProviderBundle</code> 同时包含 main、fallback、auxiliary、embedding runtime。</li>
<li><code>FallbackProviderChain</code> 在主 provider 返回 error 或抛异常时按单次调用切到 fallback。</li>
<li>实现包含 LiteLLM、Anthropic、OpenAI Codex API、OpenAI-compatible custom。</li>
</ul>
<ul class="file-list">
<li>beaver/engine/providers/base.py</li>
<li>beaver/engine/providers/runtime.py</li>
<li>beaver/engine/providers/factory.py</li>
<li>beaver/engine/providers/registry.py</li>
<li>beaver/engine/providers/litellm.py</li>
<li>beaver/engine/providers/anthropic.py</li>
<li>beaver/engine/providers/codex.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">tasks</div>
<h3>内部 Task、事实证据和用户验收</h3>
<p>负责把“需要执行和跟踪”的用户请求变成可持久化、可重试、可验收的 Task。</p>
<ul class="bullets">
<li><code>MainAgentRouter</code> 使用 LLM JSON 决策区分 simple/task/continue/revise/close/abandon。</li>
<li><code>TaskExecutionPlanner</code> 让辅助模型选择 single 或 team,并限制 team 节点最多 6 个。</li>
<li><code>TaskSkillResolver</code> 为 team node 匹配 published skill;没有匹配时生成 one-run ephemeral guidance。</li>
<li><code>EvidenceBuilder</code> 只记录事实证据;Task 是否完成只由用户验收决定。</li>
</ul>
<ul class="file-list">
<li>beaver/tasks/models.py</li>
<li>beaver/tasks/service.py</li>
<li>beaver/tasks/router.py</li>
<li>beaver/tasks/planner.py</li>
<li>beaver/tasks/skill_resolver.py</li>
<li>beaver/tasks/evidence.py</li>
<li>beaver/tasks/store.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">coordinator</div>
<h3>多 agent / team 编排</h3>
<p>负责把 team execution graph 转成多个 delegated runs。v1 真正实现的是 sequence、parallel、DAG;其它 strategy 目前保留但未实现。</p>
<ul class="bullets">
<li><code>ExecutionGraph.validate()</code> 校验节点唯一、依赖存在、无环,以及 strategy 是否已实现。</li>
<li><code>TeamGraphScheduler</code> 按策略运行节点,失败依赖会把后续节点标记 blocked。</li>
<li><code>LocalAgentRunner</code> 为每个节点生成 child session,并仍调用同一个 <code>AgentLoop</code>。</li>
<li>Agent registry 和 LocalSubagentStore 支持管理 specialist/subagent,但当前 Task 主链主要走 generic skill worker。</li>
</ul>
<ul class="file-list">
<li>beaver/coordinator/models.py</li>
<li>beaver/coordinator/execution/scheduler.py</li>
<li>beaver/coordinator/local.py</li>
<li>beaver/coordinator/registry/*.py</li>
<li>beaver/coordinator/subagents.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">tools</div>
<h3>工具契约、选择和执行</h3>
<p>负责把内建工具和 MCP 工具统一暴露为 provider function schema,并在 tool loop 里执行模型返回的调用。</p>
<ul class="bullets">
<li><code>ToolSpec</code> 是工具元数据和 schema 的事实来源,可导出 MCP descriptor 和 provider schema。</li>
<li><code>ToolAssembler</code> 按 always tools、skill tool hints、embedding retrieval 选择本轮工具。</li>
<li><code>ToolExecutor</code> 兼容 <code>ToolCallRequest</code> 和 OpenAI 风格 dict,解析参数并调用 registry。</li>
<li>内建工具覆盖 memory、session search、filesystem、web fetch/search、terminal/process/code、cron、skill admin、delegation utility。</li>
</ul>
<ul class="file-list">
<li>beaver/tools/base.py</li>
<li>beaver/tools/registry/tool_registry.py</li>
<li>beaver/tools/runtime/executor.py</li>
<li>beaver/tools/assembler/task_assembler.py</li>
<li>beaver/tools/builtins/*.py</li>
<li>beaver/tools/mcp/wrapper.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">skills</div>
<h3>技能目录、选择、生命周期和学习</h3>
<p>负责发现、选择、注入、版本化、审核、发布和自动学习 Beaver skills。</p>
<ul class="bullets">
<li><code>SkillsLoader</code> 读取 workspace published skills、plugin/extra dirs、builtin skills,解析 frontmatter 和工具提示。</li>
<li><code>SkillAssembler</code> 用 embedding 召回候选,再用 LLM 做 shortlist/final 选择,并返回 <code>SkillContext</code>。</li>
<li><code>SkillSpecStore</code> 管理 <code>skill.json</code>、<code>current.json</code>、versions、drafts、reviews。</li>
<li><code>SkillLearningPipelineService</code> 协调 candidate -> draft -> safety/eval -> review -> approve -> publish。</li>
</ul>
<ul class="file-list">
<li>beaver/skills/catalog/*.py</li>
<li>beaver/skills/assembler/*.py</li>
<li>beaver/skills/specs/*.py</li>
<li>beaver/skills/drafts/service.py</li>
<li>beaver/skills/reviews/service.py</li>
<li>beaver/skills/publisher/service.py</li>
<li>beaver/skills/learning/*.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">memory</div>
<h3>会话、长期记忆、运行记忆和学习状态</h3>
<p>负责保存对话事件、长期记忆、run receipt、skill effect、skill learning candidates 和安全/eval 报告。</p>
<ul class="bullets">
<li>会话存 SQLite,包含 <code>sessions</code>、<code>messages</code> 和 FTS5 <code>messages_fts</code>。</li>
<li>长期记忆只有 <code>MEMORY.md</code> 和 <code>USER.md</code> 两个桶,写入前扫描 prompt injection / secret exfiltration 风险。</li>
<li>run memory 用 JSONL 保存 <code>RunRecord</code> 和 <code>SkillEffectRecord</code>。</li>
<li>skill learning store 维护候选状态、performance snapshot、safety report、eval report。</li>
</ul>
<ul class="file-list">
<li>beaver/engine/session/*.py</li>
<li>beaver/memory/curated/*.py</li>
<li>beaver/memory/runs/*.py</li>
<li>beaver/memory/skills/*.py</li>
<li>beaver/memory/search/transcript_store.py</li>
</ul>
</section>
<section class="module">
<div class="module-label">integrations</div>
<h3>外部系统与协议集成</h3>
<p>负责连接 AuthZ、MCP 和 Outlook。WhatsApp、A2A、providers 目录当前主要是占位。</p>
<ul class="bullets">
<li><code>MCPConnectionManager</code> 支持 stdio 和 streamable HTTP MCP server,并把远端 tools 注册成 <code>mcp_{server}_{tool}</code>。</li>
<li>远端 MCP 可用 AuthZ backend token 模式,通过 backend identity 换取 bearer token。</li>
<li>Outlook integration 通过 AuthZ 或直接凭据连接,维护 workspace meta,提供 status/overview/messages/events/detail。</li>
<li><code>AuthzClient</code> 负责用户/backend 注册、权限查询、token 签发。</li>
</ul>
<ul class="file-list">
<li>beaver/integrations/mcp/connection.py</li>
<li>beaver/integrations/authz/client.py</li>
<li>beaver/integrations/outlook/__init__.py</li>
<li>beaver/integrations/a2a/__init__.py</li>
<li>beaver/integrations/whatsapp/__init__.py</li>
</ul>
</section>
<section class="module risk">
<div class="module-label">permissions</div>
<h3>权限与治理层</h3>
<p>目录已经存在但当前基本是空骨架。实际权限约束主要散落在具体工具、workspace path 校验、memory safety 和 skill draft safety 中。</p>
<ul class="bullets">
<li><code>permissions/guards</code>、<code>policies</code>、<code>profiles</code> 只有 docstring。</li>
<li><code>ToolsConfig.restrict_to_workspace</code> 已在配置 schema 里存在,但需要逐工具核对是否真正执行。</li>
<li>后续如果要做能力治理应把工具执行、MCP sensitive 标记、provider/terminal/file 操作统一接入这里。</li>
</ul>
<ul class="file-list">
<li>beaver/permissions/__init__.py</li>
<li>beaver/permissions/guards/__init__.py</li>
<li>beaver/permissions/policies/__init__.py</li>
<li>beaver/permissions/profiles/__init__.py</li>
</ul>
</section>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>核心数据落点</h2>
<p>这些文件/数据库是运行后最重要的事实来源。后续核对行为是否符合预期时,优先看这里。</p>
</div>
<div class="kicker">PERSISTENCE MAP</div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>数据</th>
<th>位置</th>
<th>写入者</th>
<th>用途</th>
</tr>
</thead>
<tbody>
<tr>
<td>Session / transcript event stream</td>
<td><code>&lt;workspace&gt;/sessions/state.db</code></td>
<td><code>SessionManager</code> / <code>AgentLoop</code></td>
<td>保存可见对话、隐藏 system snapshots、tool calls/results、run lifecycle、usage、FTS 搜索。</td>
</tr>
<tr>
<td>Task records</td>
<td><code>&lt;workspace&gt;/tasks/tasks.json</code></td>
<td><code>TaskService</code></td>
<td>保存 task goal/status/run_ids/skill_names/acceptance history。</td>
</tr>
<tr>
<td>Task events</td>
<td><code>&lt;workspace&gt;/tasks/events.jsonl</code></td>
<td><code>TaskService</code></td>
<td>保存 created/run_started/run_completed/evidence_recorded/accepted/revised/closed/abandoned。</td>
</tr>
<tr>
<td>Curated memory</td>
<td><code>&lt;workspace&gt;/memory/curated/MEMORY.md</code>, <code>USER.md</code></td>
<td><code>MemoryTool</code> / <code>MemoryStore</code></td>
<td>长期注入 prompt 的稳定事实;每个 run 冻结快照。</td>
</tr>
<tr>
<td>Run receipts / skill effects</td>
<td><code>&lt;workspace&gt;/memory/runs/*.jsonl</code></td>
<td><code>AgentLoop</code> / <code>AgentService</code> 用户验收入口</td>
<td>skill learning 的原始执行证据、用户验收事件和 final accepted run 标记。</td>
</tr>
<tr>
<td>Skills lifecycle</td>
<td><code>&lt;workspace&gt;/skills/&lt;name&gt;/...</code></td>
<td><code>SkillSpecStore</code> / draft/review/publisher services</td>
<td>published versions、drafts、reviews、current version、supporting files。</td>
</tr>
<tr>
<td>Skill learning state</td>
<td><code>&lt;workspace&gt;/memory/skills/...</code></td>
<td><code>SkillLearningStore</code></td>
<td>候选、performance snapshot、safety report、eval report。</td>
</tr>
<tr>
<td>Cron jobs and runs</td>
<td><code>&lt;workspace&gt;/cron/jobs.json</code></td>
<td><code>CronService</code></td>
<td>定时任务配置、next_run、history、notification/task linkage。</td>
</tr>
<tr>
<td>Agent registry / subagents</td>
<td><code>&lt;workspace&gt;/agents/registry.json</code>, <code>*_agent/AGENTS.json</code></td>
<td><code>AgentRegistry</code> / <code>LocalSubagentStore</code></td>
<td>管理 builtin/workspace/learned agents 和本地 sub-agent workspace。</td>
</tr>
</tbody>
</table>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>关键流程拆解</h2>
<p>这些流程是后续逐模块修改时最容易产生偏差的地方。</p>
</div>
<div class="kicker">CONTROL FLOWS</div>
</div>
<div class="cols-2">
<div class="panel">
<h2>Simple chat</h2>
<ul class="checks">
<li>入口调用 <code>AgentService._process_with_main_agent()</code>。</li>
<li><code>MainAgentRouter</code> 返回非 task。</li>
<li>关闭 skill assembly 和 tools:<code>include_skill_assembly=False</code>、<code>include_tools=False</code>。</li>
<li>仍通过 <code>AgentLoop</code> 写 session/run 事件,但不创建 Task。</li>
</ul>
</div>
<div class="panel">
<h2>Task mode single</h2>
<ul class="checks">
<li>创建或复用 open task。</li>
<li>planner 返回 single主 agent 直接运行。</li>
<li>运行后构建 <code>TaskEvidencePacket</code>。</li>
<li>运行后状态变 <code>awaiting_acceptance</code>;用户 accept/revise/abandon 决定关闭、修订或放弃。</li>
</ul>
</div>
<div class="panel">
<h2>Task mode team</h2>
<ul class="checks">
<li>planner 生成 <code>ExecutionGraph</code>。</li>
<li><code>TaskSkillResolver</code> 给节点绑定 published skill 或 ephemeral guidance。</li>
<li><code>TeamService</code> 运行节点,节点仍调用 <code>AgentLoop</code>。</li>
<li>主 agent synthesis 使用 team evidence,通常关闭工具调用,避免重复执行子 agent 已做的事情。</li>
</ul>
</div>
<div class="panel">
<h2>Skill learning</h2>
<ul class="checks">
<li>每个 run 记录 activated skill receipt 和 effect。</li>
<li>用户 accept task 后才生成候选;证据包含整个 task 的所有 runs,并标记 final_accepted_run_id。</li>
<li>worker 只生成 draft、做 safety/eval,不自动 approve/publish。</li>
<li>publish 必须有 approved review、passing safety、没有失败 eval;high risk 还需要显式确认。</li>
</ul>
</div>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>后续核对问题清单</h2>
<p>这些问题适合配合 brainstorming / grill-me 逐模块核对想法和现有项目是否一致。</p>
</div>
<div class="kicker">REVIEW PROMPTS</div>
</div>
<div class="matrix">
<div class="key">产品目标</div>
<div>这个后端当前更像“任务型 agent runtime”,不是普通聊天后端。你想保留 Task runtime 的 Plan -> Run -> Evidence -> User Acceptance 主体验,还是把它降级成可选高级模式?</div>
<div class="key">主入口边界</div>
<div><code>interfaces/web/app.py</code> 已经超过 3000 行,包含 auth、files、skills、cron、chat 等。后续是否要拆 route 模块,还是先保持单文件以降低迁移风险?</div>
<div class="key">Task 自动化程度</div>
<div>现在 Main Agent 会自动 Task 化复杂请求。你是否接受模型分类误差?是否需要用户显式确认创建 Task?</div>
<div class="key">Team 执行策略</div>
<div>当前真正实现 sequence / parallel / DAG,其它策略只是保留枚举。是否要支持更多 coordinator,还是坚持 v1 只做三种稳定策略?</div>
<div class="key">Agent registry 角色</div>
<div>registry/search/target resolver 已存在,但 Task 主线主要绑定技能而不是 specialist agent。你希望 team node 优先找 specialist agent,还是继续 generic skill worker?</div>
<div class="key">权限治理</div>
<div>permissions 目录目前是骨架。terminal、filesystem、web、MCP、Outlook 等能力是否需要统一 policy gate?</div>
<div class="key">Skill 学习闭环</div>
<div>候选生成应依赖 task accepted。你希望只从用户接受的 task evidence 学习,还是允许人工从 abandoned/revised 历史中手动创建候选?</div>
<div class="key">外部集成</div>
<div>Outlook/AuthZ/MCP 已经比较具体,A2A/WhatsApp 仍是占位。后续应该优先补协议,还是先收紧已有集成的权限和错误处理?</div>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>代码观察与风险点</h2>
<p>这些不是修改建议的最终结论,只是阅读代码后值得后续逐项核对的偏差点。</p>
</div>
<div class="kicker">OPEN RISKS</div>
</div>
<div class="cols-2">
<div class="note risk">
<h3>定时 Task 路径存在明显变量错误</h3>
<p>
<code>AgentService.run_scheduled_task()</code> 末尾更新 assistant event payload 时引用了 <code>job.id</code>、<code>run.scheduled_run_id</code>、<code>job.name</code>,
但该函数参数只有 <code>cron_job_id</code>、<code>cron_job_name</code>、<code>scheduled_run_id</code>。这条路径如果执行到这里会触发 <code>NameError</code>。
</p>
</div>
<div class="note risk">
<h3>权限层还没有真正成为执行闸门</h3>
<p>
<code>permissions</code> 目录为空骨架,实际保护分散在工具实现和路径校验里。若后续开放 terminal、filesystem、MCP sensitive tools,需要统一执行前 policy。
</p>
</div>
<div class="note risk">
<h3>Web auth 是本地单用户风格</h3>
<p>
本地 auth 文件以 username/password 字段读写,使用 token/handoff code 做前端会话。若目标是多用户或公网后端需要重新评估密码存储、token 生命周期和权限边界。
</p>
</div>
<div class="note risk">
<h3>Skill eval 目前偏轻量启发式</h3>
<p>
<code>SkillDraftEvaluator</code> 基于历史 accepted task evidence 和草稿长度/内容做 bounded report,不是真正 replay。它只属于 skill draft 治理,不属于 Task runtime。
</p>
</div>
<div class="note risk">
<h3>接口层过大</h3>
<p>
<code>interfaces/web/app.py</code> 同时承载 app factory、lifespan、auth、provider config、sessions、files、agents、MCP、Outlook、skills、cron、chat、helper functions。
后续修改时容易产生跨功能回归。
</p>
</div>
<div class="note">
<h3>已经落地的稳定点</h3>
<p>
<span class="ok">可依赖:</span>统一 <code>AgentLoop</code>、session event stream、Task evidence/acceptance 状态、team graph v1、skill lifecycle gates、MCP wrapper、workspace path containment。
</p>
</div>
</div>
</section>
<section class="section">
<div class="section-head">
<div>
<h2>测试覆盖信号</h2>
<p>单元测试覆盖了当前后端多数关键行为,可作为后续修改文档的回归索引。</p>
</div>
<div class="kicker">TEST INDEX</div>
</div>
<div class="cols-4">
<div class="panel">
<h2>Task / acceptance</h2>
<p><code>test_task_mode_feedback.py</code>, <code>test_task_evidence.py</code>, <code>test_task_execution_planner.py</code>, <code>test_task_skill_resolver.py</code></p>
</div>
<div class="panel">
<h2>Engine / providers</h2>
<p><code>test_websocket_chat.py</code>, <code>test_main_agent_router.py</code>, <code>test_litellm_thinking_mode.py</code>, <code>test_imports.py</code></p>
</div>
<div class="panel">
<h2>Team / process</h2>
<p><code>test_agent_team_v1.py</code>, <code>test_agent_registry_resolver.py</code>, <code>test_process_projection.py</code></p>
</div>
<div class="panel">
<h2>Skills / tools / web</h2>
<p><code>test_phase5_skills_runtime.py</code>, <code>test_skill_learning_*.py</code>, <code>test_tool_assembler.py</code>, <code>test_web_files_api.py</code></p>
</div>
</div>
</section>
</article>
</main>
</body>
</html>

View File

@ -0,0 +1,421 @@
# Beaver 后端产品能力展示与演进路线
## 1. 产品定位
Beaver 后端不是一个普通聊天后端,而是一个面向“可执行任务”的智能体运行系统。
它的核心价值是把用户的一句话,从普通对话升级成一条可以被识别、执行、追踪、验收、复用和持续学习的任务链路。系统不仅能调用模型生成回答,还能调用工具处理文件、搜索历史、执行命令、触发定时任务,并把成功经验沉淀成后续可复用的技能。
从产品角度看,Beaver 后端承担的是智能体产品的“操作系统层”:
- 识别用户真正要完成的任务
- 调用合适的模型和工具执行任务
- 记录任务过程、证据和结果
- 支持用户确认、修改或放弃结果
- 把被用户认可的经验沉淀为长期能力
- 为已经完成但待接入的记忆系统、未来文件系统、多智能体协作和主动服务能力打基础
## 2. 核心价值
### 从聊天助手升级为任务执行系统
普通聊天系统的重点是“回答问题”。Beaver 后端的重点是“完成事情”。
系统会判断用户当前输入是简单对话,还是需要进入任务模式。如果是任务,系统会持续跟踪它的目标、执行过程、工具调用、产出结果和用户反馈,而不是把每次对话当成孤立消息。
### 从一次性回答升级为可验收结果
系统会保存任务执行过程中的证据,包括模型输出、工具结果、子任务结果和最终产物。用户可以对结果进行满意、修改、放弃等反馈。
这让产品具备“交付物意识”:不是模型说完就结束,而是以用户是否认可作为闭环。
### 从人工经验升级为可复用技能
当一次任务被用户认可,系统可以把这次成功经验转化为技能候选,再经过草稿、审核和发布,变成后续任务可自动调用的能力。
这意味着 Beaver 不只是被动响应,而是具备逐步积累组织经验的基础。
### 从短期上下文升级为长期记忆资产
系统已经具备长期记忆系统的底层能力,包括用户记忆、组织知识、历史任务、文件资源、工具经验和技能学习记录。当前重点不是从零建设记忆系统,而是把已完成的记忆能力接入主产品链路,并做成可展示、可管理、可解释的产品体验。
## 3. 产品能力总览
| 能力名称 | 当前价值 | 后续可展示的产品形态 |
| --- | --- | --- |
| 多渠道接入能力 | 支持来自网页、命令行、网关、定时任务等入口的请求 | 统一工作台、多渠道智能助手、企业内部入口 |
| 任务识别与跟踪能力 | 自动判断是否需要进入任务模式,并跟踪任务状态 | 任务列表、任务详情、执行进度 |
| 自动执行与工具调用能力 | 模型可以调用文件、终端、网页、记忆、定时任务等工具 | 可视化工具调用记录、任务产物管理 |
| 多智能体协作能力 | 复杂任务可拆给多个子智能体并行或按依赖执行 | 多角色协作视图、子任务分工看板 |
| 过程证据留存能力 | 任务过程、工具结果、输出和验证信息会被记录 | 可审计任务报告、证据链展示 |
| 用户验收与反馈能力 | 用户可以确认满意、要求修改或放弃任务 | 结果验收按钮、修改意见跟踪 |
| 技能沉淀与复用能力 | 成功经验可进入技能学习、草稿、审核、发布链路 | 企业技能库、最佳实践沉淀 |
| 长期记忆与上下文管理能力 | 记忆底层能力已完成,当前待接入主产品链路 | 记忆管理台、知识资产地图、检索轨迹 |
| 定时任务与主动触达能力 | 支持自动触发任务或通知 | 定时提醒、周期报告、主动运营 |
| 模型与供应商切换能力 | 后端抽象了模型供应商和 fallback 机制 | 成本/质量路由、多模型策略 |
## 4. 已具备的产品功能
### 4.1 多入口统一接入
系统可以承接不同来源的请求,包括网页端、命令行、本地工具、网关和定时任务。无论请求来自哪里,后端都会把它转成统一的会话和任务执行流程。
产品价值:
- 后续可以同时支持 Web 助手、企业内部工具、自动化任务和第三方系统接入
- 不需要为每个入口重复实现一套智能体逻辑
- 用户在不同入口产生的任务和记忆可以沉淀到同一套系统里
### 4.2 任务模式
系统会判断用户输入是普通聊天,还是需要持续执行和跟踪的任务。
如果是简单问题,系统直接回复;如果是复杂任务,系统会创建一个可追踪的任务,记录目标、过程、结果和反馈。
产品价值:
- 用户不需要手动创建任务,系统自动识别
- 复杂请求不会被当成一次性聊天处理
- 后续可以展示任务状态、历史结果和修改记录
### 4.3 自动执行与工具调用
系统可以根据任务需要选择并调用工具,例如文件操作、终端命令、网页访问、会话搜索、记忆读写、技能管理和定时任务操作。
产品价值:
- 智能体不只是“说”,还能“做”
- 适合代码分析、文件处理、资料整理、定时报告、历史追溯等工作流
- 工具调用过程会被记录,方便审计和复盘
### 4.4 多智能体协作
对于复杂任务,系统可以先拆分成多个子任务,再交给不同的子智能体执行。子任务可以顺序执行、并行执行,也可以按依赖关系执行。
产品价值:
- 复杂任务不必全部压在一个模型调用里
- 适合“调研 + 审核 + 汇总”“设计 + 实现 + 验证”等多阶段工作
- 后续可以形成可视化的任务协作网络
### 4.5 过程证据留存
系统会记录任务执行过程,包括模型请求、工具选择、工具结果、子任务输出、最终结果和用户反馈。
产品价值:
- 结果不是黑盒,可以追溯它是怎么得出的
- 适合企业场景下的复盘、合规和质量管理
- 为后续自动学习提供可靠证据
### 4.6 用户验收闭环
任务完成后,系统支持用户表达满意、要求修改或放弃。用户反馈会影响任务状态,也会影响后续技能学习。
产品价值:
- 用用户真实反馈判断任务是否成功
- 避免把错误结果沉淀成长期能力
- 支持“交付 - 修改 - 再交付”的工作流
### 4.7 技能沉淀与复用
当任务结果被用户认可,系统可以把执行经验整理成技能候选。候选经过草稿、审核和发布后,可以在未来类似任务中被自动激活。
产品价值:
- 把一次成功经验变成组织可复用能力
- 减少重复摸索和重复提示词编写
- 为企业内部“智能体技能库”打基础
### 4.8 长期记忆系统
系统已经完成长期记忆系统的底层能力,目前主要处于“已实现、待接入、待产品化展示”的状态。
它已经覆盖几类核心资产:
- 用户记忆:保存长期偏好、背景和稳定信息
- 组织业务知识:保存企业、项目和业务相关的长期知识
- 历史任务和结果:保存任务目标、执行过程和最终产出
- 文件和任务产物:保存可被后续任务引用的资源线索
- 工具调用经验:保存哪些工具在什么任务中有效或失败
- 成功技能和失败案例:支持后续复用和避坑
- 可复用工作流程:为技能学习和自动化执行提供素材
产品价值:
- 智能体可以基于历史上下文工作,而不是每次从零开始
- 后续重点是把已完成的记忆能力接入任务、文件、技能和工作台
- 支持向“越用越懂业务”的智能体演进
### 4.9 定时任务与主动触达
系统支持定时触发任务或通知,适合周期性提醒、自动报告、定期检查和主动推送。
产品价值:
- 从被动问答扩展到主动服务
- 支持运营、管理、监控类场景
- 可以与任务系统、记忆系统、文件系统联动
### 4.10 模型与供应商切换
系统把模型供应商抽象为统一接口,可以支持不同模型服务,并区分主模型、辅助模型和向量检索模型。
产品价值:
- 可以根据成本、速度、质量选择不同模型
- 可以为不同任务配置不同模型能力
- 为企业级稳定性和成本管理打基础
## 5. 记忆系统:从聊天记录到智能体长期资产
### 5.1 为什么记忆系统是关键能力
智能体产品的长期竞争力不只来自模型本身,还来自它能否积累上下文、业务经验、用户偏好和工具使用方式。
如果没有记忆系统,智能体每次都像第一次工作:不知道用户偏好,不知道历史任务,不知道哪些方案曾经成功,也不知道哪些工具调用曾经失败。
Beaver 后端的记忆系统底层能力已经完成,当前还没有完整接入主产品链路。后续重点是把它和任务执行、文件资源、技能学习、用户界面打通,让已经存在的记忆能力真正成为可使用、可解释、可管理的产品能力。
### 5.2 Beaver 已完成的记忆系统能力
Beaver 的记忆系统可以定义为“智能体长期上下文资产库”。这部分底层能力已经完成,当前状态是待接入主流程和产品界面。
它已经覆盖:
- 用户长期偏好
- 组织业务知识
- 历史任务和结果
- 文件和任务产物
- 工具调用经验
- 成功技能和失败案例
- 可复用的工作流程
这些能力后续需要接入三个主要产品入口:
- 任务执行:让任务能按需读取相关记忆
- 产品界面:让用户能查看、编辑、确认和删除记忆
- 技能学习:让事实类内容进入记忆,流程类经验进入技能
### 5.3 后期接入功能:记忆管理台
功能说明:
提供一个面向用户和管理员的记忆管理界面,展示系统已经记住了什么、这些记忆来自哪里、何时被使用、是否可信。
解决的问题:
- 用户不知道系统记住了什么
- 错误记忆难以发现和删除
- 记忆来源不可追溯
具体实现:
- 展示长期记忆列表,按用户、任务、技能、文件、来源分类
- 每条记忆展示来源任务、创建时间、最近使用时间、可信状态
- 支持用户手动确认、编辑、删除或冻结记忆
- 支持系统自动标记“候选记忆”,只有确认后进入长期记忆
接入状态:相关底层能力已经完成,后续需要接入产品界面和管理操作。
优先级:高
### 5.4 后期接入功能:记忆检索轨迹
功能说明:
当智能体引用历史信息时,展示它检索了哪些记忆、为什么选择这些记忆、最终哪些记忆进入了上下文。
解决的问题:
- 用户无法判断回答是否基于正确历史
- 记忆检索像黑盒,难以调试
- 企业场景需要解释和审计
具体实现:
- 每次任务执行记录记忆检索 query、候选结果、最终注入内容
- 在任务详情页展示“本次使用的记忆”
- 支持把错误引用标记为无效,反馈给记忆系统
接入状态:相关记录和检索能力已经具备基础,后续需要接入任务详情页和调试视图。
优先级:高
### 5.5 已实现功能:分层记忆加载
功能说明:
将记忆分为基础层、任务相关层和深度资料层,按任务需要逐步加载,减少上下文浪费。
解决的问题:
- 全量记忆直接注入会浪费 token
- 过多无关记忆会干扰模型判断
- 历史越多,检索越需要结构化
具体实现:
- 基础层:用户身份、偏好、长期稳定信息
- 任务层:和当前任务相关的历史任务、工具结果、文件资源
- 深度层:需要时再检索的详细材料、历史证据和长文档
- 每次任务记录加载了哪一层、为什么加载
接入状态:底层能力已实现,后续需要接入任务执行链路和产品展示界面。
### 5.6 已实现能力:记忆与技能联动的基础
功能说明:
把“记住信息”和“学会方法”区分开。事实类内容进入记忆,流程类经验进入技能。
解决的问题:
- 事实、偏好、方法、工具经验混在一起会造成混乱
- 技能学习需要来自高质量任务,而不是任意聊天
具体实现:
- 用户偏好、业务事实进入记忆
- 成功工作流程进入技能候选
- 失败任务进入反例经验,用于提醒系统避免重复错误
- 用户满意反馈作为高价值学习信号
接入状态:底层链路已经具备基础,后续需要在任务验收和技能审核界面中产品化呈现。
## 6. 后续补强的产品功能
### 6.3 技能库管理
功能说明:
把系统沉淀出来的技能变成可管理的企业能力库。
业务价值:
- 企业可以看到智能体已经掌握哪些工作方法
- 技能可以审核、启用、停用和版本管理
- 避免未经确认的经验直接影响生产结果
具体实现:
- 展示已发布技能、草稿技能、待审核技能
- 每个技能展示适用场景、来源任务、需要的工具、版本记录
- 支持人工审核后发布
- 支持回滚到上一版本
### 6.4 权限与安全策略
功能说明:
为工具调用、文件访问、外部系统接入和技能发布建立统一权限体系。
业务价值:
- 企业场景必须控制智能体能看什么、能改什么、能调用什么
- 防止越权访问文件、误执行命令或误发布技能
- 为后续接入客户数据和企业系统做准备
具体实现:
- 按用户、入口、任务类型和工具类型做权限判断
- 高风险工具调用前需要确认或策略允许
- 所有拒绝和允许都进入审计记录
- 管理员可以配置工具权限和文件权限
### 6.5 可回滚文件系统与对象存储
功能说明:
未来接入 MinIO 作为对象存储底座,承载用户文件、任务产物、工具生成物、会话附件、版本快照和可回滚文件状态。
业务价值:
- 智能体处理的文件不再散落在本地目录
- 每次任务产生的文件都可以追踪来源
- 误操作后可以回滚到历史版本
- 文件可以成为记忆系统和任务系统的长期资源
具体实现:
- 将用户上传文件、任务输出文件、工具生成文件统一存入对象存储
- 为每个文件建立版本记录、来源任务、创建人、使用记录
- 支持文件快照、版本比较和回滚
- 将文件元数据接入记忆检索,让智能体能按任务需要引用相关文件
- 文件访问走权限策略,避免越权读取
### 6.7 模型成本与质量监控
功能说明:
展示不同模型调用的成本、耗时、成功率和任务质量表现。
业务价值:
- 企业需要控制模型成本
- 不同任务可以选择不同模型
- 可以用数据判断模型供应商是否稳定
具体实现:
- 记录每次模型调用的 token、耗时、模型名、供应商和结果状态
- 按任务类型统计成本和成功率
- 支持为简单任务使用低成本模型,为复杂任务使用高质量模型
## 7. 未来路线图
### 短期:让现有能力稳定可展示
目标:把当前已有能力整理成可以验收、可以复盘的产品闭环。
目标为:
- 任务工作台
- 任务详情与执行过程展示
- 用户验收入口
- 定时任务运行历史
- 已完成记忆系统接入验证
- 基础记忆展示
- 技能草稿和审核流程整理
预期结果:
用户可以清楚看到系统正在做什么、做完了什么、哪些结果被认可、哪些经验可以沉淀。
### 中期:形成智能体资产管理能力
目标:把已经完成的记忆系统接入任务、文件和技能链路,并把任务、记忆、文件、技能变成可管理资产。
目标为:
- 记忆管理台
- 记忆检索轨迹
- 分层记忆加载接入任务执行链路
- 技能库管理
- 可回滚文件系统与对象存储
- 文件版本、来源和回滚
- 权限与审计策略
- 多智能体执行可视化
预期结果:
Beaver 不只是一个执行工具,而是开始形成企业级智能体资产库。已经完成的记忆能力会进入主产品链路,用户的文件、任务、记忆和技能可以被统一管理、追踪和复用。
### 长期:向自进化智能体系统演进
目标:让系统具备持续学习、持续优化和主动服务能力。
目标为:
- 记忆质量评估
- 技能效果评估
- 自动发现可复用流程
- 主动推荐技能优化
- 多模型质量和成本路由
- 跨任务、跨文件、跨记忆的上下文网络
预期结果:
Beaver 可以逐步从“会执行任务的助手”演进为“能积累组织经验、主动优化工作方式的智能体系统”。

View File

@ -0,0 +1,205 @@
# Beaver 后端近期已完善功能
## 1. 近期完善功能一览
| 已完善功能 | 当前状态 | 展示重点 |
| --- | --- | --- |
| 任务识别与跟踪 | 已完成 | 系统能判断用户是在普通聊天,还是在交办一个需要持续完成的任务 |
| 任务执行闭环 | 已完成 | 任务可以被创建、执行、记录过程、产出结果并进入验收 |
| 用户验收与反馈 | 已完成 | 用户可以对结果表示满意、要求修改或放弃 |
| 技能、工具调用与证据留存 | 已完成 | 系统会先用技能提供方法指导,再选择和调用工具,并记录执行证据 |
| 多智能体协作 | 已具备 | 复杂任务可以拆成多个子任务执行,再汇总结果 |
| 技能沉淀与复用 | 已具备 | 被认可的任务经验可以进入技能候选、草稿、审核和发布链路 |
| 长期记忆系统 | 底层已完成,待接入 | 已覆盖用户偏好、业务知识、历史任务、文件产物、工具经验和可复用流程 |
| 定时任务与主动触达 | 基础能力已具备 | 系统可以支持周期性任务和自动通知 |
| 多模型供应商适配 | 已具备 | 支持不同模型服务和后续成本/质量策略 |
## 2. 重点功能一:任务执行闭环
### 功能说明
Beaver 后端已经从普通问答升级为任务执行系统。用户不需要手动创建任务,系统会自动判断当前输入是否需要进入任务模式。
如果只是普通聊天,系统直接回答;如果用户是在交办一个需要执行、修改或验收的事情,系统会把它作为任务持续跟踪。
### 已完善的能力
- 自动识别普通聊天和任务请求
- 为复杂请求创建可追踪任务
- 保存任务目标、当前状态、执行次数和历史结果
- 支持任务继续、任务修改、新任务创建、任务关闭和任务放弃
- 任务完成后进入用户验收阶段
### 产品价值
这项能力解决的是“模型回答完就结束”的问题。
现在系统可以把用户请求当作一个有生命周期的任务处理:从识别、执行、产出,到用户反馈和后续修改,形成完整闭环。
### 可展示方式
1. 用户提出一个需要多步骤完成的任务
2. 系统自动识别为任务,而不是普通聊天
3. 系统执行任务并返回结果
4. 用户要求修改
5. 系统继续沿用原任务上下文进行修订
6. 用户确认满意后,任务进入完成状态
## 4. 重点功能二:用户验收与反馈闭环
### 功能说明
任务完成后,系统不会默认结果一定正确,而是支持用户进行明确反馈。
用户可以表达三类结果:
- 满意:结果被接受,可以作为成功经验
- 修改:结果需要继续调整
- 放弃:任务不再继续
### 已完善的能力
- 记录用户对任务结果的反馈
- 根据反馈更新任务状态
- 把修改意见作为下一轮任务执行的输入
- 把满意反馈作为后续技能学习的重要信号
- 把放弃任务保留为失败经验,避免重复问题
### 产品价值
这项能力让系统具备“交付意识”。
它不是只追求生成内容,而是把用户是否认可作为任务是否成功的标准。这对用户来说很重要,因为用户更关心结果是否可用,而不是模型是否生成了一段看起来合理的回答。
## 5. 重点功能三:技能、工具调用与过程证据留存
### 功能说明
系统已经具备“技能指导工具使用”的能力。它不是让模型随机选择工具,而是先根据任务选择合适的技能,由技能提供工作方法、注意事项和工具使用建议,再把相关工具暴露给模型执行任务。
工具可以覆盖文件、终端、网页、记忆、会话搜索、技能管理、定时任务等场景。技能负责告诉智能体“应该怎么做”,工具负责完成真实操作。
同时,系统会记录每次执行过程,包括使用了哪些工具、工具返回了什么、模型如何继续处理。
### 已完善的能力
- 按任务内容选择合适技能
- 将技能作为本轮任务的执行指导注入上下文
- 从技能中读取工具使用建议
- 按任务和技能共同决定本轮可用工具
- 向模型暴露本轮任务可调用的工具清单
- 执行模型发起的工具调用
- 保存工具调用结果
- 将工具结果继续放回任务上下文
- 记录本轮启用的技能、可用工具、工具调用和执行结果
- 形成可追溯的任务过程证据,便于复盘和后续技能学习
### 产品价值
这项能力让 Beaver 从“能说”变成“按方法做事”。
对外展示时,可以强调:系统不是单纯调用大模型生成文字,也不是把工具直接丢给模型自由发挥,而是通过技能把经验和方法注入任务执行,再由工具完成真实操作。
这让每次工具调用更像是按照一套可复用工作方法执行,而不是一次性的临场判断。
## 6. 重点功能四:多智能体协作执行
### 功能说明
对于复杂任务,系统已经具备拆分和委派能力。一个复杂任务可以先拆成多个子任务,再交给不同子智能体执行,最后由主流程汇总结果。
### 已具备的能力
- 判断任务是否需要多智能体协作
- 支持顺序执行、并行执行和依赖关系执行
- 子任务可以继承任务目标、约束和上下文
- 子任务结果会被收集并交给主流程汇总
- 子任务过程也会形成证据
### 产品价值
这项能力让系统可以处理更复杂的工作。
例如“调研一个方案、比较多个选项、审查风险、整理最终建议”这类任务,不必全部压给一个模型一次性完成,而是可以拆成多个角色协作。
## 7. 重点功能五:技能沉淀与复用
### 功能说明
系统已经具备把成功任务经验沉淀为技能的基础链路。
当一个任务被用户认可后,系统可以根据任务过程、工具使用、结果和反馈生成技能候选。候选可以进入草稿、审核和发布流程,成为后续任务可复用的能力。
### 已具备的能力
- 记录任务中使用过的技能和工具
- 根据用户满意反馈识别高价值经验
- 生成技能学习候选
- 支持技能草稿、审核和发布
- 后续任务可以根据任务内容自动选择相关技能
### 产品价值
这项能力让系统具备“越用越会做”的基础。
它不是简单保存聊天记录,而是把被验证过的工作方法沉淀下来,逐步形成企业自己的智能体技能库。
## 8. 重点功能六:长期记忆系统底层能力
### 功能说明
长期记忆系统的底层能力已经完成,目前还没有完整接入主产品链路和用户界面。
这意味着:记忆系统本身不是未来才开始做,而是已经具备底层基础。后续工作的重点是把它接入任务执行、文件资源、技能学习和管理界面。
### 已完成的记忆能力
- 用户长期偏好
- 组织业务知识
- 历史任务和结果
- 文件和任务产物
- 工具调用经验
- 成功技能和失败案例
- 可复用的工作流程
- 分层记忆加载
- 记忆与技能联动的基础
### 当前边界
已经完成的是记忆系统底层能力;还需要接入的是:
- 任务执行时自动读取相关记忆
- 用户界面展示系统记住了什么
- 记忆来源、可信度和使用记录可视化
- 用户可以确认、编辑、删除或冻结记忆
- 记忆检索轨迹进入任务详情页
### 产品价值
这项能力是 Beaver 向长期智能体演进的关键基础。
普通助手只能依赖当前对话,而 Beaver 已经具备把长期偏好、历史任务、工具经验和可复用流程沉淀为长期上下文资产的基础。
## 9. 重点功能七:定时任务与主动触达
### 功能说明
系统已经具备定时任务和主动触达的基础能力,可以支持周期性任务、自动提醒、定期报告和主动通知。
### 已具备的能力
- 创建定时任务
- 自动触发任务或通知
- 保存定时任务运行记录
- 支持用户对定时结果继续修改
- 定时任务可以进入普通任务链路
### 当前边界
定时任务能力已经具备基础,但仍需要进一步稳定化和产品化展示。
当前已发现一个需要优先修复的问题:定时任务结果回写中存在变量引用错误,可能影响任务结果记录。
### 产品价值
这项能力让 Beaver 从“用户问了才回答”进一步走向“系统主动完成周期性工作”。

View File

@ -0,0 +1,228 @@
---
name: blueprinter
description: Generate technical diagrams using HTML/CSS in Flat Engineering Blueprint style. Use when the user wants to create architecture diagrams, system diagrams, flowcharts, or technical specification sheets that look like engineering blueprints. Triggers on requests for flat diagrams, blueprint-style visualizations, or technical drawings.
---
# Blueprinter
Generate technical diagrams using HTML/CSS following the "Flat Engineering Blueprint" style guidelines.
## Core Philosophy
Precise, Objective, High Data-Ink Ratio. The output should look like a technical specification sheet or an architectural diagram, NOT a marketing landing page.
## Visual Rules
### 1. No Decorations
- NO drop shadows
- NO gradients
- NO glassmorphism/blur
- NO rounded buttons
### 2. Flat & Outlined
- Use 1px or 2px solid borders for structure
- Use white backgrounds for content blocks
### 3. Monochrome Base
| Element | Color |
|---------|-------|
| Background | Light Gray (#f8fafc) |
| Canvas | White (#ffffff) with Slate Border (#cbd5e1) |
| Text (Main) | High contrast Black (#0f172a) |
| Text (Sub) | Slate Gray (#64748b) |
| Accent | Use BLACK or ONE semantic color (e.g., Red for Error) sparingly |
### 4. Typography
- Headings/Labels: Sans-serif (Inter/Helvetica)
- Data/Paths/Code: Monospace (JetBrains Mono/Consolas)
### 5. Layout Structure
- The diagram must be contained within a `diagram-canvas` (a bordered box with padding)
- Header: Title + Uppercase Subtitle, separated by a solid bottom border
- Grid/Flexbox alignment: Everything must be strictly aligned
### 6. Elements
- **Connectors**: Thin, straight or orthogonal lines. Dashed lines for abstract relationships.
- **Icons**: Simple stroke SVG icons (no fill or complex details)
- **Badges**: Outlined or solid black/gray blocks. Small font size.
## CSS Variable Reference
```css
:root {
--c-bg: #f8fafc; /* Outer Background */
--c-canvas: #ffffff; /* Diagram Background */
--c-border: #cbd5e1; /* Slate-300 */
--c-text-main: #0f172a; /* Slate-900 */
--c-text-sub: #64748b; /* Slate-500 */
--c-accent: #dc2626; /* Optional: for errors/warnings only */
--font-ui: 'Inter', sans-serif;
--font-mono: 'JetBrains Mono', monospace;
}
```
## HTML Structure Template
```html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>[Diagram Title]</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>
:root {
--c-bg: #f8fafc;
--c-canvas: #ffffff;
--c-border: #cbd5e1;
--c-text-main: #0f172a;
--c-text-sub: #64748b;
--c-accent: #dc2626; /* Optional: for errors/warnings only */
--font-ui: 'Inter', sans-serif;
--font-mono: 'JetBrains Mono', monospace;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: var(--font-ui);
background: var(--c-bg);
padding: 40px;
min-height: 100vh;
}
.diagram-container {
max-width: 1200px;
margin: 0 auto;
}
.diagram-canvas {
background: var(--c-canvas);
border: 1px solid var(--c-border);
padding: 32px;
}
.diagram-header {
border-bottom: 1px solid var(--c-border);
padding-bottom: 16px;
margin-bottom: 24px;
}
.diagram-title {
font-size: 20px;
font-weight: 600;
color: var(--c-text-main);
margin-bottom: 4px;
}
.diagram-subtitle {
font-size: 11px;
font-weight: 500;
color: var(--c-text-sub);
text-transform: uppercase;
letter-spacing: 0.05em;
}
/* Component styles */
.component {
border: 1px solid var(--c-border);
padding: 16px;
background: var(--c-canvas);
}
.component-label {
font-family: var(--font-mono);
font-size: 12px;
color: var(--c-text-sub);
text-transform: uppercase;
letter-spacing: 0.05em;
}
.component-value {
font-family: var(--font-mono);
font-size: 14px;
color: var(--c-text-main);
font-weight: 500;
}
/* Connector lines */
.connector {
stroke: var(--c-border);
stroke-width: 1;
}
.connector-dashed {
stroke: var(--c-border);
stroke-width: 1;
stroke-dasharray: 4 4;
}
/* Badges */
.badge {
display: inline-block;
font-family: var(--font-mono);
font-size: 10px;
padding: 2px 6px;
border: 1px solid var(--c-border);
color: var(--c-text-sub);
}
.badge-solid {
background: var(--c-text-main);
color: var(--c-canvas);
border-color: var(--c-text-main);
}
</style>
</head>
<body>
<div class="diagram-container">
<div class="diagram-canvas">
<div class="diagram-header">
<div class="diagram-title">[Diagram Title]</div>
<div class="diagram-subtitle">[Diagram Type / Version]</div>
</div>
<!-- Diagram content goes here -->
</div>
</div>
</body>
</html>
```
## Usage Guidelines
1. **Always use the CSS variables** - never hardcode colors
2. **Keep it flat** - no shadows, no gradients, no blur effects
3. **Use monospace for data** - any technical values, paths, codes should use `--font-mono`
4. **Align strictly** - use CSS Grid or Flexbox with consistent gaps
5. **Connect with lines** - use SVG for connectors between components
6. **Minimal icons** - if icons are needed, use simple stroke-only SVGs
## Example: Simple System Diagram
```html
<div class="diagram-canvas">
<div class="diagram-header">
<div class="diagram-title">System Architecture</div>
<div class="diagram-subtitle">v1.0 / Overview</div>
</div>
<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 24px;">
<div class="component">
<div class="component-label">Client</div>
<div class="component-value">Web App</div>
</div>
<div class="component">
<div class="component-label">API</div>
<div class="component-value">REST Gateway</div>
</div>
<div class="component">
<div class="component-label">Database</div>
<div class="component-value">PostgreSQL</div>
</div>
</div>
</div>
```