feat(engine): 添加MCP连接管理和工具集成功能

- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
This commit is contained in:
2026-05-14 09:43:48 +08:00
parent 8a12c30141
commit 30ab74ffb2
149 changed files with 12293 additions and 2812 deletions

View File

@ -1,13 +1,26 @@
"""Configuration models and loaders."""
from .loader import default_config_path, load_config
from .schema import AgentDefaultsConfig, BeaverConfig, EmbeddingConfig, ProviderConfig
from .schema import (
AgentDefaultsConfig,
AuthzConfig,
BackendIdentityConfig,
BeaverConfig,
EmbeddingConfig,
MCPServerConfig,
ProviderConfig,
ToolsConfig,
)
__all__ = [
"AgentDefaultsConfig",
"AuthzConfig",
"BackendIdentityConfig",
"BeaverConfig",
"EmbeddingConfig",
"MCPServerConfig",
"ProviderConfig",
"ToolsConfig",
"default_config_path",
"load_config",
]

View File

@ -4,10 +4,30 @@ from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Any
from .schema import AgentDefaultsConfig, BeaverConfig, EmbeddingConfig, ProviderConfig
from .schema import (
AgentDefaultsConfig,
AuthzConfig,
BackendIdentityConfig,
BeaverConfig,
EmbeddingConfig,
MCPServerConfig,
ProviderConfig,
ToolsConfig,
)
# Built-in local MCP servers, keyed by server id. Each entry carries the
# "category" passed to the bundled tools server via ``--category`` and the
# "display_name" copied onto the generated MCPServerConfig. _parse_tools adds
# one managed stdio server per id that the user config does not already define.
LOCAL_MCP_CATEGORIES: dict[str, dict[str, str]] = {
"local_filesystem_mcp": {"category": "filesystem", "display_name": "本地文件工具"},
"local_runtime_mcp": {"category": "runtime", "display_name": "本地运行工具"},
"local_memory_mcp": {"category": "memory", "display_name": "本地记忆工具"},
"local_skills_mcp": {"category": "skills", "display_name": "本地技能工具"},
"local_coordination_mcp": {"category": "coordination", "display_name": "本地协作工具"},
"local_scheduler_mcp": {"category": "scheduler", "display_name": "本地定时工具"},
"local_web_mcp": {"category": "web", "display_name": "本地联网工具"},
}
def default_config_path(*, workspace: str | Path | None = None) -> Path:
@ -57,6 +77,9 @@ def load_config(
agents_defaults=_parse_agent_defaults(data),
providers=_parse_providers(data.get("providers")),
embedding=_parse_embedding(data),
tools=_parse_tools(data.get("tools")),
authz=_parse_authz(data.get("authz")),
backend_identity=_parse_backend_identity(data.get("backend_identity") or data.get("backendIdentity")),
config_path=path,
)
@ -104,6 +127,73 @@ def _parse_embedding(data: dict[str, Any]) -> EmbeddingConfig:
)
def _parse_tools(raw: Any) -> ToolsConfig:
    """Build the tool configuration from the raw ``tools`` config section.

    User-declared MCP servers are read from ``mcpServers`` (or
    ``mcp_servers``); entries whose payload is not a dict are ignored. Every
    id in ``LOCAL_MCP_CATEGORIES`` that the user did not declare is then added
    as a managed stdio server launched with the current interpreter.

    Args:
        raw: The ``tools`` value from the parsed config; non-dict values are
            treated as an empty section.

    Returns:
        A ``ToolsConfig`` holding the workspace restriction flag (default
        ``True``) and the merged server table.
    """
    section = _as_dict(raw)
    declared = _as_dict(section.get("mcpServers") or section.get("mcp_servers"))

    servers: dict[str, MCPServerConfig] = {}
    for name, entry in declared.items():
        if not isinstance(entry, dict):
            continue
        # A configured command implies a local server unless kind/category
        # are given explicitly.
        fallback = "local" if entry.get("command") else "online"
        auth_mode = _string(entry.get("authMode") or entry.get("auth_mode")) or "none"
        timeout = _float(entry.get("toolTimeout") or entry.get("tool_timeout")) or 30
        kind = _string(entry.get("kind")) or fallback
        servers[str(name)] = MCPServerConfig(
            command=_string(entry.get("command")) or "",
            args=_string_list(entry.get("args")),
            env=_string_dict(entry.get("env")),
            url=_string(entry.get("url")) or "",
            headers=_string_dict(entry.get("headers")),
            auth_mode=auth_mode.lower(),
            auth_audience=_string(entry.get("authAudience") or entry.get("auth_audience")) or "",
            auth_scopes=_string_list(entry.get("authScopes") or entry.get("auth_scopes")),
            tool_timeout=int(timeout),
            sensitive=_bool(entry.get("sensitive"), default=False),
            kind=kind.lower(),
            category=_string(entry.get("category")) or fallback,
            managed=_bool(entry.get("managed"), default=False),
            display_name=_string(entry.get("displayName") or entry.get("display_name")) or "",
            source=_string(entry.get("source")) or "config",
        )

    # Fill in the built-in local servers without overriding user entries.
    for name, meta in LOCAL_MCP_CATEGORIES.items():
        if name not in servers:
            servers[name] = MCPServerConfig(
                command=sys.executable or "python",
                args=["-m", "beaver.interfaces.mcp.tools_server", "--category", meta["category"]],
                env={},
                kind="local",
                category=meta["category"],
                managed=True,
                display_name=meta["display_name"],
                source="beaver-default",
                tool_timeout=60,
            )

    # The camelCase key wins whenever it is present, even if its value is falsy.
    if "restrictToWorkspace" in section:
        restrict_raw = section.get("restrictToWorkspace")
    else:
        restrict_raw = section.get("restrict_to_workspace")
    return ToolsConfig(
        restrict_to_workspace=_bool(restrict_raw, default=True),
        mcp_servers=servers,
    )
def _parse_authz(raw: Any) -> AuthzConfig:
    """Build :class:`AuthzConfig` from the raw ``authz`` config section.

    Both camelCase and snake_case keys are accepted. A missing or falsy
    timeout falls back to 10 and the result is truncated to ``int``.
    """
    section = _as_dict(raw)
    timeout = _float(
        section.get("requestTimeoutSeconds") or section.get("request_timeout_seconds")
    ) or 10
    base_url = _string(section.get("baseUrl") or section.get("base_url")) or ""
    outlook_url = _string(section.get("outlookMcpUrl") or section.get("outlook_mcp_url")) or ""
    return AuthzConfig(
        enabled=_bool(section.get("enabled"), default=False),
        base_url=base_url,
        request_timeout_seconds=int(timeout),
        outlook_mcp_url=outlook_url,
    )
def _parse_backend_identity(raw: Any) -> BackendIdentityConfig:
    """Build :class:`BackendIdentityConfig` from its raw config section.

    Each field accepts a camelCase or snake_case key and defaults to ``""``.
    """
    section = _as_dict(raw)

    def text(camel: str, snake: str) -> str:
        # Prefer the camelCase spelling, fall back to snake_case, then "".
        return _string(section.get(camel) or section.get(snake)) or ""

    return BackendIdentityConfig(
        backend_id=text("backendId", "backend_id"),
        client_id=text("clientId", "client_id"),
        client_secret=text("clientSecret", "client_secret"),
        name=_string(section.get("name")) or "",
        public_base_url=text("publicBaseUrl", "public_base_url"),
    )
def _as_dict(value: Any) -> dict[str, Any]:
    """Return *value* itself when it is a dict, otherwise a new empty dict."""
    if isinstance(value, dict):
        return value
    return {}
@ -121,7 +211,23 @@ def _string_dict(value: Any) -> dict[str, str]:
return {str(key): str(item) for key, item in value.items() if item is not None}
def _string_list(value: Any) -> list[str]:
    """Coerce a config value into a list of non-blank strings.

    Args:
        value: Raw config value. Anything that is not a ``list`` yields ``[]``.

    Returns:
        ``str(item)`` for every item that is not ``None`` and whose string
        form is not blank. Items are not stripped, only filtered.
    """
    if not isinstance(value, list):
        return []
    # Drop None (JSON null) instead of emitting the literal "None", which is
    # how _string_dict already treats None values.
    return [str(item) for item in value if item is not None and str(item).strip()]
def _float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)
def _bool(value: Any, *, default: bool) -> bool:
    """Interpret a loosely typed config value as a boolean.

    Real booleans pass through. ``None`` and ``""`` yield *default*. Strings
    are true only when, stripped and lower-cased, they equal ``1``, ``true``,
    ``yes`` or ``on``. Every other value goes through ``bool()``.
    """
    if not isinstance(value, bool) and value in (None, ""):
        return default
    if isinstance(value, str):
        normalized = value.strip().lower()
        return normalized in {"1", "true", "yes", "on"}
    return bool(value)

View File

@ -39,6 +39,65 @@ class EmbeddingConfig:
request_timeout_seconds: float | None = None
@dataclass(slots=True)
class MCPServerConfig:
"""One configured MCP server.
Transport is inferred from fields:
- command => local stdio MCP server
- url => remote streamable HTTP MCP server
"""
command: str = ""
args: list[str] = field(default_factory=list)
env: dict[str, str] = field(default_factory=dict)
url: str = ""
headers: dict[str, str] = field(default_factory=dict)
auth_mode: str = "none"
auth_audience: str = ""
auth_scopes: list[str] = field(default_factory=list)
tool_timeout: int = 30
sensitive: bool = False
kind: str = "online"
category: str = "online"
managed: bool = False
display_name: str = ""
source: str = "config"
@property
def transport(self) -> str:
return "stdio" if _clean(self.command) else "http"
@dataclass(slots=True)
class ToolsConfig:
"""Runtime tool configuration."""
restrict_to_workspace: bool = True
mcp_servers: dict[str, MCPServerConfig] = field(default_factory=dict)
@dataclass(slots=True)
class AuthzConfig:
"""External AuthZ service configuration."""
enabled: bool = False
base_url: str = ""
request_timeout_seconds: int = 10
outlook_mcp_url: str = ""
@dataclass(slots=True)
class BackendIdentityConfig:
"""This backend's AuthZ client identity."""
backend_id: str = ""
client_id: str = ""
client_secret: str = ""
name: str = ""
public_base_url: str = ""
@dataclass(slots=True)
class BeaverConfig:
"""Config loaded once per backend sandbox instance."""
@ -46,6 +105,9 @@ class BeaverConfig:
agents_defaults: AgentDefaultsConfig = field(default_factory=AgentDefaultsConfig)
providers: dict[str, ProviderConfig] = field(default_factory=dict)
embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
tools: ToolsConfig = field(default_factory=ToolsConfig)
authz: AuthzConfig = field(default_factory=AuthzConfig)
backend_identity: BackendIdentityConfig = field(default_factory=BackendIdentityConfig)
config_path: Path | None = None
@property
@ -69,7 +131,13 @@ class BeaverConfig:
"""
resolved_model = _clean(model) or self.default_model
resolved_provider = _clean(provider_name) or self._infer_provider(resolved_model)
requested_provider = _clean(provider_name)
enabled_providers = self._enabled_provider_names()
resolved_provider = (
requested_provider
if requested_provider and requested_provider in enabled_providers
else self._infer_provider(resolved_model)
)
provider_cfg = self.providers.get(resolved_provider or "") if resolved_provider else None
payload: dict[str, Any] = {
"model": resolved_model,
@ -115,22 +183,36 @@ class BeaverConfig:
def _infer_provider(self, model: str | None) -> str | None:
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped, so superseded lines and their replacements sit
# side by side. Confirm against the real file before editing the logic.
configured_provider = _clean(self.agents_defaults.provider)
# Presumably the old guard; the line after it looks like its replacement,
# which additionally ignores a provider named "custom".
if configured_provider:
if configured_provider and configured_provider != "custom":
return configured_provider
# A "prefix/model" name selects the provider named by the prefix.
if model and "/" in model:
prefix = model.split("/", 1)[0]
# Presumably old (any configured provider) vs. new (enabled providers only).
if prefix in self.providers:
if prefix in self._enabled_provider_names():
return prefix
# Presumably the old single-provider fallback, followed by its replacement
# that considers only enabled providers.
if len(self.providers) == 1:
return next(iter(self.providers))
enabled_providers = self._enabled_provider_names()
if len(enabled_providers) == 1:
return enabled_providers[0]
return None
def _enabled_provider_names(self) -> list[str]:
    """List providers that carry at least one usable connection setting.

    A provider is included when it is not named ``"custom"`` and has a
    non-blank API key, a non-blank API base, or truthy extra headers. The
    order follows ``self.providers``.
    """
    enabled: list[str] = []
    for name, provider in self.providers.items():
        if name == "custom":
            continue
        signals = (
            _clean(provider.api_key),
            _clean(provider.api_base),
            provider.extra_headers,
        )
        if any(signals):
            enabled.append(name)
    return enabled
def _clean(value: str | None) -> str | None:
if value is None:
return None
value = str(value).strip()
return value or None

View File

@ -19,7 +19,7 @@ class EmbeddingRetriever:
api_key_env: str = "OPENAI_API_KEY",
api_base_env: str = "OPENAI_API_BASE",
model: str = "text-embedding-v4",
timeout_seconds: float = 20.0,
timeout_seconds: float = 3.0,
) -> None:
self.api_key_env = api_key_env
self.api_base_env = api_base_env

View File

@ -1,2 +1,11 @@
"""Shared data models."""
"""Shared Beaver data models."""
from .cron import CronExecutionResult, CronJob, CronPayload, CronRunRecord, CronSchedule
__all__ = [
"CronExecutionResult",
"CronJob",
"CronPayload",
"CronRunRecord",
"CronSchedule",
]

View File

@ -0,0 +1,266 @@
"""Scheduled task models for Beaver cron.
The scheduler borrows Hermes' durable JSON + explicit schedule parsing shape,
but the execution target is Beaver Task mode: every trigger creates a normal
Task run instead of a detached agent turn.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Literal
from uuid import uuid4
# Closed sets of string values used by the cron models in this module.
# Allowed values of CronSchedule.kind.
CronScheduleKind = Literal["at", "every", "cron"]
# Allowed values of CronPayload.kind.
CronPayloadKind = Literal["agent_turn", "system_event"]
# Allowed values of the ``mode`` field on payloads, run records and results.
CronPayloadMode = Literal["notification", "task"]
@dataclass(slots=True)
class CronSchedule:
kind: CronScheduleKind
at_ms: int | None = None
every_ms: int | None = None
expr: str | None = None
tz: str | None = None
display: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"kind": self.kind,
"at_ms": self.at_ms,
"every_ms": self.every_ms,
"expr": self.expr,
"tz": self.tz,
"display": self.display,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "CronSchedule":
return cls(
kind=str(payload.get("kind") or "every"), # type: ignore[arg-type]
at_ms=_optional_int(payload.get("at_ms") or payload.get("atMs")),
every_ms=_optional_int(payload.get("every_ms") or payload.get("everyMs")),
expr=_optional_str(payload.get("expr")),
tz=_optional_str(payload.get("tz")),
display=_optional_str(payload.get("display")),
)
@dataclass(slots=True)
class CronPayload:
kind: CronPayloadKind = "agent_turn"
mode: CronPayloadMode = "notification"
message: str = ""
session_key: str | None = None
requires_followup: bool = False
deliver: bool = False
channel: str | None = None
to: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"kind": self.kind,
"mode": self.mode,
"message": self.message,
"session_key": self.session_key,
"requires_followup": self.requires_followup,
"deliver": self.deliver,
"channel": self.channel,
"to": self.to,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "CronPayload":
return cls(
kind=str(payload.get("kind") or "agent_turn"), # type: ignore[arg-type]
mode=_payload_mode(payload.get("mode"), default="task"),
message=str(payload.get("message") or ""),
session_key=_optional_str(payload.get("session_key") or payload.get("sessionKey")),
requires_followup=bool(payload.get("requires_followup") or payload.get("requiresFollowup") or False),
deliver=bool(payload.get("deliver", False)),
channel=_optional_str(payload.get("channel")),
to=_optional_str(payload.get("to")),
)
@dataclass(slots=True)
class CronRunRecord:
started_at_ms: int
scheduled_run_id: str = field(default_factory=lambda: uuid4().hex)
finished_at_ms: int | None = None
status: Literal["running", "ok", "error", "skipped"] = "running"
mode: CronPayloadMode = "notification"
notification_session_id: str | None = None
output: str | None = None
task_id: str | None = None
run_id: str | None = None
error: str | None = None
engaged: bool = False
engaged_at_ms: int | None = None
engage_intent: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"scheduled_run_id": self.scheduled_run_id,
"started_at_ms": self.started_at_ms,
"finished_at_ms": self.finished_at_ms,
"status": self.status,
"mode": self.mode,
"notification_session_id": self.notification_session_id,
"output": self.output,
"task_id": self.task_id,
"run_id": self.run_id,
"error": self.error,
"engaged": self.engaged,
"engaged_at_ms": self.engaged_at_ms,
"engage_intent": self.engage_intent,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "CronRunRecord":
return cls(
scheduled_run_id=str(payload.get("scheduled_run_id") or payload.get("scheduledRunId") or uuid4().hex),
started_at_ms=int(payload.get("started_at_ms") or payload.get("startedAtMs") or 0),
finished_at_ms=_optional_int(payload.get("finished_at_ms") or payload.get("finishedAtMs")),
status=str(payload.get("status") or "running"), # type: ignore[arg-type]
mode=_payload_mode(payload.get("mode"), default="notification"),
notification_session_id=_optional_str(payload.get("notification_session_id") or payload.get("notificationSessionId")),
output=_optional_str(payload.get("output")),
task_id=_optional_str(payload.get("task_id") or payload.get("taskId")),
run_id=_optional_str(payload.get("run_id") or payload.get("runId")),
error=_optional_str(payload.get("error")),
engaged=bool(payload.get("engaged", False)),
engaged_at_ms=_optional_int(payload.get("engaged_at_ms") or payload.get("engagedAtMs")),
engage_intent=_optional_str(payload.get("engage_intent") or payload.get("engageIntent")),
)
@dataclass(slots=True)
class CronJob:
id: str
name: str
enabled: bool
schedule: CronSchedule
payload: CronPayload
created_at_ms: int
updated_at_ms: int
next_run_at_ms: int | None = None
last_run_at_ms: int | None = None
last_status: Literal["ok", "error", "skipped"] | None = None
last_error: str | None = None
delete_after_run: bool = False
history: list[CronRunRecord] = field(default_factory=list)
def to_dict(self) -> dict[str, Any]:
return {
"id": self.id,
"name": self.name,
"enabled": self.enabled,
"schedule": self.schedule.to_dict(),
"payload": self.payload.to_dict(),
"created_at_ms": self.created_at_ms,
"updated_at_ms": self.updated_at_ms,
"next_run_at_ms": self.next_run_at_ms,
"last_run_at_ms": self.last_run_at_ms,
"last_status": self.last_status,
"last_error": self.last_error,
"delete_after_run": self.delete_after_run,
"history": [item.to_dict() for item in self.history],
}
def to_api_dict(self) -> dict[str, Any]:
latest = self.history[-1] if self.history else None
return {
"id": self.id,
"name": self.name,
"enabled": self.enabled,
"schedule_kind": self.schedule.kind,
"schedule_display": self.schedule.display or _schedule_display(self.schedule),
"schedule_expr": self.schedule.expr,
"schedule_every_ms": self.schedule.every_ms,
"message": self.payload.message,
"mode": self.payload.mode,
"requires_followup": self.payload.requires_followup,
"deliver": self.payload.deliver,
"channel": self.payload.channel,
"to": self.payload.to,
"session_key": self.payload.session_key,
"next_run_at_ms": self.next_run_at_ms,
"last_run_at_ms": self.last_run_at_ms,
"last_status": self.last_status,
"last_error": self.last_error,
"last_scheduled_run_id": latest.scheduled_run_id if latest else None,
"last_task_id": latest.task_id if latest else None,
"last_run_id": latest.run_id if latest else None,
"history": [item.to_dict() for item in self.history],
"created_at_ms": self.created_at_ms,
"updated_at_ms": self.updated_at_ms,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "CronJob":
schedule_payload = payload.get("schedule") if isinstance(payload.get("schedule"), dict) else {}
payload_payload = payload.get("payload") if isinstance(payload.get("payload"), dict) else {}
return cls(
id=str(payload["id"]),
name=str(payload.get("name") or payload["id"]),
enabled=bool(payload.get("enabled", True)),
schedule=CronSchedule.from_dict(schedule_payload),
payload=CronPayload.from_dict(payload_payload),
created_at_ms=int(payload.get("created_at_ms") or payload.get("createdAtMs") or 0),
updated_at_ms=int(payload.get("updated_at_ms") or payload.get("updatedAtMs") or 0),
next_run_at_ms=_optional_int(payload.get("next_run_at_ms") or payload.get("nextRunAtMs")),
last_run_at_ms=_optional_int(payload.get("last_run_at_ms") or payload.get("lastRunAtMs")),
last_status=_optional_str(payload.get("last_status") or payload.get("lastStatus")), # type: ignore[arg-type]
last_error=_optional_str(payload.get("last_error") or payload.get("lastError")),
delete_after_run=bool(payload.get("delete_after_run") or payload.get("deleteAfterRun") or False),
history=[
CronRunRecord.from_dict(item)
for item in payload.get("history") or []
if isinstance(item, dict)
],
)
@dataclass(slots=True)
class CronExecutionResult:
response: str | None = None
task_id: str | None = None
run_id: str | None = None
notification_session_id: str | None = None
mode: CronPayloadMode = "notification"
def _schedule_display(schedule: CronSchedule) -> str:
    """Return a short human-readable label for *schedule*.

    ``"every"`` schedules render as ``every <n>s`` with the interval truncated
    to whole seconds, ``"cron"`` schedules as their expression (or ``"cron"``
    when it is empty), and anything else as ``"one-time"``.
    """
    kind = schedule.kind
    if kind == "cron":
        return schedule.expr or "cron"
    if kind != "every":
        return "one-time"
    whole_seconds = int((schedule.every_ms or 0) / 1000)
    return f"every {whole_seconds}s"
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _optional_int(value: Any) -> int | None:
    """Convert *value* to ``int``, or return ``None`` when that is not possible.

    Args:
        value: Raw value from a stored dict.

    Returns:
        ``None`` for ``None``, ``""`` and anything ``int()`` rejects;
        otherwise ``int(value)``.
    """
    if value in (None, ""):
        return None
    # This conversion had ended up after _payload_mode's final return, which
    # made this function always return None and left dead code there.
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers non-finite floats such as float("inf").
        return None


def _payload_mode(value: Any, *, default: CronPayloadMode = "notification") -> CronPayloadMode:
    """Normalize a stored payload mode.

    Args:
        value: Raw mode value.
        default: Mode returned when *value* is ``None`` or ``""``.

    Returns:
        *default* for missing values, ``"task"`` when the stripped,
        lower-cased text equals ``"task"``, and ``"notification"`` for
        everything else.
    """
    if value in (None, ""):
        return default
    cleaned = str(value or "").strip().lower()
    return "task" if cleaned == "task" else "notification"