feat(app-instance): 集成Beaver后端并更新配置管理
集成新的Beaver后端服务到应用实例中,替换原有的nanobot实现。 主要变更包括: - 在Dockerfile和环境配置中添加Beaver相关路径和配置变量 - 更新工作目录结构从.nanobot到.beaver - 实现Beaver引擎加载器,支持配置文件加载和工具组装 - 添加内置工具如ListDirectoryTool、ReadFileTool、SearchFilesTool - 更新消息处理流程,支持通道适配器和网关模式 - 重构技能系统,支持显式工具提示和嵌入式检索 - 改进错误处理和生命周期管理 此变更使应用实例能够使用统一的Beaver后端进行AI代理运行时管理。
This commit is contained in:
@ -1,2 +1,13 @@
|
||||
"""Configuration models and loaders."""
|
||||
|
||||
from .loader import default_config_path, load_config
|
||||
from .schema import AgentDefaultsConfig, BeaverConfig, EmbeddingConfig, ProviderConfig
|
||||
|
||||
__all__ = [
|
||||
"AgentDefaultsConfig",
|
||||
"BeaverConfig",
|
||||
"EmbeddingConfig",
|
||||
"ProviderConfig",
|
||||
"default_config_path",
|
||||
"load_config",
|
||||
]
|
||||
|
||||
127
app-instance/backend/beaver/foundation/config/loader.py
Normal file
127
app-instance/backend/beaver/foundation/config/loader.py
Normal file
@ -0,0 +1,127 @@
|
||||
"""Config loader for per-sandbox Beaver runtime settings."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .schema import AgentDefaultsConfig, BeaverConfig, EmbeddingConfig, ProviderConfig
|
||||
|
||||
|
||||
def default_config_path(*, workspace: str | Path | None = None) -> Path:
|
||||
"""Resolve the default config path for a single-user sandbox instance.
|
||||
|
||||
Priority:
|
||||
1. `BEAVER_CONFIG_PATH`
|
||||
2. `NANOBOT_CONFIG_PATH` for compatibility during migration
|
||||
3. `BEAVER_HOME/config.json`
|
||||
4. `NANOBOT_HOME/config.json` for migration compatibility
|
||||
5. `<workspace>/.beaver/config.json`
|
||||
6. `./.beaver/config.json`
|
||||
"""
|
||||
|
||||
explicit = os.getenv("BEAVER_CONFIG_PATH") or os.getenv("NANOBOT_CONFIG_PATH")
|
||||
if explicit:
|
||||
return Path(explicit).expanduser()
|
||||
|
||||
beaver_home = os.getenv("BEAVER_HOME")
|
||||
if beaver_home:
|
||||
return Path(beaver_home).expanduser() / "config.json"
|
||||
|
||||
nanobot_home = os.getenv("NANOBOT_HOME")
|
||||
if nanobot_home:
|
||||
return Path(nanobot_home).expanduser() / "config.json"
|
||||
|
||||
root = Path(workspace).expanduser() if workspace is not None else Path.cwd()
|
||||
return root / ".beaver" / "config.json"
|
||||
|
||||
|
||||
def load_config(
    *,
    workspace: str | Path | None = None,
    config_path: str | Path | None = None,
) -> BeaverConfig:
    """Read the backend config file and build a ``BeaverConfig``.

    An explicit *config_path* takes precedence; otherwise the location comes
    from ``default_config_path(workspace=workspace)``. A path that does not
    exist yields a default ``BeaverConfig`` that only records the path.

    Raises:
        ValueError: if the file parses as JSON but the top-level value is not
            an object. JSON syntax errors from ``json.loads`` propagate.
    """
    if config_path is None:
        resolved = default_config_path(workspace=workspace)
    else:
        resolved = Path(config_path).expanduser()

    if not resolved.exists():
        return BeaverConfig(config_path=resolved)

    document = json.loads(resolved.read_text(encoding="utf-8"))
    if not isinstance(document, dict):
        raise ValueError(f"Beaver config must be a JSON object: {resolved}")

    return BeaverConfig(
        agents_defaults=_parse_agent_defaults(document),
        providers=_parse_providers(document.get("providers")),
        embedding=_parse_embedding(document),
        config_path=resolved,
    )
|
||||
|
||||
|
||||
def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
    """Build the agent defaults from the parsed config document.

    Each setting is read from ``agents.defaults`` first and falls back to a
    key of the same name at the top level of the document. The embedding
    model accepts both ``embeddingModel`` and ``embedding_model`` at either
    level, matching the spellings ``_parse_embedding`` accepts.
    """
    agents = _as_dict(data.get("agents"))
    defaults = _as_dict(agents.get("defaults"))
    embedding_model = (
        defaults.get("embeddingModel")
        or defaults.get("embedding_model")
        or data.get("embeddingModel")
        # The top-level snake_case spelling was previously ignored here.
        or data.get("embedding_model")
    )
    return AgentDefaultsConfig(
        workspace=_string(defaults.get("workspace") or data.get("workspace")),
        model=_string(defaults.get("model") or data.get("model")),
        provider=_string(defaults.get("provider") or data.get("provider")),
        embedding_model=_string(embedding_model),
    )
|
||||
|
||||
|
||||
def _parse_providers(raw: Any) -> dict[str, ProviderConfig]:
    """Convert the ``providers`` mapping into ``ProviderConfig`` objects.

    A non-dict *raw* yields an empty result, and entries whose value is not
    a dict are skipped. Each setting accepts camelCase and snake_case keys,
    plus the short aliases ``baseUrl``/``base_url``, ``headers`` and
    ``timeout``.
    """
    parsed: dict[str, ProviderConfig] = {}
    for provider_name, entry in _as_dict(raw).items():
        if isinstance(entry, dict):
            lookup = entry.get
            timeout = (
                lookup("requestTimeoutSeconds")
                or lookup("request_timeout_seconds")
                or lookup("timeout")
            )
            base_url = (
                lookup("apiBase")
                or lookup("api_base")
                or lookup("baseUrl")
                or lookup("base_url")
            )
            headers = lookup("extraHeaders") or lookup("extra_headers") or lookup("headers")
            parsed[str(provider_name)] = ProviderConfig(
                api_key=_string(lookup("apiKey") or lookup("api_key")),
                api_base=_string(base_url),
                extra_headers=_string_dict(headers),
                request_timeout_seconds=_float(timeout),
            )
    return parsed
|
||||
|
||||
|
||||
def _parse_embedding(data: dict[str, Any]) -> EmbeddingConfig:
    """Build the embedding settings from the parsed config document.

    The section is read from ``embedding`` (or ``embeddings``). The model
    falls back to a top-level ``embeddingModel``/``embedding_model`` key when
    the section does not name one.
    """
    section = _as_dict(data.get("embedding") or data.get("embeddings"))
    lookup = section.get

    model_name = lookup("model") or data.get("embeddingModel") or data.get("embedding_model")
    base_url = lookup("apiBase") or lookup("api_base") or lookup("baseUrl") or lookup("base_url")
    headers = lookup("extraHeaders") or lookup("extra_headers") or lookup("headers")
    timeout = lookup("requestTimeoutSeconds") or lookup("request_timeout_seconds") or lookup("timeout")

    return EmbeddingConfig(
        provider=_string(lookup("provider") or lookup("provider_name")),
        model=_string(model_name),
        api_key=_string(lookup("apiKey") or lookup("api_key")),
        api_base=_string(base_url),
        extra_headers=_string_dict(headers),
        request_timeout_seconds=_float(timeout),
    )
|
||||
|
||||
|
||||
def _as_dict(value: Any) -> dict[str, Any]:
    """Return *value* itself when it is a dict, otherwise a new empty dict."""
    if isinstance(value, dict):
        return value
    return {}
|
||||
|
||||
|
||||
def _string(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
value = str(value).strip()
|
||||
return value or None
|
||||
|
||||
|
||||
def _string_dict(value: Any) -> dict[str, str]:
    """Return a str-to-str copy of a dict, dropping entries whose value is ``None``.

    Anything that is not a dict yields an empty dict.
    """
    result: dict[str, str] = {}
    if isinstance(value, dict):
        for key, item in value.items():
            if item is not None:
                result[str(key)] = str(item)
    return result
|
||||
|
||||
|
||||
def _float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
136
app-instance/backend/beaver/foundation/config/schema.py
Normal file
136
app-instance/backend/beaver/foundation/config/schema.py
Normal file
@ -0,0 +1,136 @@
|
||||
"""Runtime configuration schema for Beaver sandbox instances."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ProviderConfig:
|
||||
"""One configured LLM provider profile."""
|
||||
|
||||
api_key: str | None = None
|
||||
api_base: str | None = None
|
||||
extra_headers: dict[str, str] = field(default_factory=dict)
|
||||
request_timeout_seconds: float | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AgentDefaultsConfig:
|
||||
"""Default agent settings for this sandbox instance."""
|
||||
|
||||
workspace: str | None = None
|
||||
model: str | None = None
|
||||
provider: str | None = None
|
||||
embedding_model: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EmbeddingConfig:
|
||||
"""Optional dedicated embedding model settings."""
|
||||
|
||||
provider: str | None = None
|
||||
model: str | None = None
|
||||
api_key: str | None = None
|
||||
api_base: str | None = None
|
||||
extra_headers: dict[str, str] = field(default_factory=dict)
|
||||
request_timeout_seconds: float | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
class BeaverConfig:
    """Config loaded once per backend sandbox instance."""

    # Instance-wide agent defaults.
    agents_defaults: AgentDefaultsConfig = field(default_factory=AgentDefaultsConfig)
    # Provider profiles keyed by provider name.
    providers: dict[str, ProviderConfig] = field(default_factory=dict)
    # Optional dedicated embedding settings.
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    # Path this config was (or would have been) loaded from.
    config_path: Path | None = None

    @property
    def default_model(self) -> str | None:
        """Return the configured default model, or ``None`` when unset or blank."""
        return _clean(self.agents_defaults.model)

    @property
    def default_embedding_model(self) -> str:
        """Return the embedding model name.

        Preference order: the embedding section's model, then the agent
        defaults' embedding model, then the literal ``"text-embedding-v4"``.
        """
        return (
            _clean(self.embedding.model)
            or _clean(self.agents_defaults.embedding_model)
            or "text-embedding-v4"
        )

    def resolve_provider_target(
        self,
        *,
        model: str | None = None,
        provider_name: str | None = None,
    ) -> dict[str, Any]:
        """Resolve model/provider credentials from instance config.

        Request-level model/provider overrides are allowed, but credentials are
        still read from backend config, not from Web/channel payloads.

        Returns:
            A dict that may contain ``model``, ``provider_name``, ``api_key``,
            ``api_base``, ``extra_headers`` and ``request_timeout_seconds``.
            Keys whose value is ``None``, ``""`` or ``{}`` are omitted.
        """
        resolved_model = _clean(model) or self.default_model
        resolved_provider = _clean(provider_name) or self._infer_provider(resolved_model)
        provider_cfg = self.providers.get(resolved_provider) if resolved_provider else None

        payload: dict[str, Any] = {
            "model": resolved_model,
            "provider_name": resolved_provider,
        }
        if provider_cfg is not None:
            payload.update(
                {
                    "api_key": provider_cfg.api_key,
                    "api_base": provider_cfg.api_base,
                    # Copy so callers cannot mutate the stored config.
                    "extra_headers": dict(provider_cfg.extra_headers),
                    "request_timeout_seconds": provider_cfg.request_timeout_seconds,
                }
            )
        return {key: value for key, value in payload.items() if value not in (None, "", {})}

    def resolve_embedding_target(self) -> dict[str, Any] | None:
        """Return an explicit embedding target when configured.

        The embedding section counts as explicit when it sets a provider, API
        key, API base, extra headers or a timeout; a model name alone does
        not. Settings missing from the section fall back to the provider
        profile named by ``embedding.provider``, when one exists.

        Returns:
            ``None`` when nothing explicit is configured; otherwise a dict
            with the non-empty values among ``provider``, ``model``,
            ``api_key``, ``api_base``, ``extra_headers`` and
            ``request_timeout_seconds``.
        """
        embedding = self.embedding
        provider_name = _clean(embedding.provider)
        has_explicit_embedding = any(
            [
                provider_name,
                _clean(embedding.api_key),
                _clean(embedding.api_base),
                embedding.extra_headers,
                embedding.request_timeout_seconds is not None,
            ]
        )
        if not has_explicit_embedding:
            return None

        provider_cfg = self.providers.get(provider_name or "")

        # Always hand out a copy of the headers. Returning the stored dict
        # would let a caller's mutation leak back into this config.
        if embedding.extra_headers:
            headers = dict(embedding.extra_headers)
        elif provider_cfg is not None:
            headers = dict(provider_cfg.extra_headers)
        else:
            headers = {}

        payload: dict[str, Any] = {
            "provider": provider_name,
            "model": self.default_embedding_model,
            "api_key": _clean(embedding.api_key) or (provider_cfg.api_key if provider_cfg else None),
            "api_base": _clean(embedding.api_base) or (provider_cfg.api_base if provider_cfg else None),
            "extra_headers": headers,
            "request_timeout_seconds": embedding.request_timeout_seconds
            or (provider_cfg.request_timeout_seconds if provider_cfg else None),
        }
        return {key: value for key, value in payload.items() if value not in (None, "", {})}

    def _infer_provider(self, model: str | None) -> str | None:
        """Pick a provider name when the request did not supply one.

        Order: the configured default provider; the ``prefix`` of a
        ``prefix/model`` name when that prefix is a configured provider; the
        only configured provider when exactly one exists; otherwise ``None``.
        """
        configured_provider = _clean(self.agents_defaults.provider)
        if configured_provider:
            return configured_provider

        if model and "/" in model:
            prefix = model.split("/", 1)[0]
            if prefix in self.providers:
                return prefix

        if len(self.providers) == 1:
            return next(iter(self.providers))
        return None
|
||||
|
||||
|
||||
def _clean(value: str | None) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
value = str(value).strip()
|
||||
return value or None
|
||||
|
||||
205
app-instance/backend/beaver/foundation/embedding.py
Normal file
205
app-instance/backend/beaver/foundation/embedding.py
Normal file
@ -0,0 +1,205 @@
|
||||
"""Shared embedding-based semantic retrieval utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from typing import Any
|
||||
from urllib import request
|
||||
|
||||
|
||||
class EmbeddingRetriever:
|
||||
"""Use an OpenAI-compatible embeddings API to rank lightweight candidates."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key_env: str = "OPENAI_API_KEY",
|
||||
api_base_env: str = "OPENAI_API_BASE",
|
||||
model: str = "text-embedding-v4",
|
||||
timeout_seconds: float = 20.0,
|
||||
) -> None:
|
||||
self.api_key_env = api_key_env
|
||||
self.api_base_env = api_base_env
|
||||
self.model = model
|
||||
self.timeout_seconds = timeout_seconds
|
||||
|
||||
async def retrieve(
|
||||
self,
|
||||
*,
|
||||
query: str,
|
||||
candidates: list[dict[str, str]],
|
||||
top_k: int,
|
||||
api_key: str | None = None,
|
||||
api_base: str | None = None,
|
||||
model: str | None = None,
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
timeout_seconds: float | None = None,
|
||||
fallback_top_k: int | None = None,
|
||||
) -> list[dict[str, str]]:
|
||||
"""Return candidates ordered by embedding similarity.
|
||||
|
||||
If embedding config is missing or the request fails, return the original
|
||||
candidate order. This keeps retrieval non-blocking for the main run.
|
||||
"""
|
||||
|
||||
if not candidates or top_k <= 0:
|
||||
return []
|
||||
|
||||
fallback = self._fallback_candidates(candidates, fallback_top_k=fallback_top_k)
|
||||
resolved_api_key = api_key or os.getenv(self.api_key_env)
|
||||
resolved_api_base = api_base or os.getenv(self.api_base_env)
|
||||
if not resolved_api_key or not resolved_api_base:
|
||||
return fallback
|
||||
|
||||
try:
|
||||
query_embedding = await self._embed_texts(
|
||||
api_key=resolved_api_key,
|
||||
api_base=resolved_api_base,
|
||||
texts=[query],
|
||||
model=model or self.model,
|
||||
extra_headers=extra_headers,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
candidate_embeddings = await self._embed_texts(
|
||||
api_key=resolved_api_key,
|
||||
api_base=resolved_api_base,
|
||||
texts=[self._candidate_text(item) for item in candidates],
|
||||
model=model or self.model,
|
||||
extra_headers=extra_headers,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
if not query_embedding or not query_embedding[0] or len(candidate_embeddings) != len(candidates):
|
||||
return fallback
|
||||
|
||||
query_vector = query_embedding[0]
|
||||
scored: list[tuple[float, dict[str, str]]] = []
|
||||
for candidate, vector in zip(candidates, candidate_embeddings, strict=False):
|
||||
if vector:
|
||||
scored.append((self._cosine_similarity(query_vector, vector), candidate))
|
||||
|
||||
scored.sort(key=lambda item: item[0], reverse=True)
|
||||
return [item[1] for item in scored[:top_k]]
|
||||
|
||||
async def _embed_texts(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
api_base: str,
|
||||
texts: list[str],
|
||||
model: str,
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
timeout_seconds: float | None = None,
|
||||
) -> list[list[float]]:
|
||||
all_vectors: list[list[float]] = []
|
||||
endpoint = self._normalize_embeddings_endpoint(api_base)
|
||||
for start in range(0, len(texts), 10):
|
||||
batch = texts[start:start + 10]
|
||||
payload = await self._post_embeddings(
|
||||
endpoint=endpoint,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
texts=batch,
|
||||
extra_headers=extra_headers,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
embeddings = payload.get("data") or []
|
||||
embeddings = sorted(embeddings, key=lambda item: item.get("index", 0))
|
||||
all_vectors.extend([list(item.get("embedding") or []) for item in embeddings])
|
||||
return all_vectors
|
||||
|
||||
async def _post_embeddings(
|
||||
self,
|
||||
*,
|
||||
endpoint: str,
|
||||
api_key: str,
|
||||
model: str,
|
||||
texts: list[str],
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
timeout_seconds: float | None = None,
|
||||
) -> dict[str, Any]:
|
||||
return await asyncio.to_thread(
|
||||
self._post_embeddings_sync,
|
||||
endpoint=endpoint,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
texts=texts,
|
||||
extra_headers=extra_headers,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
|
||||
def _post_embeddings_sync(
|
||||
self,
|
||||
*,
|
||||
endpoint: str,
|
||||
api_key: str,
|
||||
model: str,
|
||||
texts: list[str],
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
timeout_seconds: float | None = None,
|
||||
) -> dict[str, Any]:
|
||||
body = json.dumps(
|
||||
{
|
||||
"model": model,
|
||||
"input": texts if len(texts) > 1 else texts[0],
|
||||
"encoding_format": "float",
|
||||
}
|
||||
).encode("utf-8")
|
||||
req = request.Request(
|
||||
endpoint,
|
||||
data=body,
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
**(extra_headers or {}),
|
||||
},
|
||||
method="POST",
|
||||
)
|
||||
with request.urlopen(req, timeout=timeout_seconds or self.timeout_seconds) as response:
|
||||
return json.loads(response.read().decode("utf-8"))
|
||||
|
||||
@staticmethod
|
||||
def _fallback_candidates(
|
||||
candidates: list[dict[str, str]],
|
||||
*,
|
||||
fallback_top_k: int | None,
|
||||
) -> list[dict[str, str]]:
|
||||
if fallback_top_k is None:
|
||||
return list(candidates)
|
||||
if fallback_top_k <= 0:
|
||||
return []
|
||||
return candidates[:fallback_top_k]
|
||||
|
||||
@staticmethod
|
||||
def _candidate_text(candidate: dict[str, str]) -> str:
|
||||
parts = [
|
||||
(candidate.get("name") or "").strip(),
|
||||
(candidate.get("description") or "").strip(),
|
||||
(candidate.get("input_schema") or "").strip(),
|
||||
]
|
||||
return "\n".join(part for part in parts if part)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_embeddings_endpoint(api_base: str) -> str:
|
||||
base = api_base.rstrip("/")
|
||||
if base.endswith("/embeddings"):
|
||||
return base
|
||||
if base.endswith("/v1"):
|
||||
return f"{base}/embeddings"
|
||||
return f"{base}/v1/embeddings"
|
||||
|
||||
@staticmethod
|
||||
def _cosine_similarity(left: list[float], right: list[float]) -> float:
|
||||
if not left or not right or len(left) != len(right):
|
||||
return -1.0
|
||||
dot = sum(a * b for a, b in zip(left, right, strict=False))
|
||||
left_norm = math.sqrt(sum(a * a for a in left))
|
||||
right_norm = math.sqrt(sum(b * b for b in right))
|
||||
if left_norm == 0 or right_norm == 0:
|
||||
return -1.0
|
||||
return dot / (left_norm * right_norm)
|
||||
Reference in New Issue
Block a user