feat(engine): 添加MCP连接管理和工具集成功能

- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
This commit is contained in:
2026-05-14 09:43:48 +08:00
parent 8a12c30141
commit 30ab74ffb2
149 changed files with 12293 additions and 2812 deletions

View File

@ -1,6 +1,6 @@
"""Application services for Beaver."""
__all__ = ["AgentService", "MemoryService"]
__all__ = ["AgentService", "CronService", "MemoryService"]
def __getattr__(name: str):
@ -12,4 +12,8 @@ def __getattr__(name: str):
from .memory_service import MemoryService
return MemoryService
if name == "CronService":
from .cron_service import CronService
return CronService
raise AttributeError(name)

View File

@ -21,9 +21,13 @@ from beaver.coordinator.models import ExecutionNode, TeamRunResult
from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader
from beaver.engine.providers import make_provider_bundle
from beaver.foundation.events import InboundMessage, OutboundMessage
from beaver.foundation.models import CronJob, CronRunRecord
from beaver.tasks import MainAgentRouter, TaskExecutionPlan, TaskRecord, ValidationResult
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
class AgentService:
"""面向 interfaces 的统一 agent 运行入口。
@ -50,15 +54,24 @@ class AgentService:
self._loop: AgentLoop | None = None
self._run_task: asyncio.Task[None] | None = None
self._main_agent_router = MainAgentRouter()
self._runtime_services: dict[str, Any] = {}
def create_loop(self) -> AgentLoop:
    """Return the AgentLoop used by this service, building it on first use.

    A newly built loop is cached, receives every runtime service registered
    so far, and is booted before it is returned. Later calls reuse the
    cached loop unchanged.
    """
    if self._loop is not None:
        return self._loop
    # The loop is cached before boot() runs, matching the original ordering.
    self._loop = AgentLoop(profile=self.profile, loader=self.loader)
    self._loop.runtime_services.update(self._runtime_services)
    self._loop.boot()
    return self._loop
def register_runtime_service(self, name: str, service: Any) -> None:
    """Expose a process-level service to tools during agent runs.

    The service is remembered for loops created later and, when a loop
    already exists, published to that loop immediately.
    """
    self._runtime_services[name] = service
    live_loop = self._loop
    if live_loop is None:
        return
    live_loop.runtime_services[name] = service
@property
def has_loop(self) -> bool:
"""当前 service 是否已经创建过 loop。"""
@ -196,6 +209,191 @@ class AgentService:
loop = self.create_loop()
return await self._process_with_main_agent(message, runner=loop.submit_direct, kwargs=kwargs)
async def run_scheduled_task(
    self,
    message: str,
    *,
    session_id: str,
    cron_job_id: str,
    cron_job_name: str,
    scheduled_run_id: str | None = None,
    requires_followup: bool = False,
) -> AgentRunResult:
    """Run a cron trigger as a normal internal Task.

    Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
    This entry bypasses the main-agent classifier and forces Task mode so
    every trigger produces a TaskRecord, validation, feedback state, and a
    run_id that the scheduled-task history can link to.

    Args:
        message: Instruction text; also used as the Task description.
        session_id: Session the Task and the run are attached to.
        cron_job_id: Identifier of the triggering cron job.
        cron_job_name: Name of the triggering cron job; used as the run title.
        scheduled_run_id: Identifier of this trigger, when one is known.
        requires_followup: Stored in the Task metadata as-is.

    Returns:
        The result produced by the Task-mode run.
    """
    loop = self.create_loop()
    task_service = self._require_loaded(loop.boot(), "task_service")
    task = task_service.create_task(
        session_id=session_id,
        description=message,
        creator="cron",
        metadata={
            "source": "scheduled_cron",
            "cron_job_id": cron_job_id,
            "cron_job_name": cron_job_name,
            "scheduled_run_id": scheduled_run_id,
            "user_engaged": False,
            "requires_followup": requires_followup,
        },
    )
    execution_context = (
        "This turn was triggered automatically by a scheduled task.\n\n"
        f"Cron Job ID: {cron_job_id}\n"
        f"Cron Job Name: {cron_job_name}\n"
        f"Scheduled Run ID: {scheduled_run_id or 'unknown'}\n"
        "Run it as a normal Beaver Task. Do not ask the user for confirmation; "
        "execute the task and report the concrete outcome."
    )
    runner = loop.submit_direct if self.is_running else loop.process_direct
    result = await self._run_task_mode(
        message,
        runner=runner,
        task=task,
        kwargs={
            "session_id": session_id,
            "source": "cron",
            "user_id": "cron",
            "title": cron_job_name,
            "execution_context": execution_context,
        },
    )
    # Re-read the loaded components after the run, as the original code did.
    loaded = self.create_loop().boot()
    session_manager = self._require_loaded(loaded, "session_manager")
    # This method receives ids and names, not CronJob/CronRunRecord objects;
    # the previous code read undefined `job`/`run` names here and raised
    # NameError after every scheduled task.
    # NOTE(review): the "scheduled_reply" / "notification" labels are kept
    # verbatim; confirm they are the intended tags for a Task-mode cron run.
    session_manager.update_latest_assistant_event_payload(
        result.session_id,
        result.run_id,
        {
            "message_type": "scheduled_reply",
            "scheduled_job_id": cron_job_id,
            "scheduled_run_id": scheduled_run_id,
            "cron_job_name": cron_job_name,
            "mode": "notification",
        },
    )
    return result
async def run_scheduled_notification(
    self,
    message: str,
    *,
    session_id: str = NOTIFICATION_SESSION_ID,
    cron_job_id: str,
    cron_job_name: str,
    scheduled_run_id: str,
) -> AgentRunResult:
    """Run a cron trigger as a notification result, not as an active Task.

    The message is sent straight to the loop runner (no Task is created),
    and the latest assistant event of the resulting run is then tagged with
    the scheduled-job identifiers.
    """
    loop = self.create_loop()
    session_manager = self._require_loaded(loop.boot(), "session_manager")
    if self.is_running:
        runner = loop.submit_direct
    else:
        runner = loop.process_direct
    execution_context = (
        "This turn was triggered automatically by a scheduled notification.\n\n"
        f"Cron Job ID: {cron_job_id}\n"
        f"Cron Job Name: {cron_job_name}\n"
        f"Scheduled Run ID: {scheduled_run_id}\n"
        "Generate the notification content directly for the user. Do not ask for confirmation."
    )
    result = await runner(
        message,
        session_id=session_id,
        source="notification",
        user_id="cron",
        title=cron_job_name,
        execution_context=execution_context,
    )
    event_tags = {
        "message_type": "scheduled_result",
        "scheduled_job_id": cron_job_id,
        "scheduled_run_id": scheduled_run_id,
        "cron_job_name": cron_job_name,
        "mode": "notification",
    }
    session_manager.update_latest_assistant_event_payload(result.session_id, result.run_id, event_tags)
    return result
def engage_scheduled_run(
    self,
    *,
    job: CronJob,
    run: CronRunRecord,
    intent: str = "revise_once",
    thinking_enabled: bool | None = None,
) -> TaskRecord:
    """Create or mark the Task that lets the user work on a scheduled result.

    When the run already points at a Task that still exists, that Task is
    flagged as user-engaged, stored, and returned. Otherwise a new Task is
    created in the run's notification session (or the default notification
    session when the run has none).

    ``thinking_enabled`` is accepted but not used by this method.
    """
    task_service = self._require_loaded(self.create_loop().boot(), "task_service")
    linked_task = task_service.get_task(run.task_id) if run.task_id else None
    if linked_task is not None:
        linked_task.metadata["user_engaged"] = True
        linked_task.metadata["engage_intent"] = intent
        task_service.store.upsert_task(linked_task)
        return linked_task
    metadata = {
        "source": "scheduled_run",
        "cron_job_id": job.id,
        "cron_job_name": job.name,
        "scheduled_run_id": run.scheduled_run_id,
        "scheduled_output": run.output,
        "user_engaged": True,
        "engage_intent": intent,
    }
    return task_service.create_task(
        session_id=run.notification_session_id or NOTIFICATION_SESSION_ID,
        description=f"修改定时通知:{job.name}",
        creator="cron",
        metadata=metadata,
    )
async def submit_scheduled_reply(
    self,
    message: str,
    *,
    job: CronJob,
    run: CronRunRecord,
    intent: str = "revise_once",
    thinking_enabled: bool | None = None,
) -> AgentRunResult:
    """Handle a user's reply to a scheduled notification as a Task continuation.

    Args:
        message: The user's reply text.
        job: Cron job that produced the notification.
        run: The scheduled run record the user is replying to.
        intent: Engagement intent, stored on the Task and quoted in the
            execution context.
        thinking_enabled: Forwarded to the run unchanged. It was previously
            read without being a parameter, which raised NameError on every
            call; the ``None`` default keeps existing callers working.

    Returns:
        The result of the Task-mode run.
    """
    task = self.engage_scheduled_run(job=job, run=run, intent=intent)
    loop = self.create_loop()
    runner = loop.submit_direct if self.is_running else loop.process_direct
    execution_context = (
        "The user is replying to a scheduled notification result.\n\n"
        f"Cron Job ID: {job.id}\n"
        f"Cron Job Name: {job.name}\n"
        f"Scheduled Run ID: {run.scheduled_run_id}\n"
        f"Engagement intent: {intent}\n"
        f"Original scheduled instruction: {job.payload.message}\n"
        f"Original notification output:\n{run.output or ''}\n\n"
        "Handle this as a Task continuation. If the intent is update_future, explain the durable change "
        "that should apply to future notifications."
    )
    return await self._run_task_mode(
        message,
        runner=runner,
        task=task,
        kwargs={
            "session_id": task.session_id,
            "source": "notification",
            "user_id": "web",
            "title": job.name,
            "execution_context": execution_context,
            "thinking_enabled": thinking_enabled,
        },
    )
async def submit_feedback(
self,
*,
@ -269,19 +467,51 @@ class AgentService:
generated_candidates = []
validation = ValidationResult.from_dict(updated.validation_result)
if not already_recorded:
run_memory_store = self._require_loaded(loaded, "run_memory_store")
feedback_payload = {
"feedback_type": normalized,
"comment": comment or "",
"task_status": updated.status,
}
run_memory_store.update_run_record(
run_id,
success=normalized == "satisfied",
feedback=feedback_payload,
)
run_memory_store.update_skill_effects_for_run(
run_id,
success=normalized == "satisfied",
feedback_score=self._feedback_score_for_learning(normalized, validation),
notes=(comment or normalized).strip(),
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
skill_learning_service.rescore_skill_versions()
if already_recorded:
generated_candidates = []
elif normalized == "satisfied" and validation is not None and validation.accepted:
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
generated_candidates = [item.to_dict() for item in skill_learning_service.build_learning_candidates()]
generated_candidates = [
item.to_dict()
for item in skill_learning_service.build_learning_candidates_for_task(
updated.task_id,
trigger_run_id=run_id,
)
]
elif normalized == "abandon":
memory_service = self._require_loaded(loaded, "memory_service")
memory_service.get_store().add(
"memory",
(
f"Failure memory: task {task.task_id} in session {session_id} was abandoned. "
f"Reason: {(comment or 'not specified').strip()}"
),
session_manager.append_message(
session_id,
run_id=run_id,
role="system",
event_type="task_failure_evidence_recorded",
event_payload={
"task_id": updated.task_id,
"feedback_type": normalized,
"comment": comment or "",
"task_status": updated.status,
"durable_memory_written": False,
},
content=(comment or "Task abandoned; retained as run/session failure evidence."),
context_visible=False,
)
return {
@ -302,20 +532,46 @@ class AgentService:
) -> AgentRunResult:
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
session_manager = self._require_loaded(loaded, "session_manager")
session_id = kwargs.get("session_id") or uuid4().hex
kwargs = dict(kwargs)
kwargs["session_id"] = session_id
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
kwargs["provider_bundle"] = provider_bundle
router_provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
router_runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
active_task = task_service.get_latest_open_task(session_id)
decision = self._main_agent_router.classify(message, active_task=active_task)
decision = await self._main_agent_router.classify(
message,
active_task=active_task,
provider=router_provider,
model=getattr(router_runtime, "model", None),
recent_messages=session_manager.get_messages_as_conversation(session_id),
thinking_enabled=kwargs.get("thinking_enabled"),
)
if active_task is not None and decision.short_title and not active_task.metadata.get("short_title"):
active_task.metadata["short_title"] = decision.short_title
task_service.store.upsert_task(active_task)
if active_task is not None and decision.closes_task:
task_service.close_task(active_task.task_id, reason=decision.reason)
return await runner(message, **kwargs)
if active_task is not None and decision.abandons_task:
task_service.abandon_task(active_task.task_id, reason=decision.reason)
return await runner(message, **kwargs)
if not decision.is_task:
kwargs["include_skill_assembly"] = False
kwargs["include_tools"] = False
return await runner(message, **kwargs)
task = (
task_service.create_task(
session_id=session_id,
description=message,
metadata={"router_reason": decision.reason},
metadata={
"router_reason": decision.reason,
**({"short_title": decision.short_title} if decision.short_title else {}),
},
)
if active_task is None or decision.starts_new_task
else active_task
@ -420,7 +676,7 @@ class AgentService:
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"learning_candidate_enabled": False,
"allow_candidate_generation": False,
}
)
if attempt_index == 2 and latest_validation is not None:
@ -433,6 +689,14 @@ class AgentService:
)
elif team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
plan=plan,
team_summaries=team_summaries,
)
result = await runner(message, **attempt_kwargs)
last_result = result
@ -519,7 +783,7 @@ class AgentService:
parent_session_id=parent_session_id,
parent_run_id=None,
provider_bundle_factory=provider_bundle_factory,
learning_candidate_enabled=False,
allow_candidate_generation=False,
)
return result, None
except Exception as exc:
@ -542,6 +806,93 @@ class AgentService:
return [receipt.skill_name for receipt in record.activated_skills]
return []
@staticmethod
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
if feedback_type == "satisfied":
if validation is not None:
return max(0.0, min(1.0, float(validation.score)))
return 1.0
if feedback_type == "revise":
return 0.5
return 0.0
@staticmethod
def _build_skill_selection_context(
*,
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
plan: TaskExecutionPlan | None = None,
team_summaries: list[str] | None = None,
) -> str:
phase = f"attempt_{attempt_index}"
if latest_validation is not None:
phase = f"revision_attempt_{attempt_index}"
elif plan is not None and plan.is_team:
phase = f"team_synthesis_attempt_{attempt_index}"
sections = [
f"Task goal:\n{task.goal or task.description}",
f"Task description:\n{task.description}",
f"Current user request:\n{user_message}",
f"Execution phase:\n{phase}",
f"Task status:\n{task.status}",
]
if task.constraints:
sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints))
if task.skill_names:
sections.append(
"Previously activated skills (reuse bias, not pinned):\n"
+ "\n".join(f"- {item}" for item in task.skill_names)
)
else:
sections.append("Previously activated skills:\nNone")
if latest_validation is not None:
validation_lines = [
f"accepted: {latest_validation.accepted}",
f"score: {latest_validation.score}",
]
if latest_validation.issues:
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
if latest_validation.missing_requirements:
validation_lines.append(
"missing requirements:\n"
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
)
if latest_validation.recommended_revision_prompt:
validation_lines.append(
"recommended revision:\n"
+ latest_validation.recommended_revision_prompt
)
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
if plan is not None:
plan_lines = [
f"mode: {plan.mode}",
f"reason: {plan.reason}",
]
if plan.final_synthesis_instruction:
plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
if plan.graph is not None:
plan_lines.append(f"strategy: {plan.graph.strategy}")
plan_lines.append(
"nodes:\n"
+ "\n".join(
f"- {node.node_id}: {node.task}"
for node in plan.graph.nodes
)
)
sections.append("Execution plan:\n" + "\n".join(plan_lines))
if team_summaries:
sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
sections.append(
"Skill selection instruction:\n"
"Prefer reusing previously activated skills when they still match the Task. "
"Select new skills only if the current request, revision, or execution plan needs a different capability. "
"If no published skill matches, return [] and let the run continue without skills."
)
return "\n\n".join(section for section in sections if section.strip())
@staticmethod
def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str:
lines = []
@ -611,8 +962,8 @@ class AgentService:
skill.name for skill in node.inherited_pinned_skill_contexts
]
payload["skill_query"] = node.agent.metadata.get("skill_query")
payload["generated_skill_draft_id"] = node.agent.metadata.get("generated_skill_draft_id")
payload["generated_skill_name"] = node.agent.metadata.get("generated_skill_name")
payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
payloads.append(payload)
return payloads

View File

@ -0,0 +1,508 @@
"""Cron scheduling service for Beaver scheduled Tasks."""
from __future__ import annotations
import asyncio
import inspect
import json
import os
import re
import tempfile
import threading
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo
from beaver.foundation.models import CronExecutionResult, CronJob, CronPayload, CronRunRecord, CronSchedule
try: # pragma: no cover - exercised through cron schedule tests when installed
from croniter import croniter
except ModuleNotFoundError: # pragma: no cover - defensive dependency guard
croniter = None # type: ignore[assignment]
# Signature of the job callback: an async callable whose awaited result is a
# CronExecutionResult, a plain string, or None.
CronCallback = Callable[..., Awaitable[CronExecutionResult | str | None]]
# Matches a whole-string duration such as "30s", "15 min" or "2 hours"
# (case-insensitive); group 1 is the amount, group 2 the unit word.
_DURATION_RE = re.compile(
    r"^(\d+)\s*(s|sec|secs|second|seconds|m|min|mins|minute|minutes|h|hr|hrs|hour|hours|d|day|days)$",
    re.IGNORECASE,
)
# Character whitelist applied to individual cron fields before an expression
# is handed to full validation.
_CRON_FIELD_RE = re.compile(r"^[\d\*\?,\-/LW#]+$", re.IGNORECASE)
# Number of most recent run records kept in each job's history.
_MAX_HISTORY = 20
class CronService:
"""Persistent single-timer scheduler.
Hermes' cron implementation stores jobs as JSON and ticks safely in the
background. Beaver keeps that shape, but the callback is required to route
agent work through Task mode so every scheduled trigger is visible as a
normal Task.
"""
def __init__(self, store_path: str | Path, *, on_job: CronCallback | None = None) -> None:
    """Set up an idle scheduler bound to the JSON store at ``store_path``.

    Nothing is read from disk here; jobs are loaded on first access.
    ``on_job`` is the optional async callback invoked for each triggered job.
    """
    self.store_path = Path(store_path)
    self.on_job = on_job
    # Scheduler state: not running and no timer armed until start() is called.
    self._running = False
    self._timer_task: asyncio.Task[None] | None = None
    # In-memory job cache (None until first load) and the lock guarding it.
    self._jobs: list[CronJob] | None = None
    self._lock = threading.Lock()
async def start(self) -> None:
    """Start the scheduler.

    Marks the service as running, loads the jobs, refreshes missing or
    stale next-run times, persists the result, and arms the wake-up timer.
    """
    self._running = True
    # Load first so the recompute and save steps below act on the cached jobs.
    self._load_jobs()
    self._recompute_next_runs()
    self._save_jobs()
    self._arm_timer()
def stop(self) -> None:
    """Stop the scheduler and cancel the pending wake-up timer, if any."""
    self._running = False
    pending = self._timer_task
    if pending is None:
        return
    pending.cancel()
    self._timer_task = None
def status(self) -> dict[str, Any]:
    """Return a summary: running flag, total job count, and next wake time in ms."""
    job_count = len(self.list_jobs(include_disabled=True))
    summary: dict[str, Any] = {"enabled": self._running, "jobs": job_count}
    summary["next_wake_at_ms"] = self._next_wake_ms()
    return summary
def list_jobs(self, *, include_disabled: bool = False) -> list[CronJob]:
    """Return jobs ordered by next run time, soonest first.

    Disabled jobs are left out unless ``include_disabled`` is true. Jobs
    without a next run time sort after all jobs that have one.
    """
    selected = [job for job in self._load_jobs() if include_disabled or job.enabled]
    selected.sort(key=lambda job: job.next_run_at_ms or 9_999_999_999_999)
    return selected
def get_job(self, job_id: str) -> CronJob | None:
    """Return the first job whose id equals ``job_id``, or None when there is none."""
    return next((job for job in self._load_jobs() if job.id == job_id), None)
def add_job(
    self,
    *,
    name: str,
    message: str,
    schedule: CronSchedule,
    session_key: str | None = None,
    payload_kind: str = "agent_turn",
    mode: str = "notification",
    requires_followup: bool = False,
    deliver: bool = False,
    channel: str | None = None,
    to: str | None = None,
    delete_after_run: bool = False,
) -> CronJob:
    """Create, persist, and schedule a new enabled job.

    The name falls back to the first 50 characters of the message and then
    to "scheduled task". Unknown ``payload_kind`` values become
    "agent_turn", and any ``mode`` other than "task" becomes "notification".

    Raises:
        ValueError: If the message is blank or the schedule is invalid.
    """
    job_name = name.strip() or message[:50].strip() or "scheduled task"
    job_message = message.strip()
    if not job_message:
        raise ValueError("message is required")
    validate_schedule(schedule)

    created_ms = _now_ms()
    known_kind = payload_kind in {"agent_turn", "system_event"}
    payload = CronPayload(
        kind=payload_kind if known_kind else "agent_turn",  # type: ignore[arg-type]
        mode="task" if mode == "task" else "notification",
        message=job_message,
        session_key=session_key,
        requires_followup=requires_followup,
        deliver=deliver,
        channel=channel,
        to=to,
    )
    job = CronJob(
        id=uuid4().hex[:12],
        name=job_name,
        enabled=True,
        schedule=schedule,
        payload=payload,
        next_run_at_ms=compute_next_run(schedule, now_ms=created_ms),
        created_at_ms=created_ms,
        updated_at_ms=created_ms,
        delete_after_run=delete_after_run,
    )
    with self._lock:
        jobs = self._load_jobs_unlocked()
        jobs.append(job)
        self._jobs = jobs
        self._save_jobs_unlocked()
    # Re-arm only after the lock is released: arming reads the job list
    # through a path that takes the same lock.
    self._arm_timer()
    return job
def update_enabled(self, job_id: str, enabled: bool) -> CronJob | None:
    """Enable or disable a job and persist the change.

    Enabling recomputes the job's next run time; disabling clears it.

    Returns:
        The updated job, or None when no job has the given id.
    """
    updated: CronJob | None = None
    with self._lock:
        for job in self._load_jobs_unlocked():
            if job.id != job_id:
                continue
            job.enabled = bool(enabled)
            job.updated_at_ms = _now_ms()
            job.next_run_at_ms = compute_next_run(job.schedule) if job.enabled else None
            self._save_jobs_unlocked()
            updated = job
            break
    if updated is not None:
        # Re-arm after releasing the lock: _arm_timer() reads the job list via
        # _next_wake_ms() -> _load_jobs(), which acquires self._lock. That lock
        # is a non-reentrant threading.Lock, so arming while holding it would
        # block forever once the service is running.
        self._arm_timer()
    return updated
def remove_job(self, job_id: str) -> bool:
    """Delete the job with the given id.

    Returns:
        True when a job was removed (the store is saved and the timer
        re-armed), False when no job matched.
    """
    with self._lock:
        current = self._load_jobs_unlocked()
        remaining = [job for job in current if job.id != job_id]
        removed = len(remaining) != len(current)
        if removed:
            self._jobs = remaining
            self._save_jobs_unlocked()
    if removed:
        # Arm outside the lock; arming reads the job list under the same lock.
        self._arm_timer()
    return removed
async def run_job(self, job_id: str, *, force: bool = False) -> bool:
    """Execute one job immediately.

    Returns:
        False when the job does not exist, or is disabled and ``force`` is
        not set; True after the job was executed, the store saved, and the
        timer re-armed.
    """
    job = self.get_job(job_id)
    runnable = job is not None and (force or job.enabled)
    if not runnable:
        return False
    await self._execute_job(job)
    self._save_jobs()
    self._arm_timer()
    return True
def list_runs(self) -> list[tuple[CronJob, CronRunRecord]]:
    """Return every recorded run of every job as (job, run) pairs, newest start first."""
    pairs = [
        (job, run)
        for job in self.list_jobs(include_disabled=True)
        for run in job.history
    ]
    pairs.sort(key=lambda pair: pair[1].started_at_ms, reverse=True)
    return pairs
def get_run(self, scheduled_run_id: str) -> tuple[CronJob, CronRunRecord] | None:
    """Return the (job, run) pair for ``scheduled_run_id``, or None when it is unknown."""
    matches = (
        (job, run)
        for job, run in self.list_runs()
        if run.scheduled_run_id == scheduled_run_id
    )
    return next(matches, None)
def mark_run_engaged(
    self,
    scheduled_run_id: str,
    *,
    task_id: str,
    intent: str,
) -> tuple[CronJob, CronRunRecord] | None:
    """Record that the user engaged with a scheduled run.

    The first run whose id matches is flagged as engaged, stamped with the
    current time, the intent, and the task id; the owning job's update
    time is refreshed and the store is saved.

    Returns:
        The (job, run) pair that was updated, or None when no run matched.
    """
    with self._lock:
        candidates = (
            (job, run)
            for job in self._load_jobs_unlocked()
            for run in job.history
            if run.scheduled_run_id == scheduled_run_id
        )
        hit = next(candidates, None)
        if hit is None:
            return None
        job, run = hit
        run.engaged = True
        run.engaged_at_ms = _now_ms()
        run.engage_intent = intent
        run.task_id = task_id
        job.updated_at_ms = _now_ms()
        self._save_jobs_unlocked()
        return job, run
def update_job_message(self, job_id: str, message: str) -> CronJob | None:
    """Replace a job's payload message and persist the change.

    Returns:
        The updated job, or None when no job has the given id.

    Raises:
        ValueError: If the message is blank after stripping whitespace.
    """
    new_message = message.strip()
    if not new_message:
        raise ValueError("message is required")
    with self._lock:
        target = next((job for job in self._load_jobs_unlocked() if job.id == job_id), None)
        if target is None:
            return None
        target.payload.message = new_message
        target.updated_at_ms = _now_ms()
        self._save_jobs_unlocked()
        return target
async def _on_timer(self) -> None:
    """Run every enabled job that is due, then save and re-arm the timer.

    The set of due jobs is fixed before the first job runs; jobs are then
    executed one after another.
    """
    cutoff = _now_ms()

    def is_due(job: CronJob) -> bool:
        return job.next_run_at_ms is not None and job.next_run_at_ms <= cutoff

    due_jobs = list(filter(is_due, self.list_jobs(include_disabled=False)))
    for job in due_jobs:
        await self._execute_job(job)
    self._save_jobs()
    self._arm_timer()
async def _execute_job(self, job: CronJob) -> None:
    """Run one job through the callback and record the outcome on the job.

    A run record is appended to the job's history (trimmed to the most
    recent ``_MAX_HISTORY`` entries) whether the callback succeeds or
    fails. Ordinary exceptions from the callback are captured as an
    "error" status instead of being raised.

    One-shot ("at") jobs are afterwards deleted when ``delete_after_run``
    is set, otherwise disabled; recurring jobs get their next run time.

    The ``finally`` block deliberately contains no ``return``: returning
    from ``finally`` would discard an in-flight ``asyncio.CancelledError``
    (or other BaseException) and hide cancellation from the caller.
    """
    start_ms = _now_ms()
    run_record = CronRunRecord(started_at_ms=start_ms, mode=job.payload.mode)
    try:
        result = CronExecutionResult(mode=job.payload.mode)
        if self.on_job is not None:
            raw = await self._call_on_job(job, run_record)
            if isinstance(raw, CronExecutionResult):
                result = raw
            else:
                # Plain string / None results are wrapped in a default result.
                result = CronExecutionResult(response=raw, mode=job.payload.mode)
        run_record.status = "ok"
        run_record.mode = result.mode
        run_record.output = result.response
        run_record.notification_session_id = result.notification_session_id
        run_record.task_id = result.task_id
        run_record.run_id = result.run_id
        job.last_status = "ok"
        job.last_error = None
    except Exception as exc:
        run_record.status = "error"
        run_record.error = str(exc)
        job.last_status = "error"
        job.last_error = str(exc)
    finally:
        finish_ms = _now_ms()
        run_record.finished_at_ms = finish_ms
        job.last_run_at_ms = start_ms
        job.updated_at_ms = finish_ms
        job.history.append(run_record)
        job.history = job.history[-_MAX_HISTORY:]
        if job.schedule.kind != "at":
            job.next_run_at_ms = compute_next_run(
                job.schedule,
                now_ms=_now_ms(),
                last_run_at_ms=job.last_run_at_ms,
            )
        elif job.delete_after_run:
            with self._lock:
                self._jobs = [item for item in self._load_jobs_unlocked() if item.id != job.id]
        else:
            job.enabled = False
            job.next_run_at_ms = None
async def _call_on_job(self, job: CronJob, run_record: CronRunRecord) -> CronExecutionResult | str | None:
    """Invoke the job callback, passing the run record only if it accepts one.

    A callback whose signature declares at least two parameters is called
    as ``on_job(job, run_record)``; otherwise, or when the signature cannot
    be inspected, it is called as ``on_job(job)``. Returns None when no
    callback is configured.
    """
    if self.on_job is None:
        return None
    try:
        arity = len(inspect.signature(self.on_job).parameters)
    except (TypeError, ValueError):
        arity = 0
    if arity < 2:
        return await self.on_job(job)
    return await self.on_job(job, run_record)
def _recompute_next_runs(self) -> None:
    """Refresh the next run time of enabled jobs whose value is missing or stale.

    A next run time counts as stale when it lies more than 7,200,000 ms
    (two hours) in the past. The store is saved only if something changed.
    """
    now = _now_ms()
    stale_before = now - 7_200_000
    refreshed = False
    for job in self._load_jobs():
        if not job.enabled:
            continue
        scheduled = job.next_run_at_ms
        if scheduled is not None and scheduled >= stale_before:
            continue
        job.next_run_at_ms = compute_next_run(job.schedule, now_ms=now, last_run_at_ms=job.last_run_at_ms)
        refreshed = True
    if refreshed:
        self._save_jobs()
def _next_wake_ms(self) -> int | None:
    """Return the earliest next run time among enabled jobs, or None if there is none."""
    wake_times = (
        job.next_run_at_ms
        for job in self._load_jobs()
        if job.enabled and job.next_run_at_ms is not None
    )
    return min(wake_times, default=None)
def _arm_timer(self) -> None:
    """Cancel any pending timer and schedule a wake-up for the next due job.

    Nothing is scheduled when the service is stopped or no enabled job has
    a next run time. The scheduled task sleeps until the wake time and then
    runs the due jobs, provided the service is still running.

    NOTE(review): the timer task is also the task that executes due jobs,
    so re-arming while it is running requests cancellation of that task.
    """
    previous = self._timer_task
    if previous is not None:
        previous.cancel()
        self._timer_task = None
    if not self._running:
        return
    wake_at = self._next_wake_ms()
    if wake_at is None:
        return

    async def tick() -> None:
        remaining_ms = max(0, wake_at - _now_ms())
        await asyncio.sleep(remaining_ms / 1000)
        if self._running:
            await self._on_timer()

    self._timer_task = asyncio.create_task(tick())
def _load_jobs(self) -> list[CronJob]:
    """Return a shallow copy of the job list, loading it under the lock if needed."""
    with self._lock:
        snapshot = list(self._load_jobs_unlocked())
    return snapshot
def _load_jobs_unlocked(self) -> list[CronJob]:
    """Return the cached job list, reading the JSON store on first use.

    The store directory is created and its permissions tightened before
    reading. A missing store file yields an empty list. Entries under the
    "jobs" key that are not dicts are ignored. The caller is expected to
    hold ``self._lock``.
    """
    if self._jobs is not None:
        return self._jobs
    store_dir = self.store_path.parent
    store_dir.mkdir(parents=True, exist_ok=True)
    _secure_dir(store_dir)
    if self.store_path.exists():
        payload = json.loads(self.store_path.read_text(encoding="utf-8"))
        raw_jobs = payload.get("jobs") if isinstance(payload, dict) else []
        self._jobs = [CronJob.from_dict(item) for item in raw_jobs or [] if isinstance(item, dict)]
    else:
        self._jobs = []
    return self._jobs
def _save_jobs(self) -> None:
    """Persist the cached job list while holding the lock."""
    guard = self._lock
    with guard:
        self._save_jobs_unlocked()
def _save_jobs_unlocked(self) -> None:
    """Write the cached jobs to the store file via a temp file and rename.

    Does nothing when no job list has been loaded yet. The JSON document is
    written to a temporary file in the store directory, flushed and
    fsynced, and then moved over the store path with ``os.replace``. A
    leftover temporary file is removed. The caller is expected to hold
    ``self._lock``.
    """
    if self._jobs is None:
        return
    store_dir = self.store_path.parent
    store_dir.mkdir(parents=True, exist_ok=True)
    _secure_dir(store_dir)
    fd, tmp_name = tempfile.mkstemp(prefix=".jobs-", suffix=".json", dir=str(store_dir))
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            document = {
                "version": 1,
                "updated_at_ms": _now_ms(),
                "jobs": [job.to_dict() for job in self._jobs],
            }
            json.dump(document, handle, ensure_ascii=False, indent=2, sort_keys=True)
            handle.write("\n")
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(tmp_path, self.store_path)
        _secure_file(self.store_path)
    finally:
        # After a successful replace the temp path no longer exists.
        if tmp_path.exists():
            tmp_path.unlink()
def parse_duration(value: str) -> int:
    """Convert a duration such as "30s", "15 min", or "2 hours" to seconds.

    The unit is identified by its first letter: s, m, h, or d.

    Raises:
        ValueError: If the text is not "<number><unit>".
    """
    parsed = _DURATION_RE.match(value.strip())
    if not parsed:
        raise ValueError("duration must look like 30s, 15m, 2h, or 1d")
    seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    unit_key = parsed.group(2).lower()[0]
    return int(parsed.group(1)) * seconds_per_unit[unit_key]
def parse_schedule(value: str) -> CronSchedule:
    """Parse a human-written schedule string into a CronSchedule.

    Accepted forms, tried in this order:
      * "every <duration>" -- recurring interval, e.g. "every 15m";
      * a 5- or 6-field cron expression;
      * a bare duration -- one-shot, that long from now, e.g. "2h";
      * an ISO-8601 date or datetime -- one-shot at that moment.

    Raises:
        ValueError: If the text matches none of the forms, the interval is
            zero, or the cron expression fails validation.
    """
    raw = value.strip()
    if raw.lower().startswith("every "):
        seconds = parse_duration(raw[6:].strip())
        if seconds <= 0:
            # Reject here instead of returning a schedule that can never
            # pass validate_schedule().
            raise ValueError("every interval must be greater than 0")
        return CronSchedule(kind="every", every_ms=seconds * 1000, display=f"every {seconds}s")
    parts = raw.split()
    if len(parts) in {5, 6} and all(_CRON_FIELD_RE.match(item) for item in parts[:5]):
        schedule = CronSchedule(kind="cron", expr=raw, display=raw)
        validate_schedule(schedule)
        return schedule
    # Check for a duration before applying the "contains T" ISO heuristic:
    # upper-case unit words such as "10 MINUTES" contain a "T" and would
    # otherwise be sent to the datetime parser and rejected.
    is_duration = _DURATION_RE.match(raw) is not None
    if not is_duration and ("T" in raw or re.match(r"^\d{4}-\d{2}-\d{2}", raw)):
        dt = _parse_datetime(raw)
        return CronSchedule(kind="at", at_ms=int(dt.timestamp() * 1000), display=f"once at {dt:%Y-%m-%d %H:%M}")
    seconds = parse_duration(raw)
    at_ms = _now_ms() + seconds * 1000
    return CronSchedule(kind="at", at_ms=at_ms, display=f"once in {raw}")
def schedule_from_api(payload: dict[str, Any]) -> CronSchedule:
    """Build a CronSchedule from an API payload.

    The first usable key wins, checked in this order: ``schedule``
    (free-form text), ``every_seconds``, ``cron_expr`` (with optional
    ``tz``), ``at_iso``.

    Raises:
        ValueError: If none of the keys is present, ``every_seconds`` is not
            positive, or the chosen value fails parsing or validation.
    """
    text = payload.get("schedule")
    if text:
        return parse_schedule(str(text))

    every = payload.get("every_seconds")
    if every not in (None, ""):
        seconds = int(every)
        if seconds <= 0:
            raise ValueError("every_seconds must be greater than 0")
        return CronSchedule(kind="every", every_ms=seconds * 1000, display=f"every {seconds}s")

    cron_expr = payload.get("cron_expr")
    if cron_expr:
        expr = str(cron_expr).strip()
        schedule = CronSchedule(kind="cron", expr=expr, tz=_optional_str(payload.get("tz")), display=expr)
        validate_schedule(schedule)
        return schedule

    at_iso = payload.get("at_iso")
    if at_iso:
        dt = _parse_datetime(str(at_iso))
        return CronSchedule(kind="at", at_ms=int(dt.timestamp() * 1000), display=f"once at {dt:%Y-%m-%d %H:%M}")

    raise ValueError("one of schedule, every_seconds, cron_expr, or at_iso is required")
def validate_schedule(schedule: CronSchedule) -> None:
    """Check that a schedule is well-formed for its kind.

    Raises:
        ValueError: For a non-positive "every" interval, an "at" schedule
            without a timestamp, a "cron" schedule with a missing or invalid
            expression, an unknown timezone, or a missing croniter package,
            and for any unknown schedule kind.
    """
    kind = schedule.kind
    if kind == "every":
        if not schedule.every_ms or schedule.every_ms <= 0:
            raise ValueError("every schedule requires a positive every_ms")
        return
    if kind == "at":
        if not schedule.at_ms:
            raise ValueError("at schedule requires at_ms")
        return
    if kind != "cron":
        raise ValueError(f"unknown schedule kind: {schedule.kind}")

    if not schedule.expr:
        raise ValueError("cron schedule requires expr")
    if schedule.tz:
        try:
            ZoneInfo(schedule.tz)
        except Exception as exc:
            raise ValueError(f"unknown timezone: {schedule.tz}") from exc
    if croniter is None:
        raise ValueError("cron schedules require the croniter package")
    try:
        croniter(schedule.expr, _aware_now(schedule.tz))
    except Exception as exc:
        raise ValueError(f"invalid cron expression: {schedule.expr}") from exc
def compute_next_run(
    schedule: CronSchedule,
    *,
    now_ms: int | None = None,
    last_run_at_ms: int | None = None,
) -> int | None:
    """Return the next trigger time for a schedule, in epoch milliseconds.

    Args:
        schedule: The schedule to evaluate.
        now_ms: Reference "now"; the current time is used when omitted.
        last_run_at_ms: Start time of the previous run, if any. Interval and
            cron schedules count from it; otherwise they count from now.

    Returns:
        * "at": the timestamp while it is still in the future, else None.
        * "every": the first time after ``now_ms`` on the grid
          ``base + k * every_ms``; None for a non-positive interval.
        * "cron": the next match after the base time (requires croniter).
        * None for anything else.
    """
    now_ms = now_ms or _now_ms()
    if schedule.kind == "at":
        return schedule.at_ms if schedule.at_ms and schedule.at_ms > now_ms else None
    if schedule.kind == "every":
        interval = schedule.every_ms
        if not interval or interval <= 0:
            return None
        next_run = (last_run_at_ms or now_ms) + interval
        if next_run <= now_ms:
            # Skip every missed slot in one step. Stepping one interval at a
            # time takes (gap / interval) iterations, which is unbounded for
            # a short interval and an old last run.
            missed = (now_ms - next_run) // interval + 1
            next_run += missed * interval
        return next_run
    if schedule.kind == "cron" and schedule.expr and croniter is not None:
        base = datetime.fromtimestamp((last_run_at_ms or now_ms) / 1000, tz=_timezone(schedule.tz))
        return int(croniter(schedule.expr, base).get_next(datetime).timestamp() * 1000)
    return None
def _parse_datetime(value: str) -> datetime:
    """Parse an ISO-8601 string into a timezone-aware datetime.

    A "Z" in the text is rewritten to "+00:00" before parsing. A value
    without an offset is interpreted in the local timezone.
    """
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    return parsed if parsed.tzinfo is not None else parsed.astimezone()
def _aware_now(tz_name: str | None = None) -> datetime:
    """Return the current time as an aware datetime in ``tz_name`` (local zone if omitted)."""
    zone = _timezone(tz_name)
    return datetime.now(tz=zone)
def _timezone(tz_name: str | None = None) -> Any:
    """Return the tzinfo for ``tz_name``, or the local timezone when no name is given."""
    if not tz_name:
        return datetime.now().astimezone().tzinfo
    return ZoneInfo(tz_name)
def _now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)
def _secure_dir(path: Path) -> None:
    """Best-effort: restrict a directory to its owner (mode 0o700)."""
    owner_only = 0o700
    try:
        os.chmod(path, owner_only)
    except OSError:
        # Deliberately ignored: a failed chmod does not stop the caller.
        pass
def _secure_file(path: Path) -> None:
    """Best-effort: restrict a file to owner read/write (mode 0o600)."""
    owner_rw = 0o600
    try:
        os.chmod(path, owner_rw)
    except OSError:
        # Deliberately ignored: a failed chmod does not stop the caller.
        pass
def _optional_str(value: Any) -> str | None:
    """Return ``value`` as a stripped string, or None when it is None, empty, or blank."""
    text = None if value in (None, "") else str(value).strip()
    return text or None

View File

@ -0,0 +1,262 @@
"""Import no-credential Hermes Agent skills into Beaver."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
import json
import re
import shutil
from pathlib import Path
from typing import Any
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
from beaver.skills.specs import SkillSpec, SkillSpecStore, SkillVersion
from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content
# Upstream repository; recorded as "repo_url" in the migration manifest.
HERMES_REPO_URL = "https://github.com/NousResearch/hermes-agent"
# Case-insensitive patterns for credential vocabulary (API keys, OAuth,
# tokens, secrets) and for a set of named third-party services.
# NOTE(review): presumably consulted by _credential_reason() to skip skills
# that need credentials -- that helper is not visible here; confirm.
_CREDENTIAL_PATTERNS = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in [
        r"\bapi[_ -]?key\b",
        r"\boauth\b",
        r"\bbearer\s+token\b",
        r"\baccess[_ -]?token\b",
        r"\bclient[_ -]?secret\b",
        r"\bsecret\b",
        r"\bcredential",
        r"\bspotify\b",
        r"\bdiscord\b",
        r"\bfeishu\b",
        r"\bhome\s*assistant\b",
        r"\bfal\b",
        r"\bopenrouter\b",
        r"\bwandb\b",
    ]
]
@dataclass(slots=True)
class HermesMigrationService:
store: SkillSpecStore
manifest_path: Path | None = None
included_tools: list[dict[str, Any]] = field(default_factory=list)
skipped_tools: list[dict[str, Any]] = field(default_factory=list)
def migrate(
self,
repo_path: str | Path,
*,
include_optional: bool = True,
dry_run: bool = False,
) -> dict[str, Any]:
repo = Path(repo_path)
if not repo.exists():
raise ValueError(f"Hermes repository not found: {repo}")
skill_files = self._discover_skill_files(repo, include_optional=include_optional)
included: list[dict[str, Any]] = []
skipped: list[dict[str, Any]] = []
for skill_file in skill_files:
result = self._migrate_skill(repo, skill_file, dry_run=dry_run)
if result["status"] in {"included", "unchanged"}:
included.append(result)
else:
skipped.append(result)
manifest = {
"source": "hermes-agent",
"repo_url": HERMES_REPO_URL,
"repo_path": str(repo),
"generated_at": datetime.now(timezone.utc).isoformat(),
"dry_run": dry_run,
"included": included,
"skipped": skipped,
"tools": self._tool_manifest(),
}
path = self.manifest_path or (self.store.workspace / "hermes_migration_manifest.json")
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
return manifest
def _discover_skill_files(self, repo: Path, *, include_optional: bool) -> list[Path]:
roots = [repo / "skills"]
if include_optional:
roots.append(repo / "optional-skills")
files: list[Path] = []
for root in roots:
if root.exists():
files.extend(sorted(root.glob("**/SKILL.md")))
return files
def _migrate_skill(self, repo: Path, skill_file: Path, *, dry_run: bool) -> dict[str, Any]:
relative = skill_file.relative_to(repo)
content = skill_file.read_text(encoding="utf-8")
frontmatter, body = parse_frontmatter(content)
skill_name = _safe_skill_name(str(frontmatter.get("name") or skill_file.parent.name))
if not skill_name:
return _skip(relative, "unsafe_skill_name")
credential_reason = _credential_reason(content)
if credential_reason:
return _skip(relative, credential_reason, skill_name=skill_name)
normalized = normalize_frontmatter(
{
**frontmatter,
"name": skill_name,
"description": frontmatter.get("description") or skill_name,
}
)
rendered = _render_skill_content(normalized, body)
content_hash = canonical_hash(rendered)
existing = self.store.read_published_skill(skill_name)
existing_spec = self.store.get_skill_spec(skill_name)
if existing is not None and existing.version.content_hash == content_hash:
return {
"status": "unchanged",
"skill_name": skill_name,
"version": existing.version.version,
"path": str(relative),
"reason": "same_content_hash",
}
next_version = self._next_version(skill_name)
if dry_run:
return {
"status": "included",
"skill_name": skill_name,
"version": next_version,
"path": str(relative),
"dry_run": True,
}
now = datetime.now(timezone.utc).isoformat()
skill_version = SkillVersion(
skill_name=skill_name,
version=next_version,
content_hash=content_hash,
summary_hash=canonical_hash(strip_frontmatter(rendered).strip()),
created_at=now,
created_by="hermes_migration",
change_reason=f"Import Hermes skill {relative}",
parent_version=existing.version.version if existing is not None else None,
review_state="published",
frontmatter=normalized,
summary=summarize_skill_content(body),
tool_hints=self.store._extract_tool_hints(normalized),
provenance={
"source": "hermes-agent",
"repo_url": HERMES_REPO_URL,
"repo_path": str(repo),
"relative_path": str(relative),
},
)
self.store.write_skill_version(skill_version, rendered)
self._copy_supporting_files(skill_file.parent, skill_name, next_version)
spec = existing_spec or SkillSpec(
name=skill_name,
display_name=skill_name,
description=str(normalized.get("description") or skill_name),
created_at=now,
updated_at=now,
current_version=next_version,
status="active",
tags=[],
owners=["hermes-agent"],
source_kind="hermes-agent",
lineage=[],
)
spec.current_version = next_version
spec.updated_at = now
spec.status = "active"
spec.source_kind = "hermes-agent"
if "hermes-agent" not in spec.owners:
spec.owners.append("hermes-agent")
self.store.write_skill_spec(spec)
self.store.set_current_version(skill_name, next_version)
published = self.store.read_index("published")
if skill_name not in published:
published.append(skill_name)
self.store.update_index("published", published)
return {
"status": "included",
"skill_name": skill_name,
"version": next_version,
"path": str(relative),
}
def _copy_supporting_files(self, source_dir: Path, skill_name: str, version: str) -> None:
target_root = self.store.root / skill_name / "versions" / version
for source in sorted(source_dir.rglob("*")):
if not source.is_file() or source.name == "SKILL.md" or source.is_symlink():
continue
relative = source.relative_to(source_dir)
if any(part in {"", ".", ".."} for part in relative.parts):
continue
target = target_root / relative
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copyfile(source, target)
def _next_version(self, skill_name: str) -> str:
versions = [item for item in self.store.list_versions(skill_name) if item.startswith("v")]
numbers = [int(item[1:]) for item in versions if item[1:].isdigit()]
return f"v{(max(numbers) if numbers else 0) + 1:04d}"
def _tool_manifest(self) -> dict[str, list[dict[str, Any]]]:
included = self.included_tools or [
{"name": "todo", "reason": "implemented_builtin_no_api"},
{"name": "clarify", "reason": "implemented_builtin_no_api"},
{"name": "delegate", "reason": "implemented_builtin_no_api"},
{"name": "spawn", "reason": "implemented_builtin_no_api"},
{"name": "skills_list", "reason": "implemented_builtin_no_api"},
{"name": "skill_manage", "reason": "implemented_builtin_no_api"},
{"name": "terminal", "reason": "implemented_builtin_no_api"},
{"name": "process", "reason": "implemented_builtin_no_api"},
{"name": "patch", "reason": "implemented_builtin_no_api"},
{"name": "write_file", "reason": "implemented_builtin_no_api"},
{"name": "web_fetch", "reason": "implemented_builtin_no_api"},
{"name": "web_search", "reason": "implemented_builtin_no_api"},
{"name": "execute_code", "reason": "implemented_builtin_no_api"},
]
skipped = self.skipped_tools or [
{"name": "spotify", "reason": "requires_oauth"},
{"name": "discord", "reason": "requires_external_token"},
{"name": "feishu", "reason": "requires_external_token"},
{"name": "home_assistant", "reason": "requires_external_service_credentials"},
{"name": "fal_image_generation", "reason": "requires_api_key"},
{"name": "remote_web_providers", "reason": "requires_api_key_or_oauth"},
]
return {"included": included, "skipped": skipped}
def _credential_reason(content: str) -> str | None:
    """Return a skip reason when ``content`` matches any credential pattern, else ``None``."""
    if any(pattern.search(content) for pattern in _CREDENTIAL_PATTERNS):
        return "requires_external_credentials"
    return None
def _safe_skill_name(value: str) -> str:
cleaned = value.strip().replace(" ", "-")
if not cleaned or cleaned in {".", ".."} or "/" in cleaned or "\\" in cleaned:
return ""
if not re.fullmatch(r"[A-Za-z0-9_.-]+", cleaned):
return ""
return cleaned
def _skip(relative: Path, reason: str, *, skill_name: str | None = None) -> dict[str, Any]:
result = {"status": "skipped", "path": str(relative), "reason": reason}
if skill_name:
result["skill_name"] = skill_name
return result
def _render_skill_content(frontmatter: dict[str, Any], body: str) -> str:
    """Render normalized frontmatter plus ``body`` as one SKILL.md document.

    List values become a ``key:`` line followed by one ``- item`` line per
    element; every other value is written as ``key: value``. The result
    always ends with exactly one newline.
    """
    rendered = ["---"]
    for key, value in normalize_frontmatter(frontmatter).items():
        if not isinstance(value, list):
            rendered.append(f"{key}: {value}")
            continue
        rendered.append(f"{key}:")
        rendered.extend(f" - {item}" for item in value)
    rendered += ["---", "", body.strip()]
    return "\n".join(rendered).rstrip() + "\n"

View File

@ -16,6 +16,7 @@ class SessionProcessProjector:
run_records = {record.run_id: record for record in self.run_memory_store.list_runs()}
runs: dict[str, dict[str, Any]] = {}
events: list[dict[str, Any]] = []
artifacts: list[dict[str, Any]] = []
def add_event(
*,
@ -84,7 +85,7 @@ class SessionProcessProjector:
"node_ids": node_ids,
"skill_queries": payload.get("skill_queries") or [],
"selected_skill_names": payload.get("selected_skill_names") or [],
"generated_skill_draft_ids": payload.get("generated_skill_draft_ids") or [],
"ephemeral_guidance_ids": payload.get("ephemeral_guidance_ids") or [],
"skill_resolution_report": payload.get("skill_resolution_report") or [],
"fallback_error": payload.get("fallback_error"),
}
@ -151,13 +152,42 @@ class SessionProcessProjector:
"skill_query": item.get("skill_query"),
"selected_skill_names": item.get("selected_skill_names") or [],
"ephemeral_skill_names": item.get("ephemeral_skill_names") or [],
"generated_skill_draft_id": item.get("generated_skill_draft_id"),
"generated_skill_name": item.get("generated_skill_name"),
"ephemeral_guidance_id": item.get("ephemeral_guidance_id"),
"ephemeral_guidance_name": item.get("ephemeral_guidance_name"),
"ephemeral_used": bool(item.get("ephemeral_used")),
"finish_reason": item.get("finish_reason"),
"error": item.get("error"),
},
}
guidance_id = item.get("ephemeral_guidance_id")
if guidance_id:
guidance_name = str(item.get("ephemeral_guidance_name") or guidance_id)
artifacts.append(
{
"artifact_id": f"{node_run_id}:ephemeral-guidance:{guidance_id}",
"run_id": str(node_run_id),
"actor_type": "agent",
"actor_id": str(item.get("node_id") or "sub-agent"),
"actor_name": str(item.get("node_id") or "Sub-agent"),
"title": f"Ephemeral guidance: {guidance_name}",
"artifact_type": "markdown",
"content": (
f"# Ephemeral guidance\n\n"
f"- Guidance: {guidance_name}\n"
f"- Guidance ID: {guidance_id}\n"
f"- Scope: current delegated sub-agent run only"
),
"metadata": {
"task_id": task_id,
"attempt_index": attempt_index,
"node_id": item.get("node_id"),
"ephemeral_guidance_id": guidance_id,
"ephemeral_guidance_name": guidance_name,
"ephemeral_skill_names": item.get("ephemeral_skill_names") or [],
},
"created_at": created_at,
}
)
add_event(
event_id=f"{_event_id(record, 'node')}:{item.get('node_id')}",
run_id=str(node_run_id),
@ -231,7 +261,7 @@ class SessionProcessProjector:
return {
"runs": sorted(runs.values(), key=lambda item: item.get("started_at") or ""),
"events": sorted(events, key=lambda item: item.get("created_at") or ""),
"artifacts": [],
"artifacts": sorted(artifacts, key=lambda item: item.get("created_at") or ""),
"agents": [],
}

View File

@ -0,0 +1,208 @@
"""Import legacy and staged skills into the Beaver SkillSpecStore."""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
import io
import json
import re
import zipfile
from pathlib import Path
from typing import Any
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
from beaver.skills.specs import SkillSpec, SkillSpecStore, SkillVersion
from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content
@dataclass(slots=True)
class SkillMigrationService:
store: SkillSpecStore
repo_root: Path | None = None
def migrate_all(self) -> dict[str, Any]:
included: list[dict[str, Any]] = []
skipped: list[dict[str, Any]] = []
for path in self._backend_old_skills():
self._migrate_skill_file(path, "backend-old", included, skipped)
for path in self._staged_skills():
self._migrate_skill_file(path, "stevenli-staged", included, skipped)
for path in self._skill_zips():
self._migrate_zip(path, included, skipped)
manifest = {
"generated_at": _now(),
"workspace": str(self.store.workspace),
"included": included,
"skipped": skipped,
}
manifest_path = self.store.workspace / "skill_migration_manifest.json"
manifest_path.write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
return manifest
def _backend_old_skills(self) -> list[Path]:
root = self._repo_root() / "app-instance" / "backend-old" / "nanobot" / "skills"
if not root.exists():
return []
return sorted(root.glob("*/SKILL.md"))
def _staged_skills(self) -> list[Path]:
root = self.store.workspace / "state" / "skill-reviews"
if not root.exists():
return []
return sorted(root.glob("*/staged/*/SKILL.md"))
def _skill_zips(self) -> list[Path]:
root = self.store.workspace / "skills"
if not root.exists():
return []
return sorted(root.glob("*.zip"))
def _repo_root(self) -> Path:
if self.repo_root is not None:
return self.repo_root
return Path(__file__).resolve().parents[4]
def _migrate_skill_file(self, path: Path, source: str, included: list[dict[str, Any]], skipped: list[dict[str, Any]]) -> None:
try:
content = path.read_text(encoding="utf-8")
result = self._publish_content(content, source=source, source_path=str(path))
included.append(result)
except Exception as exc:
skipped.append({"source": source, "source_path": str(path), "reason": str(exc)})
def _migrate_zip(self, path: Path, included: list[dict[str, Any]], skipped: list[dict[str, Any]]) -> None:
try:
with zipfile.ZipFile(io.BytesIO(path.read_bytes()), "r") as archive:
entries = [info for info in archive.infolist() if not info.is_dir()]
skill_entry = _find_skill_entry(entries)
content = archive.read(skill_entry).decode("utf-8", errors="replace")
result = self._publish_content(content, source="stevenli-zip", source_path=str(path))
skill_name = result["skill_name"]
version = result["version"]
top = Path(skill_entry).parts[0] if len(Path(skill_entry).parts) == 2 else ""
for info in entries:
raw = info.filename.replace("\\", "/")
if raw == skill_entry or raw.startswith("/") or "__MACOSX" in Path(raw).parts:
continue
parts = Path(raw).parts
rel_parts = parts[1:] if top and parts and parts[0] == top else parts
if not rel_parts or any(part in {"", ".", ".."} for part in rel_parts):
continue
target = self.store.root / skill_name / "versions" / version / "/".join(rel_parts)
target.parent.mkdir(parents=True, exist_ok=True)
target.write_bytes(archive.read(info))
included.append(result)
except Exception as exc:
skipped.append({"source": "stevenli-zip", "source_path": str(path), "reason": str(exc)})
def _publish_content(self, content: str, *, source: str, source_path: str) -> dict[str, Any]:
frontmatter, body = parse_frontmatter(content)
skill_name = _safe_name(str(frontmatter.get("name") or Path(source_path).parent.name))
if not skill_name:
raise ValueError("unsafe or missing skill name")
normalized = normalize_frontmatter(
{
**frontmatter,
"name": skill_name,
"description": frontmatter.get("description") or skill_name,
}
)
rendered = _render_skill_content(normalized, body)
content_hash = canonical_hash(rendered)
existing = self.store.read_published_skill(skill_name)
if existing is not None and existing.version.content_hash == content_hash:
return {
"status": "unchanged",
"skill_name": skill_name,
"version": existing.version.version,
"source": source,
"source_path": source_path,
}
version_id = self._next_version(skill_name)
now = _now()
skill_version = SkillVersion(
skill_name=skill_name,
version=version_id,
content_hash=content_hash,
summary_hash=canonical_hash(strip_frontmatter(rendered).strip()),
created_at=now,
created_by="migration",
change_reason=f"Import skill from {source}",
parent_version=existing.version.version if existing is not None else None,
review_state="published",
frontmatter=normalized,
summary=summarize_skill_content(body),
tool_hints=self.store._extract_tool_hints(normalized),
provenance={"source": source, "source_path": source_path, "imported_at": now},
)
self.store.write_skill_version(skill_version, rendered)
spec = self.store.get_skill_spec(skill_name) or SkillSpec(
name=skill_name,
display_name=skill_name,
description=str(normalized.get("description") or skill_name),
created_at=now,
updated_at=now,
current_version=version_id,
status="active",
tags=[],
owners=["migration"],
source_kind=source,
lineage=[],
)
spec.current_version = version_id
spec.updated_at = now
spec.status = "active"
spec.source_kind = source
if "migration" not in spec.owners:
spec.owners.append("migration")
self.store.write_skill_spec(spec)
self.store.set_current_version(skill_name, version_id)
published = self.store.read_index("published")
if skill_name not in published:
published.append(skill_name)
self.store.update_index("published", published)
return {"status": "included", "skill_name": skill_name, "version": version_id, "source": source, "source_path": source_path}
def _next_version(self, skill_name: str) -> str:
versions = [item for item in self.store.list_versions(skill_name) if item.startswith("v")]
numbers = [int(item[1:]) for item in versions if item[1:].isdigit()]
return f"v{(max(numbers) if numbers else 0) + 1:04d}"
def _find_skill_entry(entries: list[zipfile.ZipInfo]) -> str:
candidates = []
for info in entries:
raw = info.filename.replace("\\", "/")
parts = Path(raw).parts
if raw.startswith("/") or any(part in {"", ".", ".."} for part in parts):
raise ValueError(f"unsafe archive entry: {info.filename}")
if parts and parts[-1] == "SKILL.md" and len(parts) in (1, 2):
candidates.append(raw)
if not candidates:
raise ValueError("zip has no root SKILL.md")
return candidates[0]
def _safe_name(value: str) -> str:
cleaned = value.strip().replace(" ", "-")
if not cleaned or cleaned in {".", ".."} or "/" in cleaned or "\\" in cleaned:
return ""
return cleaned if re.fullmatch(r"[A-Za-z0-9_.-]+", cleaned) else ""
def _render_skill_content(frontmatter: dict[str, Any], body: str) -> str:
    """Render normalized frontmatter plus ``body`` as one SKILL.md document.

    List values become a ``key:`` line followed by one ``- item`` line per
    element; every other value is written as ``key: value``. The result
    always ends with exactly one newline.
    """
    rendered = ["---"]
    for key, value in normalize_frontmatter(frontmatter).items():
        if not isinstance(value, list):
            rendered.append(f"{key}: {value}")
            continue
        rendered.append(f"{key}:")
        rendered.extend(f" - {item}" for item in value)
    rendered += ["---", "", body.strip()]
    return "\n".join(rendered).rstrip() + "\n"
def _now() -> str:
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,248 @@
"""SkillHub marketplace client and installer."""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
import posixpath
from typing import Any
import httpx
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
from beaver.skills.specs import SkillSpec, SkillSpecStore, SkillVersion
from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content
# Public SkillHub site; also used to build the provenance ``source_url``.
SKILLHUB_BASE_URL = "https://skillhub.bwgdi.com"
# Root of the web API; every request path is appended to this.
SKILLHUB_API_BASE = SKILLHUB_BASE_URL + "/api/web"
@dataclass(slots=True)
class SkillHubService:
store: SkillSpecStore
timeout_seconds: int = 30
async def search(
self,
*,
q: str = "",
sort: str = "relevance",
page: int = 0,
size: int = 12,
namespace: str | None = None,
) -> dict[str, Any]:
params = {
"q": q,
"sort": sort,
"page": str(max(0, page)),
"size": str(max(1, min(size, 50))),
}
if namespace:
params["namespace"] = namespace.removeprefix("@")
data = await self._get_json("/skills", params=params)
payload = _unwrap(data)
if not isinstance(payload, dict):
payload = {}
items = [self._with_install_state(item) for item in list(payload.get("items") or [])]
return {
"items": items,
"total": int(payload.get("total") or len(items)),
"page": int(payload.get("page") or page),
"size": int(payload.get("size") or size),
}
async def detail(self, namespace: str, slug: str) -> dict[str, Any]:
data = await self._get_json(f"/skills/{namespace.removeprefix('@')}/{slug}")
payload = _unwrap(data)
item = self._with_install_state(payload if isinstance(payload, dict) else {})
return item
async def version(self, namespace: str, slug: str, version: str) -> dict[str, Any]:
namespace = namespace.removeprefix("@")
detail = _unwrap(await self._get_json(f"/skills/{namespace}/{slug}/versions/{version}"))
files = _unwrap(await self._get_json(f"/skills/{namespace}/{slug}/versions/{version}/files"))
if not isinstance(detail, dict):
detail = {}
if not isinstance(files, list):
files = []
return {"detail": detail, "files": files}
async def install(self, namespace: str, slug: str, version: str | None = None) -> dict[str, Any]:
namespace = namespace.removeprefix("@")
skill = await self.detail(namespace, slug)
selected_version = version or _published_version(skill)
if not selected_version:
raise ValueError("SkillHub skill has no published version")
version_payload = await self.version(namespace, slug, selected_version)
files = list(version_payload.get("files") or [])
contents: dict[str, str] = {}
for item in files:
file_path = _safe_posix_path(str(item.get("filePath") or item.get("path") or ""))
contents[file_path] = await self._get_text(
f"/skills/{namespace}/{slug}/versions/{selected_version}/file",
params={"path": file_path},
)
skill_content = contents.get("SKILL.md")
if not skill_content:
raise ValueError("SkillHub version does not contain SKILL.md")
frontmatter, body = parse_frontmatter(skill_content)
skill_name = str(frontmatter.get("name") or skill.get("slug") or slug).strip()
if not skill_name or "/" in skill_name or "\\" in skill_name or skill_name in {".", ".."}:
raise ValueError(f"Unsafe skill name from SkillHub: {skill_name}")
normalized_frontmatter = normalize_frontmatter(
{
**frontmatter,
"name": skill_name,
"description": frontmatter.get("description") or skill.get("summary") or skill_name,
}
)
rendered = _render_skill_content(normalized_frontmatter, body)
content_hash = canonical_hash(rendered)
existing = self.store.read_published_skill(skill_name)
existing_spec = self.store.get_skill_spec(skill_name)
if existing is not None and existing.version.content_hash == content_hash:
return {
"ok": True,
"skill_name": skill_name,
"version": existing.version.version,
"source": "skillhub",
"namespace": namespace,
"slug": slug,
"installed_path": str(self.store.root / skill_name),
"already_installed": True,
}
next_version = self._next_version(skill_name)
now = datetime.now(timezone.utc).isoformat()
skill_version = SkillVersion(
skill_name=skill_name,
version=next_version,
content_hash=content_hash,
summary_hash=canonical_hash(strip_frontmatter(rendered).strip()),
created_at=now,
created_by="skillhub",
change_reason=f"Install SkillHub {namespace}/{slug}@{selected_version}",
parent_version=existing.version.version if existing is not None else None,
review_state="published",
frontmatter=normalized_frontmatter,
summary=summarize_skill_content(body),
tool_hints=self.store._extract_tool_hints(normalized_frontmatter),
provenance={
"source": "skillhub",
"namespace": namespace,
"slug": slug,
"skillhub_version": selected_version,
"source_url": f"{SKILLHUB_BASE_URL}/space/{namespace}/{slug}",
},
)
self.store.write_skill_version(skill_version, rendered)
for file_path, content in contents.items():
if file_path == "SKILL.md":
continue
target = self.store.root / skill_name / "versions" / next_version / file_path
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(content, encoding="utf-8")
spec = existing_spec or SkillSpec(
name=skill_name,
display_name=str(skill.get("displayName") or skill_name),
description=str(normalized_frontmatter.get("description") or skill_name),
created_at=now,
updated_at=now,
current_version=next_version,
status="active",
tags=[],
owners=["skillhub"],
source_kind="skillhub",
lineage=[],
)
spec.current_version = next_version
spec.updated_at = now
spec.status = "active"
spec.source_kind = "skillhub"
if "skillhub" not in spec.owners:
spec.owners.append("skillhub")
self.store.write_skill_spec(spec)
self.store.set_current_version(skill_name, next_version)
published = self.store.read_index("published")
if skill_name not in published:
published.append(skill_name)
self.store.update_index("published", published)
return {
"ok": True,
"skill_name": skill_name,
"version": next_version,
"source": "skillhub",
"namespace": namespace,
"slug": slug,
"installed_path": str(self.store.root / skill_name),
"already_installed": False,
}
async def _get_json(self, path: str, *, params: dict[str, str] | None = None) -> dict[str, Any]:
async with httpx.AsyncClient(timeout=self.timeout_seconds, follow_redirects=True, trust_env=False) as client:
response = await client.get(f"{SKILLHUB_API_BASE}{path}", params=params)
response.raise_for_status()
data = response.json()
return data if isinstance(data, dict) else {}
async def _get_text(self, path: str, *, params: dict[str, str]) -> str:
async with httpx.AsyncClient(timeout=self.timeout_seconds, follow_redirects=True, trust_env=False) as client:
response = await client.get(f"{SKILLHUB_API_BASE}{path}", params=params)
response.raise_for_status()
return response.text
def _with_install_state(self, item: dict[str, Any]) -> dict[str, Any]:
result = dict(item)
slug = str(result.get("slug") or result.get("displayName") or "")
namespace = str(result.get("namespace") or "").removeprefix("@")
installed = self.store.get_skill_spec(slug) or self._find_installed_skillhub_spec(namespace, slug)
result["installed"] = installed is not None and installed.status == "active"
result["installed_version"] = installed.current_version if installed is not None else None
return result
def _find_installed_skillhub_spec(self, namespace: str, slug: str) -> SkillSpec | None:
for spec in self.store.list_skill_specs():
loaded = self.store.read_published_skill(spec.name)
provenance = loaded.version.provenance if loaded is not None else {}
if provenance.get("source") == "skillhub" and provenance.get("namespace") == namespace and provenance.get("slug") == slug:
return spec
return None
def _next_version(self, skill_name: str) -> str:
versions = [item for item in self.store.list_versions(skill_name) if item.startswith("v")]
numbers = [int(item[1:]) for item in versions if item[1:].isdigit()]
return f"v{(max(numbers) if numbers else 0) + 1:04d}"
def _unwrap(payload: dict[str, Any]) -> Any:
if "data" in payload:
return payload["data"]
return payload
def _published_version(item: dict[str, Any]) -> str | None:
for key in ("publishedVersion", "headlineVersion"):
value = item.get(key)
if isinstance(value, dict) and value.get("version"):
return str(value["version"])
return None
def _safe_posix_path(value: str) -> str:
cleaned = posixpath.normpath(value.replace("\\", "/")).lstrip("/")
if cleaned in {"", ".", ".."} or cleaned.startswith("../") or "/../" in cleaned:
raise ValueError(f"Unsafe SkillHub file path: {value}")
return cleaned
def _render_skill_content(frontmatter: dict[str, Any], body: str) -> str:
    """Render normalized frontmatter plus ``body`` as one SKILL.md document.

    List values become a ``key:`` line followed by one ``- item`` line per
    element; every other value is written as ``key: value``. The result
    always ends with exactly one newline.
    """
    rendered = ["---"]
    for key, value in normalize_frontmatter(frontmatter).items():
        if not isinstance(value, list):
            rendered.append(f"{key}: {value}")
            continue
        rendered.append(f"{key}:")
        rendered.extend(f" - {item}" for item in value)
    rendered += ["---", "", body.strip()]
    return "\n".join(rendered).rstrip() + "\n"

View File

@ -32,7 +32,7 @@ class TeamService:
provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None = None,
inherited_pinned_skills: list[str] | None = None,
inherited_pinned_skill_contexts: list["SkillContext"] | None = None,
learning_candidate_enabled: bool = False,
allow_candidate_generation: bool = False,
) -> TeamRunResult:
"""Run a team graph inside the parent task context."""
@ -46,7 +46,7 @@ class TeamService:
provider_bundle_factory=provider_bundle_factory,
inherited_pinned_skills=inherited_pinned_skills,
inherited_pinned_skill_contexts=inherited_pinned_skill_contexts,
learning_candidate_enabled=learning_candidate_enabled,
allow_candidate_generation=allow_candidate_generation,
)
self._attach_runs_to_parent_task(result)
return result