Files
beaver_project/app-instance/backend/beaver/services/cron_service.py
steven_li 30ab74ffb2 feat(engine): 添加MCP连接管理和工具集成功能
- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
2026-05-14 09:43:48 +08:00

509 lines
18 KiB
Python

"""Cron scheduling service for Beaver scheduled Tasks."""
from __future__ import annotations
import asyncio
import inspect
import json
import os
import re
import tempfile
import threading
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo
from beaver.foundation.models import CronExecutionResult, CronJob, CronPayload, CronRunRecord, CronSchedule
# croniter is an optional dependency: when it is missing the name is bound to
# None so callers can detect that and report a clear error for cron schedules.
try:  # pragma: no cover - exercised through cron schedule tests when installed
    from croniter import croniter
except ModuleNotFoundError:  # pragma: no cover - defensive dependency guard
    croniter = None  # type: ignore[assignment]

# Async callback invoked for each triggered job. CronService._call_on_job
# passes either (job) or (job, run_record) depending on the callable's arity.
CronCallback = Callable[..., Awaitable[CronExecutionResult | str | None]]

# A whole-string duration: an integer, optional whitespace, then a unit
# spelling for seconds, minutes, hours or days (case-insensitive).
_DURATION_RE = re.compile(
    r"^(\d+)\s*(s|sec|secs|second|seconds|m|min|mins|minute|minutes|h|hr|hrs|hour|hours|d|day|days)$",
    re.IGNORECASE,
)

# A single cron field made only of digits and the characters * ? , - / L W #.
_CRON_FIELD_RE = re.compile(r"^[\d\*\?,\-/LW#]+$", re.IGNORECASE)

# Number of most recent run records kept in each job's history.
_MAX_HISTORY = 20
class CronService:
    """Persistent single-timer scheduler.

    Hermes' cron implementation stores jobs as JSON and ticks safely in the
    background. Beaver keeps that shape, but the callback is required to route
    agent work through Task mode so every scheduled trigger is visible as a
    normal Task.

    Jobs are loaded lazily from ``store_path`` into an in-memory list and are
    written back through a temporary file after each mutation. One asyncio
    task sleeps until the earliest ``next_run_at_ms`` among enabled jobs, runs
    every due job, and then re-arms itself.

    Locking convention: ``self._lock`` is a plain (non-reentrant)
    ``threading.Lock``. Methods suffixed ``_unlocked`` must be called with the
    lock held; ``_arm_timer`` must be called with the lock released because it
    takes the lock itself through ``_next_wake_ms``.
    """

    def __init__(self, store_path: str | Path, *, on_job: CronCallback | None = None) -> None:
        """Create a service backed by the JSON file at ``store_path``.

        Args:
            store_path: Location of the JSON job store.
            on_job: Optional async callback awaited for every triggered job.
        """
        self.store_path = Path(store_path)
        self.on_job = on_job
        self._jobs: list[CronJob] | None = None  # None until first loaded
        self._lock = threading.Lock()
        self._running = False
        self._timer_task: asyncio.Task[None] | None = None
        # True while the timer task is executing due jobs; re-arming is
        # deferred during that window so the running jobs are not cancelled.
        self._executing = False

    async def start(self) -> None:
        """Load jobs, refresh stale next-run times, persist, and arm the timer."""
        self._running = True
        self._load_jobs()
        self._recompute_next_runs()
        self._save_jobs()
        self._arm_timer()

    def stop(self) -> None:
        """Stop scheduling and cancel the timer task if one exists."""
        self._running = False
        if self._timer_task is not None:
            self._timer_task.cancel()
            self._timer_task = None

    def status(self) -> dict[str, Any]:
        """Return the running flag, total job count, and next wake time in ms."""
        jobs = self.list_jobs(include_disabled=True)
        return {
            "enabled": self._running,
            "jobs": len(jobs),
            "next_wake_at_ms": self._next_wake_ms(),
        }

    def list_jobs(self, *, include_disabled: bool = False) -> list[CronJob]:
        """Return jobs ordered by next run time.

        Jobs without a next run time sort last. Disabled jobs are omitted
        unless ``include_disabled`` is true.
        """
        jobs = self._load_jobs()
        if not include_disabled:
            jobs = [job for job in jobs if job.enabled]
        return sorted(jobs, key=lambda job: job.next_run_at_ms or 9_999_999_999_999)

    def get_job(self, job_id: str) -> CronJob | None:
        """Return the job whose id equals ``job_id``, or ``None``."""
        for job in self._load_jobs():
            if job.id == job_id:
                return job
        return None

    def add_job(
        self,
        *,
        name: str,
        message: str,
        schedule: CronSchedule,
        session_key: str | None = None,
        payload_kind: str = "agent_turn",
        mode: str = "notification",
        requires_followup: bool = False,
        deliver: bool = False,
        channel: str | None = None,
        to: str | None = None,
        delete_after_run: bool = False,
    ) -> CronJob:
        """Create, persist, and schedule a new job.

        The name falls back to the first 50 characters of the message and then
        to ``"scheduled task"``. An unrecognised ``payload_kind`` becomes
        ``"agent_turn"``; any ``mode`` other than ``"task"`` becomes
        ``"notification"``.

        Raises:
            ValueError: If the message is blank or the schedule is invalid.
        """
        cleaned_name = name.strip() or message[:50].strip() or "scheduled task"
        cleaned_message = message.strip()
        if not cleaned_message:
            raise ValueError("message is required")
        validate_schedule(schedule)
        now = _now_ms()
        job = CronJob(
            id=uuid4().hex[:12],
            name=cleaned_name,
            enabled=True,
            schedule=schedule,
            payload=CronPayload(
                kind=payload_kind if payload_kind in {"agent_turn", "system_event"} else "agent_turn",  # type: ignore[arg-type]
                mode="task" if mode == "task" else "notification",
                message=cleaned_message,
                session_key=session_key,
                requires_followup=requires_followup,
                deliver=deliver,
                channel=channel,
                to=to,
            ),
            next_run_at_ms=compute_next_run(schedule, now_ms=now),
            created_at_ms=now,
            updated_at_ms=now,
            delete_after_run=delete_after_run,
        )
        with self._lock:
            jobs = self._load_jobs_unlocked()
            jobs.append(job)
            self._jobs = jobs
            self._save_jobs_unlocked()
        self._arm_timer()
        return job

    def update_enabled(self, job_id: str, enabled: bool) -> CronJob | None:
        """Enable or disable a job and persist the change.

        Enabling recomputes the next run time; disabling clears it.

        Returns:
            The updated job, or ``None`` when no job has ``job_id``.
        """
        with self._lock:
            job = next(
                (item for item in self._load_jobs_unlocked() if item.id == job_id),
                None,
            )
            if job is None:
                return None
            job.enabled = bool(enabled)
            job.updated_at_ms = _now_ms()
            job.next_run_at_ms = compute_next_run(job.schedule) if job.enabled else None
            self._save_jobs_unlocked()
        # Re-arm only after releasing the lock: _arm_timer re-acquires it via
        # _next_wake_ms and the lock is not reentrant.
        self._arm_timer()
        return job

    def remove_job(self, job_id: str) -> bool:
        """Delete a job; return ``False`` when no job has ``job_id``."""
        with self._lock:
            jobs = self._load_jobs_unlocked()
            next_jobs = [job for job in jobs if job.id != job_id]
            if len(next_jobs) == len(jobs):
                return False
            self._jobs = next_jobs
            self._save_jobs_unlocked()
        self._arm_timer()
        return True

    async def run_job(self, job_id: str, *, force: bool = False) -> bool:
        """Execute a job immediately.

        Returns:
            ``False`` when the job is unknown, or is disabled and ``force`` is
            not set; ``True`` after the job has been executed and saved.
        """
        job = self.get_job(job_id)
        if job is None:
            return False
        if not force and not job.enabled:
            return False
        await self._execute_job(job)
        self._save_jobs()
        self._arm_timer()
        return True

    def list_runs(self) -> list[tuple[CronJob, CronRunRecord]]:
        """Return every (job, run record) pair, most recently started first."""
        runs: list[tuple[CronJob, CronRunRecord]] = []
        for job in self.list_jobs(include_disabled=True):
            runs.extend((job, run) for run in job.history)
        return sorted(runs, key=lambda item: item[1].started_at_ms, reverse=True)

    def get_run(self, scheduled_run_id: str) -> tuple[CronJob, CronRunRecord] | None:
        """Return the (job, run record) pair for ``scheduled_run_id``, or ``None``."""
        for job, run in self.list_runs():
            if run.scheduled_run_id == scheduled_run_id:
                return job, run
        return None

    def mark_run_engaged(
        self,
        scheduled_run_id: str,
        *,
        task_id: str,
        intent: str,
    ) -> tuple[CronJob, CronRunRecord] | None:
        """Flag a recorded run as engaged and attach the task id and intent.

        Returns:
            The (job, run record) pair that was updated, or ``None`` when no
            run has ``scheduled_run_id``.
        """
        with self._lock:
            jobs = self._load_jobs_unlocked()
            for job in jobs:
                for run in job.history:
                    if run.scheduled_run_id != scheduled_run_id:
                        continue
                    run.engaged = True
                    run.engaged_at_ms = _now_ms()
                    run.engage_intent = intent
                    run.task_id = task_id
                    job.updated_at_ms = _now_ms()
                    self._save_jobs_unlocked()
                    return job, run
        return None

    def update_job_message(self, job_id: str, message: str) -> CronJob | None:
        """Replace a job's payload message and persist the change.

        Raises:
            ValueError: If the message is blank after stripping.

        Returns:
            The updated job, or ``None`` when no job has ``job_id``.
        """
        cleaned = message.strip()
        if not cleaned:
            raise ValueError("message is required")
        with self._lock:
            jobs = self._load_jobs_unlocked()
            for job in jobs:
                if job.id != job_id:
                    continue
                job.payload.message = cleaned
                job.updated_at_ms = _now_ms()
                self._save_jobs_unlocked()
                return job
        return None

    async def _on_timer(self) -> None:
        """Run every enabled job that is due, then save and re-arm the timer."""
        now = _now_ms()
        due_jobs = [
            job
            for job in self.list_jobs(include_disabled=False)
            if job.next_run_at_ms is not None and job.next_run_at_ms <= now
        ]
        # While jobs run, _arm_timer is a no-op, so a concurrent add/remove/
        # enable call cannot cancel this task in the middle of a job.
        self._executing = True
        try:
            for job in due_jobs:
                await self._execute_job(job)
        finally:
            self._executing = False
        self._save_jobs()
        self._arm_timer()

    async def _execute_job(self, job: CronJob) -> None:
        """Run one job, record the outcome in its history, and reschedule it.

        Exceptions raised by the callback are captured on the run record and
        the job rather than propagated. One-shot ("at") jobs are removed when
        ``delete_after_run`` is set and disabled otherwise.
        """
        start_ms = _now_ms()
        run_record = CronRunRecord(started_at_ms=start_ms, mode=job.payload.mode)
        try:
            result = CronExecutionResult(mode=job.payload.mode)
            if self.on_job is not None:
                raw = await self._call_on_job(job, run_record)
                result = raw if isinstance(raw, CronExecutionResult) else CronExecutionResult(response=raw, mode=job.payload.mode)
            run_record.status = "ok"
            run_record.mode = result.mode
            run_record.output = result.response
            run_record.notification_session_id = result.notification_session_id
            run_record.task_id = result.task_id
            run_record.run_id = result.run_id
            job.last_status = "ok"
            job.last_error = None
        except Exception as exc:
            run_record.status = "error"
            run_record.error = str(exc)
            job.last_status = "error"
            job.last_error = str(exc)
        finally:
            finish_ms = _now_ms()
            run_record.finished_at_ms = finish_ms
            job.last_run_at_ms = start_ms
            job.updated_at_ms = finish_ms
            job.history.append(run_record)
            job.history = job.history[-_MAX_HISTORY:]

        # Rescheduling is kept outside the finally block so its early returns
        # can never swallow an in-flight cancellation.
        if job.schedule.kind == "at":
            if job.delete_after_run:
                with self._lock:
                    self._jobs = [item for item in self._load_jobs_unlocked() if item.id != job.id]
                return
            job.enabled = False
            job.next_run_at_ms = None
            return
        job.next_run_at_ms = compute_next_run(job.schedule, now_ms=_now_ms(), last_run_at_ms=job.last_run_at_ms)

    async def _call_on_job(self, job: CronJob, run_record: CronRunRecord) -> CronExecutionResult | str | None:
        """Await the callback, passing the run record only if it accepts two or more parameters."""
        if self.on_job is None:
            return None
        try:
            params = inspect.signature(self.on_job).parameters
        except (TypeError, ValueError):
            # Signature not introspectable: fall back to the one-argument form.
            params = {}
        if len(params) >= 2:
            return await self.on_job(job, run_record)
        return await self.on_job(job)

    def _recompute_next_runs(self) -> None:
        """Refresh next-run times that are missing or more than two hours overdue."""
        now = _now_ms()
        changed = False
        for job in self._load_jobs():
            if not job.enabled:
                continue
            if job.next_run_at_ms is None or job.next_run_at_ms < now - 7_200_000:
                job.next_run_at_ms = compute_next_run(job.schedule, now_ms=now, last_run_at_ms=job.last_run_at_ms)
                changed = True
        if changed:
            self._save_jobs()

    def _next_wake_ms(self) -> int | None:
        """Return the earliest next-run time among enabled jobs, or ``None``."""
        candidates = [
            job.next_run_at_ms
            for job in self._load_jobs()
            if job.enabled and job.next_run_at_ms is not None
        ]
        return min(candidates) if candidates else None

    @staticmethod
    def _current_task_or_none() -> asyncio.Task[Any] | None:
        """Return the running asyncio task, or ``None`` outside an event loop."""
        try:
            return asyncio.current_task()
        except RuntimeError:
            return None

    def _arm_timer(self) -> None:
        """(Re)schedule the timer task for the earliest pending job.

        Must be called without ``self._lock`` held. Does nothing while the
        timer task is executing due jobs; ``_on_timer`` re-arms once they
        finish, so changes made in the meantime are still picked up.
        """
        if self._executing:
            return
        pending = self._timer_task
        if pending is not None:
            # When the timer task re-arms itself it must not cancel itself.
            if pending is not self._current_task_or_none():
                pending.cancel()
            self._timer_task = None
        if not self._running:
            return
        next_wake = self._next_wake_ms()
        if next_wake is None:
            return

        async def tick() -> None:
            await asyncio.sleep(max(0, next_wake - _now_ms()) / 1000)
            if self._running:
                await self._on_timer()

        self._timer_task = asyncio.create_task(tick())

    def _load_jobs(self) -> list[CronJob]:
        """Return a shallow copy of the job list; the job objects are shared."""
        with self._lock:
            return list(self._load_jobs_unlocked())

    def _load_jobs_unlocked(self) -> list[CronJob]:
        """Return the cached job list, reading the store file on first use.

        The caller must hold ``self._lock``. A missing store file yields an
        empty list.
        """
        if self._jobs is not None:
            return self._jobs
        self.store_path.parent.mkdir(parents=True, exist_ok=True)
        _secure_dir(self.store_path.parent)
        if not self.store_path.exists():
            self._jobs = []
            return self._jobs
        payload = json.loads(self.store_path.read_text(encoding="utf-8"))
        raw_jobs = payload.get("jobs") if isinstance(payload, dict) else []
        self._jobs = [CronJob.from_dict(item) for item in raw_jobs or [] if isinstance(item, dict)]
        return self._jobs

    def _save_jobs(self) -> None:
        """Persist the job list while holding the lock."""
        with self._lock:
            self._save_jobs_unlocked()

    def _save_jobs_unlocked(self) -> None:
        """Write the job list to the store file via a temporary file.

        The caller must hold ``self._lock``. Nothing is written if the jobs
        have never been loaded. The data is flushed and fsynced to a temp file
        in the same directory, which then replaces the store file.
        """
        if self._jobs is None:
            return
        self.store_path.parent.mkdir(parents=True, exist_ok=True)
        _secure_dir(self.store_path.parent)
        fd, tmp_name = tempfile.mkstemp(prefix=".jobs-", suffix=".json", dir=str(self.store_path.parent))
        tmp_path = Path(tmp_name)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                json.dump(
                    {"version": 1, "updated_at_ms": _now_ms(), "jobs": [job.to_dict() for job in self._jobs]},
                    handle,
                    ensure_ascii=False,
                    indent=2,
                    sort_keys=True,
                )
                handle.write("\n")
                handle.flush()
                os.fsync(handle.fileno())
            os.replace(tmp_path, self.store_path)
            _secure_file(self.store_path)
        finally:
            # Only present if the replace did not happen.
            if tmp_path.exists():
                tmp_path.unlink()
def parse_duration(value: str) -> int:
    """Convert a duration such as ``"30s"``, ``"15 min"`` or ``"2h"`` to seconds.

    Args:
        value: An integer followed by a seconds/minutes/hours/days unit.

    Returns:
        The duration in whole seconds.

    Raises:
        ValueError: If ``value`` does not match the accepted format.
    """
    parsed = _DURATION_RE.match(value.strip())
    if parsed is None:
        raise ValueError("duration must look like 30s, 15m, 2h, or 1d")
    count, unit_name = parsed.groups()
    # Every accepted spelling starts with the letter that identifies its unit.
    unit_key = unit_name[0].lower()
    seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400}[unit_key]
    return int(count) * seconds_per_unit
def parse_schedule(value: str) -> CronSchedule:
    """Parse a free-form schedule string into a ``CronSchedule``.

    The input is tried in this order:

    1. ``"every <duration>"`` gives a repeating schedule.
    2. Five or six whitespace-separated cron fields give a cron schedule.
    3. Text containing ``"T"`` or starting with ``YYYY-MM-DD`` gives a
       one-shot schedule at that ISO datetime.
    4. Anything else is read as a bare duration and gives a one-shot
       schedule that far from now.

    Raises:
        ValueError: If the text matches none of the forms, or the resulting
            repeating or cron schedule is invalid (for example ``"every 0s"``).
    """
    raw = value.strip()
    if raw.lower().startswith("every "):
        seconds = parse_duration(raw[6:].strip())
        schedule = CronSchedule(kind="every", every_ms=seconds * 1000, display=f"every {seconds}s")
        # Reject a zero interval here, as the cron branch and
        # schedule_from_api already do, instead of returning an unusable schedule.
        validate_schedule(schedule)
        return schedule
    parts = raw.split()
    if len(parts) in {5, 6} and all(_CRON_FIELD_RE.match(item) for item in parts[:5]):
        schedule = CronSchedule(kind="cron", expr=raw, display=raw)
        validate_schedule(schedule)
        return schedule
    if "T" in raw or re.match(r"^\d{4}-\d{2}-\d{2}", raw):
        dt = _parse_datetime(raw)
        return CronSchedule(kind="at", at_ms=int(dt.timestamp() * 1000), display=f"once at {dt:%Y-%m-%d %H:%M}")
    seconds = parse_duration(raw)
    at_ms = _now_ms() + seconds * 1000
    return CronSchedule(kind="at", at_ms=at_ms, display=f"once in {raw}")
def schedule_from_api(payload: dict[str, Any]) -> CronSchedule:
    """Build a ``CronSchedule`` from an API request body.

    The first usable key wins, checked in this order: ``schedule`` (free-form
    text), ``every_seconds``, ``cron_expr`` (with optional ``tz``), ``at_iso``.

    Raises:
        ValueError: If none of the keys is supplied, ``every_seconds`` is not
            positive, or the selected value cannot be parsed or validated.
    """
    free_form = payload.get("schedule")
    if free_form:
        return parse_schedule(str(free_form))

    every_seconds = payload.get("every_seconds")
    if every_seconds not in (None, ""):
        seconds = int(every_seconds)
        if seconds <= 0:
            raise ValueError("every_seconds must be greater than 0")
        return CronSchedule(kind="every", every_ms=seconds * 1000, display=f"every {seconds}s")

    cron_expr = payload.get("cron_expr")
    if cron_expr:
        expr = str(cron_expr).strip()
        cron_schedule = CronSchedule(kind="cron", expr=expr, tz=_optional_str(payload.get("tz")), display=expr)
        validate_schedule(cron_schedule)
        return cron_schedule

    at_iso = payload.get("at_iso")
    if at_iso:
        moment = _parse_datetime(str(at_iso))
        return CronSchedule(kind="at", at_ms=int(moment.timestamp() * 1000), display=f"once at {moment:%Y-%m-%d %H:%M}")

    raise ValueError("one of schedule, every_seconds, cron_expr, or at_iso is required")
def validate_schedule(schedule: CronSchedule) -> None:
    """Raise ``ValueError`` unless ``schedule`` is usable.

    * ``every`` needs a positive ``every_ms``.
    * ``at`` needs a truthy ``at_ms``.
    * ``cron`` needs an expression, a resolvable timezone when one is given,
      the croniter package, and an expression croniter accepts.

    Raises:
        ValueError: For any of the failures above or an unknown kind.
    """
    kind = schedule.kind
    if kind not in ("every", "at", "cron"):
        raise ValueError(f"unknown schedule kind: {schedule.kind}")

    if kind == "every":
        interval = schedule.every_ms
        if not interval or interval <= 0:
            raise ValueError("every schedule requires a positive every_ms")
        return

    if kind == "at":
        if not schedule.at_ms:
            raise ValueError("at schedule requires at_ms")
        return

    # Remaining kind: cron.
    if not schedule.expr:
        raise ValueError("cron schedule requires expr")
    zone_name = schedule.tz
    if zone_name:
        try:
            ZoneInfo(zone_name)
        except Exception as exc:
            raise ValueError(f"unknown timezone: {schedule.tz}") from exc
    if croniter is None:
        raise ValueError("cron schedules require the croniter package")
    try:
        # Constructing the iterator is enough to make croniter parse the expression.
        croniter(schedule.expr, _aware_now(schedule.tz))
    except Exception as exc:
        raise ValueError(f"invalid cron expression: {schedule.expr}") from exc
def compute_next_run(
    schedule: CronSchedule,
    *,
    now_ms: int | None = None,
    last_run_at_ms: int | None = None,
) -> int | None:
    """Return the next trigger time for ``schedule`` in epoch milliseconds.

    Args:
        schedule: The schedule to evaluate.
        now_ms: Reference "now"; the current time is used when omitted (or 0).
        last_run_at_ms: Start time of the previous run, used as the base for
            repeating and cron schedules; ``now_ms`` is used when omitted (or 0).

    Returns:
        * ``at``: ``at_ms`` if it is strictly after ``now_ms``, else ``None``.
        * ``every``: the first ``base + k * every_ms`` (k >= 1) that is
          strictly after ``now_ms``; ``None`` for a non-positive interval.
        * ``cron``: croniter's next occurrence after the base time, or
          ``None`` when there is no expression or croniter is unavailable.
        * any other kind: ``None``.
    """
    now_ms = now_ms or _now_ms()
    if schedule.kind == "at":
        return schedule.at_ms if schedule.at_ms and schedule.at_ms > now_ms else None
    if schedule.kind == "every":
        interval = schedule.every_ms
        if not interval or interval <= 0:
            return None
        next_run = (last_run_at_ms or now_ms) + interval
        if next_run <= now_ms:
            # Skip all missed intervals in one step instead of looping once
            # per interval, which is very slow after a long gap.
            missed = (now_ms - next_run) // interval + 1
            next_run += missed * interval
        return next_run
    if schedule.kind == "cron" and schedule.expr and croniter is not None:
        base = datetime.fromtimestamp((last_run_at_ms or now_ms) / 1000, tz=_timezone(schedule.tz))
        return int(croniter(schedule.expr, base).get_next(datetime).timestamp() * 1000)
    return None
def _parse_datetime(value: str) -> datetime:
    """Parse an ISO-8601 string into a timezone-aware ``datetime``.

    Every ``"Z"`` in the text is rewritten to ``"+00:00"`` before parsing. A
    value without an offset is interpreted as local time.

    Raises:
        ValueError: If the text is not a valid ISO-8601 datetime.
    """
    normalized = value.replace("Z", "+00:00")
    parsed = datetime.fromisoformat(normalized)
    # astimezone() on a naive datetime attaches the system's local timezone.
    return parsed if parsed.tzinfo is not None else parsed.astimezone()
def _aware_now(tz_name: str | None = None) -> datetime:
    """Return the current time as an aware ``datetime`` in the zone ``_timezone`` resolves for ``tz_name``."""
    zone = _timezone(tz_name)
    return datetime.now(tz=zone)
def _timezone(tz_name: str | None = None) -> Any:
    """Resolve ``tz_name`` to a tzinfo object.

    A non-empty name is looked up with ``ZoneInfo``; otherwise the system's
    current local timezone is returned.
    """
    if not tz_name:
        # An aware "now" in local time carries the local tzinfo.
        return datetime.now().astimezone().tzinfo
    return ZoneInfo(tz_name)
def _now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)
def _secure_dir(path: Path) -> None:
    """Best-effort: set mode ``0o700`` on the directory at ``path``.

    Any ``OSError`` from the chmod call is ignored.
    """
    owner_only = 0o700
    try:
        os.chmod(path, owner_only)
    except OSError:
        return
def _secure_file(path: Path) -> None:
    """Best-effort: set mode ``0o600`` on the file at ``path``.

    Any ``OSError`` from the chmod call is ignored.
    """
    owner_read_write = 0o600
    try:
        os.chmod(path, owner_read_write)
    except OSError:
        return
def _optional_str(value: Any) -> str | None:
    """Return ``value`` as a stripped string, or ``None`` when it is missing or blank.

    ``None``, the empty string, and values whose string form is only
    whitespace all produce ``None``.
    """
    if value in (None, ""):
        return None
    text = str(value).strip()
    return text if text else None