Files
beaver_project/app-instance/backend/beaver/services/cron_service.py
steven_li 3b0af173cc refactor(beaver): 移除Hermes相关引用和迁移代码,完善Beaver后端主线实现
移除了所有Hermes相关的命名引用,包括:
- 从.gitignore中清理相关构建缓存文件
- 将README中的beaver-home路径配置更新
- 完善backend/README.md文档说明Beaver后端主线实现
- 移除Hermes风格的相关注释和兼容性代码
- 清理nanobot环境变量兼容性处理
- 删除技能迁移和服务迁移相关功能代码
- 更新测试用例中相关命名和函数名

BREAKING CHANGE: 移除了Hermes迁移相关API和CLI命令,不再支持nanobot环境变量兼容性
2026-05-14 17:20:32 +08:00

508 lines
18 KiB
Python

"""Cron scheduling service for Beaver scheduled Tasks."""
from __future__ import annotations
import asyncio
import inspect
import json
import os
import re
import tempfile
import threading
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo
from beaver.foundation.models import CronExecutionResult, CronJob, CronPayload, CronRunRecord, CronSchedule
try: # pragma: no cover - exercised through cron schedule tests when installed
from croniter import croniter
except ModuleNotFoundError: # pragma: no cover - defensive dependency guard
croniter = None # type: ignore[assignment]
# Async callback invoked for each job run. CronService._call_on_job passes it
# either (job) or (job, run_record), depending on how many parameters the
# callback declares.
CronCallback = Callable[..., Awaitable[CronExecutionResult | str | None]]
# Whole-string duration such as "30s", "15 min", "2h" or "1 day": an integer
# amount, optional whitespace, then one of the listed unit spellings
# (matched case-insensitively).
_DURATION_RE = re.compile(
r"^(\d+)\s*(s|sec|secs|second|seconds|m|min|mins|minute|minutes|h|hr|hrs|hour|hours|d|day|days)$",
re.IGNORECASE,
)
# Characters accepted in a single cron field: digits and * ? , - / L W #.
# parse_schedule uses this to decide whether input looks like a cron expression.
_CRON_FIELD_RE = re.compile(r"^[\d\*\?,\-/LW#]+$", re.IGNORECASE)
# Maximum number of run records kept in each job's history; CronService trims
# older records after every run.
_MAX_HISTORY = 20
class CronService:
    """Persistent single-timer scheduler.

    Jobs are stored as JSON at ``store_path`` and cached in memory after the
    first load. One asyncio task sleeps until the earliest ``next_run_at_ms``
    of any enabled job and then runs every due job through ``on_job``. The
    callback routes agent work through Task mode so every scheduled trigger is
    visible as a normal Task.

    Locking: ``_lock`` is a plain, non-reentrant ``threading.Lock`` guarding
    the cached job list and the store file. Methods ending in ``_unlocked``
    expect the caller to hold it; every other method acquires it itself and
    therefore must never be called while the lock is already held.
    """

    # Enabled jobs whose stored next run is older than this (two hours, in
    # milliseconds) get a freshly computed next run when the service starts.
    _STALE_NEXT_RUN_MS = 7_200_000

    # Sort key used for jobs without a next run so they are listed last.
    _NO_NEXT_RUN_SORT_KEY = 9_999_999_999_999

    # True while the timer task is executing due jobs. Kept as a class-level
    # default so instances created without ``__init__`` still have it.
    _ticking = False

    def __init__(self, store_path: str | Path, *, on_job: CronCallback | None = None) -> None:
        """Create a stopped service; no file I/O happens until jobs are first read.

        Args:
            store_path: Location of the JSON job store.
            on_job: Optional async callback invoked for every job run.
        """
        self.store_path = Path(store_path)
        self.on_job = on_job
        self._jobs: list[CronJob] | None = None  # lazily loaded cache of the store
        self._lock = threading.Lock()
        self._running = False
        self._timer_task: asyncio.Task[None] | None = None
        self._ticking = False

    async def start(self) -> None:
        """Load the store, refresh stale next-run times, persist, and arm the timer."""
        self._running = True
        self._load_jobs()
        self._recompute_next_runs()
        self._save_jobs()
        self._arm_timer()

    def stop(self) -> None:
        """Stop scheduling and cancel the timer task (including an in-flight tick)."""
        self._running = False
        if self._timer_task is not None:
            self._timer_task.cancel()
            self._timer_task = None

    def status(self) -> dict[str, Any]:
        """Return whether the service is running, the job count, and the next wake time."""
        all_jobs = self.list_jobs(include_disabled=True)
        return {
            "enabled": self._running,
            "jobs": len(all_jobs),
            "next_wake_at_ms": self._next_wake_ms(),
        }

    def list_jobs(self, *, include_disabled: bool = False) -> list[CronJob]:
        """Return jobs ordered by next run time; jobs without one come last.

        Args:
            include_disabled: When false (the default) disabled jobs are omitted.
        """
        jobs = self._load_jobs()
        if not include_disabled:
            jobs = [job for job in jobs if job.enabled]
        return sorted(jobs, key=lambda job: job.next_run_at_ms or self._NO_NEXT_RUN_SORT_KEY)

    def get_job(self, job_id: str) -> CronJob | None:
        """Return the job with ``job_id``, or ``None`` when there is no such job."""
        return next((job for job in self._load_jobs() if job.id == job_id), None)

    def add_job(
        self,
        *,
        name: str,
        message: str,
        schedule: CronSchedule,
        session_key: str | None = None,
        payload_kind: str = "agent_turn",
        mode: str = "notification",
        requires_followup: bool = False,
        deliver: bool = False,
        channel: str | None = None,
        to: str | None = None,
        delete_after_run: bool = False,
    ) -> CronJob:
        """Create, persist, and schedule a new enabled job.

        The name falls back to the first 50 characters of the message and then
        to ``"scheduled task"``. Unknown ``payload_kind`` values become
        ``"agent_turn"``; any ``mode`` other than ``"task"`` becomes
        ``"notification"``.

        Raises:
            ValueError: If the message is blank or the schedule is invalid.
        """
        cleaned_name = name.strip() or message[:50].strip() or "scheduled task"
        cleaned_message = message.strip()
        if not cleaned_message:
            raise ValueError("message is required")
        validate_schedule(schedule)
        now = _now_ms()
        job = CronJob(
            id=uuid4().hex[:12],
            name=cleaned_name,
            enabled=True,
            schedule=schedule,
            payload=CronPayload(
                kind=payload_kind if payload_kind in {"agent_turn", "system_event"} else "agent_turn",  # type: ignore[arg-type]
                mode="task" if mode == "task" else "notification",
                message=cleaned_message,
                session_key=session_key,
                requires_followup=requires_followup,
                deliver=deliver,
                channel=channel,
                to=to,
            ),
            next_run_at_ms=compute_next_run(schedule, now_ms=now),
            created_at_ms=now,
            updated_at_ms=now,
            delete_after_run=delete_after_run,
        )
        with self._lock:
            jobs = self._load_jobs_unlocked()
            jobs.append(job)
            self._jobs = jobs
            self._save_jobs_unlocked()
        # Re-arm only after releasing the lock: _arm_timer acquires it again.
        self._arm_timer()
        return job

    def update_enabled(self, job_id: str, enabled: bool) -> CronJob | None:
        """Enable or disable a job and persist the change.

        Enabling recomputes the next run from the current time; disabling
        clears it. Returns the updated job, or ``None`` if ``job_id`` is unknown.
        """
        with self._lock:
            target = next((job for job in self._load_jobs_unlocked() if job.id == job_id), None)
            if target is None:
                return None
            target.enabled = bool(enabled)
            target.updated_at_ms = _now_ms()
            target.next_run_at_ms = compute_next_run(target.schedule) if target.enabled else None
            self._save_jobs_unlocked()
        # Must run outside the lock: _arm_timer re-acquires the non-reentrant
        # lock via _next_wake_ms and would otherwise deadlock.
        self._arm_timer()
        return target

    def remove_job(self, job_id: str) -> bool:
        """Delete a job; return ``True`` if it existed and was removed."""
        with self._lock:
            jobs = self._load_jobs_unlocked()
            remaining = [job for job in jobs if job.id != job_id]
            if len(remaining) == len(jobs):
                return False
            self._jobs = remaining
            self._save_jobs_unlocked()
        # Re-arm only after releasing the lock: _arm_timer acquires it again.
        self._arm_timer()
        return True

    async def run_job(self, job_id: str, *, force: bool = False) -> bool:
        """Run one job immediately.

        Returns ``False`` when the job does not exist, or when it is disabled
        and ``force`` is false; otherwise executes it, persists, and returns
        ``True``.
        """
        job = self.get_job(job_id)
        if job is None:
            return False
        if not force and not job.enabled:
            return False
        await self._execute_job(job)
        self._save_jobs()
        self._arm_timer()
        return True

    def list_runs(self) -> list[tuple[CronJob, CronRunRecord]]:
        """Return every recorded run paired with its job, newest start first."""
        runs = [
            (job, run)
            for job in self.list_jobs(include_disabled=True)
            for run in job.history
        ]
        return sorted(runs, key=lambda item: item[1].started_at_ms, reverse=True)

    def get_run(self, scheduled_run_id: str) -> tuple[CronJob, CronRunRecord] | None:
        """Find a run by ``scheduled_run_id``; ``None`` when no run matches."""
        for job, run in self.list_runs():
            if run.scheduled_run_id == scheduled_run_id:
                return job, run
        return None

    def mark_run_engaged(
        self,
        scheduled_run_id: str,
        *,
        task_id: str,
        intent: str,
    ) -> tuple[CronJob, CronRunRecord] | None:
        """Flag a run as engaged, recording the task id and intent, and persist.

        Returns the ``(job, run)`` pair, or ``None`` when no run matches.
        """
        with self._lock:
            for job in self._load_jobs_unlocked():
                for run in job.history:
                    if run.scheduled_run_id != scheduled_run_id:
                        continue
                    now = _now_ms()
                    run.engaged = True
                    run.engaged_at_ms = now
                    run.engage_intent = intent
                    run.task_id = task_id
                    job.updated_at_ms = now
                    self._save_jobs_unlocked()
                    return job, run
        return None

    def update_job_message(self, job_id: str, message: str) -> CronJob | None:
        """Replace a job's payload message and persist.

        Returns the updated job, or ``None`` if ``job_id`` is unknown.

        Raises:
            ValueError: If the message is blank after stripping.
        """
        cleaned = message.strip()
        if not cleaned:
            raise ValueError("message is required")
        with self._lock:
            for job in self._load_jobs_unlocked():
                if job.id != job_id:
                    continue
                job.payload.message = cleaned
                job.updated_at_ms = _now_ms()
                self._save_jobs_unlocked()
                return job
        return None

    async def _on_timer(self) -> None:
        """Run every enabled job that is due, persist, then schedule the next tick."""
        # While this flag is set, _arm_timer leaves the current task alone so a
        # concurrent add/remove/update cannot cancel a job in mid-run.
        self._ticking = True
        try:
            now = _now_ms()
            due_jobs = [
                job
                for job in self.list_jobs(include_disabled=False)
                if job.next_run_at_ms is not None and job.next_run_at_ms <= now
            ]
            for job in due_jobs:
                await self._execute_job(job)
            self._save_jobs()
        finally:
            self._ticking = False
            # Drop our own handle first; otherwise re-arming would call
            # cancel() on the task that is currently running this code.
            if self._timer_task is asyncio.current_task():
                self._timer_task = None
            # No-op after stop(); otherwise picks up any change made while
            # the tick was executing.
            self._arm_timer()

    async def _execute_job(self, job: CronJob) -> None:
        """Run one job through ``on_job`` and record the outcome on the job.

        Exceptions derived from ``Exception`` are recorded as an ``"error"``
        run and not re-raised. Anything else (for example task cancellation)
        propagates after the run record has been appended; the job's schedule
        is left untouched in that case.
        """
        start_ms = _now_ms()
        run_record = CronRunRecord(started_at_ms=start_ms, mode=job.payload.mode)
        try:
            result = CronExecutionResult(mode=job.payload.mode)
            if self.on_job is not None:
                raw = await self._call_on_job(job, run_record)
                result = raw if isinstance(raw, CronExecutionResult) else CronExecutionResult(response=raw, mode=job.payload.mode)
            run_record.status = "ok"
            run_record.mode = result.mode
            run_record.output = result.response
            run_record.notification_session_id = result.notification_session_id
            run_record.task_id = result.task_id
            run_record.run_id = result.run_id
            job.last_status = "ok"
            job.last_error = None
        except Exception as exc:
            run_record.status = "error"
            run_record.error = str(exc)
            job.last_status = "error"
            job.last_error = str(exc)
        finally:
            finish_ms = _now_ms()
            run_record.finished_at_ms = finish_ms
            job.last_run_at_ms = start_ms
            job.updated_at_ms = finish_ms
            job.history.append(run_record)
            job.history = job.history[-_MAX_HISTORY:]
        # Deliberately outside ``finally``: the helper returns early, and a
        # ``return`` inside ``finally`` would swallow an in-flight exception.
        self._schedule_after_run(job)

    def _schedule_after_run(self, job: CronJob) -> None:
        """Update a job's schedule after a run: retire one-shot jobs, advance the rest."""
        if job.schedule.kind != "at":
            job.next_run_at_ms = compute_next_run(job.schedule, now_ms=_now_ms(), last_run_at_ms=job.last_run_at_ms)
            return
        if job.delete_after_run:
            # Removed from the in-memory list only; the caller persists afterwards.
            with self._lock:
                self._jobs = [item for item in self._load_jobs_unlocked() if item.id != job.id]
            return
        job.enabled = False
        job.next_run_at_ms = None

    async def _call_on_job(self, job: CronJob, run_record: CronRunRecord) -> CronExecutionResult | str | None:
        """Invoke ``on_job``, passing ``run_record`` only if it declares two or more parameters."""
        if self.on_job is None:
            return None
        try:
            params = inspect.signature(self.on_job).parameters
        except (TypeError, ValueError):
            # Signature unavailable: fall back to the single-argument form.
            params = {}
        if len(params) >= 2:
            return await self.on_job(job, run_record)
        return await self.on_job(job)

    def _recompute_next_runs(self) -> None:
        """Refresh next-run times that are missing or stale, saving if anything changed.

        Only enabled jobs are touched. A stored time less than
        ``_STALE_NEXT_RUN_MS`` in the past is kept, so that job is still due
        on the first tick.
        """
        now = _now_ms()
        changed = False
        for job in self._load_jobs():
            if not job.enabled:
                continue
            if job.next_run_at_ms is None or job.next_run_at_ms < now - self._STALE_NEXT_RUN_MS:
                job.next_run_at_ms = compute_next_run(job.schedule, now_ms=now, last_run_at_ms=job.last_run_at_ms)
                changed = True
        if changed:
            self._save_jobs()

    def _next_wake_ms(self) -> int | None:
        """Return the earliest next-run time among enabled jobs, or ``None``."""
        candidates = [
            job.next_run_at_ms
            for job in self._load_jobs()
            if job.enabled and job.next_run_at_ms is not None
        ]
        return min(candidates) if candidates else None

    def _arm_timer(self) -> None:
        """(Re)schedule the single timer task for the earliest upcoming run.

        Must be called without holding ``_lock`` (it reads the job list through
        the locking accessor). Creating the task needs a running event loop, so
        once the service is running this must be called from the loop's thread.
        """
        if self._ticking:
            # The timer task is busy executing jobs; cancelling it would abort
            # the run in progress. _on_timer re-arms when it finishes.
            return
        if self._timer_task is not None:
            self._timer_task.cancel()
            self._timer_task = None
        if not self._running:
            return
        next_wake = self._next_wake_ms()
        if next_wake is None:
            return

        async def tick() -> None:
            await asyncio.sleep(max(0, next_wake - _now_ms()) / 1000)
            if self._running:
                await self._on_timer()

        self._timer_task = asyncio.create_task(tick())

    def _load_jobs(self) -> list[CronJob]:
        """Return a shallow copy of the job list (the job objects are shared)."""
        with self._lock:
            return list(self._load_jobs_unlocked())

    def _load_jobs_unlocked(self) -> list[CronJob]:
        """Return the cached job list, reading the store on first use.

        Caller must hold ``_lock``. Creates the store directory if needed; a
        missing store file yields an empty list.
        """
        if self._jobs is not None:
            return self._jobs
        self.store_path.parent.mkdir(parents=True, exist_ok=True)
        _secure_dir(self.store_path.parent)
        if not self.store_path.exists():
            self._jobs = []
            return self._jobs
        payload = json.loads(self.store_path.read_text(encoding="utf-8"))
        raw_jobs = payload.get("jobs") if isinstance(payload, dict) else []
        # Entries that are not JSON objects are skipped.
        self._jobs = [CronJob.from_dict(item) for item in raw_jobs or [] if isinstance(item, dict)]
        return self._jobs

    def _save_jobs(self) -> None:
        """Persist the cached jobs while holding ``_lock``."""
        with self._lock:
            self._save_jobs_unlocked()

    def _save_jobs_unlocked(self) -> None:
        """Write the cached jobs to the store file; caller must hold ``_lock``.

        Does nothing if the jobs were never loaded. The data is written to a
        temporary file in the same directory, fsynced, and then moved over the
        store with ``os.replace``.
        """
        if self._jobs is None:
            return
        self.store_path.parent.mkdir(parents=True, exist_ok=True)
        _secure_dir(self.store_path.parent)
        fd, tmp_name = tempfile.mkstemp(prefix=".jobs-", suffix=".json", dir=str(self.store_path.parent))
        tmp_path = Path(tmp_name)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                json.dump(
                    {"version": 1, "updated_at_ms": _now_ms(), "jobs": [job.to_dict() for job in self._jobs]},
                    handle,
                    ensure_ascii=False,
                    indent=2,
                    sort_keys=True,
                )
                handle.write("\n")
                handle.flush()
                os.fsync(handle.fileno())
            os.replace(tmp_path, self.store_path)
            _secure_file(self.store_path)
        finally:
            # After a successful replace the temp file is already gone; this
            # only cleans up after a failed write.
            tmp_path.unlink(missing_ok=True)
def parse_duration(value: str) -> int:
    """Convert a duration such as ``30s``, ``15 min`` or ``1 day`` to seconds.

    Surrounding whitespace is ignored.

    Raises:
        ValueError: If the text is not an integer followed by a known unit.
    """
    parsed = _DURATION_RE.match(value.strip())
    if parsed is None:
        raise ValueError("duration must look like 30s, 15m, 2h, or 1d")
    count_text, unit_text = parsed.groups()
    # Every accepted spelling begins with the letter that identifies its unit.
    unit_key = unit_text.lower()[0]
    seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    return int(count_text) * seconds_per_unit[unit_key]
def parse_schedule(value: str) -> CronSchedule:
    """Parse a free-form schedule string into a ``CronSchedule``.

    Forms are tried in this order:
      1. ``every <duration>`` -> repeating interval.
      2. Five or six whitespace-separated fields whose first five look like
         cron fields -> cron expression (validated).
      3. Text containing ``T`` or starting with ``YYYY-MM-DD`` -> one-shot at
         that ISO date/time.
      4. Anything else -> a duration, giving a one-shot that far from now.

    Raises:
        ValueError: If the text matches none of the forms or fails validation.
    """
    raw = value.strip()

    if raw.lower().startswith("every "):
        interval_s = parse_duration(raw[6:].strip())
        return CronSchedule(kind="every", every_ms=interval_s * 1000, display=f"every {interval_s}s")

    fields = raw.split()
    looks_like_cron = len(fields) in {5, 6} and all(_CRON_FIELD_RE.match(field) for field in fields[:5])
    if looks_like_cron:
        cron_schedule = CronSchedule(kind="cron", expr=raw, display=raw)
        validate_schedule(cron_schedule)
        return cron_schedule

    looks_like_datetime = "T" in raw or re.match(r"^\d{4}-\d{2}-\d{2}", raw)
    if looks_like_datetime:
        when = _parse_datetime(raw)
        return CronSchedule(kind="at", at_ms=int(when.timestamp() * 1000), display=f"once at {when:%Y-%m-%d %H:%M}")

    # Fallback: a bare duration means "once, this long from now".
    delay_s = parse_duration(raw)
    fire_at_ms = _now_ms() + delay_s * 1000
    return CronSchedule(kind="at", at_ms=fire_at_ms, display=f"once in {raw}")
def schedule_from_api(payload: dict[str, Any]) -> CronSchedule:
    """Build a ``CronSchedule`` from an API request body.

    The first usable key wins, checked in this order: ``schedule`` (free-form
    text), ``every_seconds``, ``cron_expr`` (with optional ``tz``), ``at_iso``.

    Raises:
        ValueError: If none of the keys is supplied, ``every_seconds`` is not
            positive, or the chosen value fails to parse or validate.
    """
    free_form = payload.get("schedule")
    if free_form:
        return parse_schedule(str(free_form))

    every_seconds = payload.get("every_seconds")
    if every_seconds not in (None, ""):
        seconds = int(every_seconds)
        if seconds <= 0:
            raise ValueError("every_seconds must be greater than 0")
        return CronSchedule(kind="every", every_ms=seconds * 1000, display=f"every {seconds}s")

    cron_expr = payload.get("cron_expr")
    if cron_expr:
        expr = str(cron_expr).strip()
        cron_schedule = CronSchedule(kind="cron", expr=expr, tz=_optional_str(payload.get("tz")), display=expr)
        validate_schedule(cron_schedule)
        return cron_schedule

    at_iso = payload.get("at_iso")
    if at_iso:
        when = _parse_datetime(str(at_iso))
        return CronSchedule(kind="at", at_ms=int(when.timestamp() * 1000), display=f"once at {when:%Y-%m-%d %H:%M}")

    raise ValueError("one of schedule, every_seconds, cron_expr, or at_iso is required")
def validate_schedule(schedule: CronSchedule) -> None:
    """Check that a schedule carries the fields its ``kind`` requires.

    ``every`` needs a positive ``every_ms``; ``at`` needs ``at_ms``; ``cron``
    needs ``expr``, a known timezone when ``tz`` is set, the croniter package,
    and an expression croniter accepts.

    Raises:
        ValueError: On any violation, or when ``kind`` is not recognised.
    """
    kind = schedule.kind

    if kind == "every":
        interval_ms = schedule.every_ms
        if not interval_ms or interval_ms <= 0:
            raise ValueError("every schedule requires a positive every_ms")
        return

    if kind == "at":
        if not schedule.at_ms:
            raise ValueError("at schedule requires at_ms")
        return

    if kind != "cron":
        raise ValueError(f"unknown schedule kind: {schedule.kind}")

    # Cron checks run cheapest-first: expression present, timezone known,
    # dependency installed, and finally the expression itself.
    if not schedule.expr:
        raise ValueError("cron schedule requires expr")
    if schedule.tz:
        try:
            ZoneInfo(schedule.tz)
        except Exception as exc:
            raise ValueError(f"unknown timezone: {schedule.tz}") from exc
    if croniter is None:
        raise ValueError("cron schedules require the croniter package")
    try:
        croniter(schedule.expr, _aware_now(schedule.tz))
    except Exception as exc:
        raise ValueError(f"invalid cron expression: {schedule.expr}") from exc
def compute_next_run(
    schedule: CronSchedule,
    *,
    now_ms: int | None = None,
    last_run_at_ms: int | None = None,
) -> int | None:
    """Return the next run time in epoch milliseconds, or ``None`` if there is none.

    Args:
        schedule: The schedule to evaluate.
        now_ms: Reference "now"; a falsy value means the current time.
        last_run_at_ms: Start of the previous run, if any. Interval and cron
            schedules are computed from it; a falsy value means ``now_ms``.

    Returns:
        * ``at``: ``at_ms`` while it is still in the future, else ``None``.
        * ``every``: the first ``base + k * every_ms`` (``k >= 1``) strictly
          after ``now_ms``; ``None`` for a missing or non-positive interval.
        * ``cron``: the first croniter match after the base time, in the
          schedule's timezone (local timezone when unset). ``None`` when the
          expression is empty or croniter is not installed.
        * any other kind: ``None``.
    """
    now_ms = now_ms or _now_ms()

    if schedule.kind == "at":
        return schedule.at_ms if schedule.at_ms and schedule.at_ms > now_ms else None

    if schedule.kind == "every":
        interval_ms = schedule.every_ms
        if not interval_ms or interval_ms <= 0:
            return None
        base = last_run_at_ms or now_ms
        next_run = base + interval_ms
        if next_run <= now_ms:
            # Skip all missed intervals in one step rather than looping once
            # per interval, which can be millions of iterations after a long
            # downtime with a short interval.
            missed = (now_ms - next_run) // interval_ms + 1
            next_run += missed * interval_ms
        return next_run

    if schedule.kind == "cron" and schedule.expr and croniter is not None:
        base = datetime.fromtimestamp((last_run_at_ms or now_ms) / 1000, tz=_timezone(schedule.tz))
        return int(croniter(schedule.expr, base).get_next(datetime).timestamp() * 1000)

    return None
def _parse_datetime(value: str) -> datetime:
    """Parse an ISO-8601 string into a timezone-aware ``datetime``.

    ``Z`` is accepted as a UTC suffix. Input without an offset is interpreted
    as local time.
    """
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    # astimezone() on a naive value attaches the system's local timezone.
    return parsed if parsed.tzinfo is not None else parsed.astimezone()
def _aware_now(tz_name: str | None = None) -> datetime:
    """Return the current time as an aware ``datetime`` in the named or local timezone."""
    zone = _timezone(tz_name)
    return datetime.now(tz=zone)
def _timezone(tz_name: str | None = None) -> Any:
    """Return a tzinfo for ``tz_name``, or the system's local tzinfo when it is empty."""
    if not tz_name:
        # Ask the OS which offset applies right now and reuse that tzinfo.
        return datetime.now().astimezone().tzinfo
    return ZoneInfo(tz_name)
def _now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)
def _secure_dir(path: Path) -> None:
    """Best-effort: set the directory's mode to 0o700, ignoring ``OSError``."""
    owner_only = 0o700
    try:
        os.chmod(path, owner_only)
    except OSError:
        # Deliberately ignored: a failed chmod must not block the caller.
        return
def _secure_file(path: Path) -> None:
    """Best-effort: set the file's mode to 0o600, ignoring ``OSError``."""
    owner_read_write = 0o600
    try:
        os.chmod(path, owner_read_write)
    except OSError:
        # Deliberately ignored: a failed chmod must not block the caller.
        return
def _optional_str(value: Any) -> str | None:
    """Return ``value`` as a stripped string, or ``None`` when it is ``None``, empty, or blank."""
    if value in (None, ""):
        return None
    text = str(value).strip()
    return text if text else None