```
feat(learning): 添加技能学习候选者合成锁定机制 添加了 DraftSynthesisInProgress 和 DraftHasNoChanges 异常来处理并发场景, 确保同一技能学习候选者的合成过程不会重复执行。实现了 claim_learning_candidate_for_synthesis 方法来原子性地锁定候选者进行合成。 fix(web): 为技能草案创建端点添加适当的HTTP状态码 当草案没有变化或正在合成时,现在正确返回409状态码而不是内部错误。 feat(skills): 实现技能修订内容比较以检测无变化情况 添加了 _is_noop_revision 方法来比较基础技能和提议的修订, 如果内容没有实际变化则抛出 NoDraftChanges 异常。 refactor(process): 修复任务证据记录后根运行状态更新逻辑 将任务证据记录事件后的状态从 waiting 更改为 done,并设置 finished_at 时间戳。 feat(tools): 防止在同一运行中重复执行外部写入操作 为邮件发送、日历创建等外部写入工具添加去重机制,避免重复的外部操作。 test: 添加技能学习和工具执行的单元测试 增加测试用例验证并发草案合成、重复外部写入抑制和无变化修订检测等功能。 ```
This commit is contained in:
@ -52,7 +52,13 @@ from beaver.services.user_file_resolver import (
|
||||
)
|
||||
from beaver.skills.authoring import canonical_skill_format_instructions, ensure_canonical_skill_body, normalize_skill_frontmatter
|
||||
from beaver.skills.authoring.format import parse_skill_rewrite_json
|
||||
from beaver.skills.learning import SkillLearningService, SkillLearningWorker, SkillLearningWorkerConfig
|
||||
from beaver.skills.learning import (
|
||||
DraftHasNoChanges,
|
||||
DraftSynthesisInProgress,
|
||||
SkillLearningService,
|
||||
SkillLearningWorker,
|
||||
SkillLearningWorkerConfig,
|
||||
)
|
||||
from beaver.skills.learning.replay import ReplayRunner
|
||||
from beaver.skills.catalog.utils import extract_required_tool_names, parse_frontmatter
|
||||
|
||||
@ -2236,6 +2242,10 @@ def create_app(
|
||||
candidate_id,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
except DraftHasNoChanges as exc:
|
||||
raise HTTPException(status_code=409, detail=str(exc)) from exc
|
||||
except DraftSynthesisInProgress as exc:
|
||||
raise HTTPException(status_code=409, detail=str(exc)) from exc
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=404, detail=str(exc)) from exc
|
||||
return _skill_draft_payload(loaded, draft.skill_name, draft.draft_id)
|
||||
@ -2251,6 +2261,10 @@ def create_app(
|
||||
candidate_id,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
except DraftHasNoChanges as exc:
|
||||
raise HTTPException(status_code=409, detail=str(exc)) from exc
|
||||
except DraftSynthesisInProgress as exc:
|
||||
raise HTTPException(status_code=409, detail=str(exc)) from exc
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=404, detail=str(exc)) from exc
|
||||
return _skill_draft_payload(loaded, draft.skill_name, draft.draft_id)
|
||||
|
||||
@ -114,6 +114,52 @@ class SkillLearningStore:
|
||||
)
|
||||
return updated
|
||||
|
||||
def claim_learning_candidate_for_synthesis(
    self,
    candidate_id: str,
    *,
    force: bool = False,
) -> SkillLearningCandidate | None:
    """Claim a candidate for draft synthesis by flipping it to ``synthesizing``.

    The candidate list is read, checked and rewritten while ``self._locked()``
    is held, so only one caller can move a given candidate into the
    ``synthesizing`` state.

    Args:
        candidate_id: Identifier of the candidate to claim.
        force: When true, claim the candidate even if it already references a
            draft (both ``draft_skill_name`` and ``draft_id`` are set).

    Returns:
        The rewritten candidate (status ``"synthesizing"``, ``last_error``
        cleared, ``updated_at`` refreshed), or ``None`` when the candidate is
        not found, is already ``"queued"``/``"synthesizing"``, or already has a
        draft and ``force`` is false.
    """
    # NOTE(review): the scraped diff lost indentation; this layout assumes the
    # file rewrite and the audit append both run while the lock is held — confirm.
    with self._locked():
        all_candidates = self.list_learning_candidates()
        slot = next(
            (
                position
                for position, item in enumerate(all_candidates)
                if item.candidate_id == candidate_id
            ),
            None,
        )
        if slot is None:
            return None

        current = all_candidates[slot]
        if current.status in {"queued", "synthesizing"}:
            # Someone else already owns (or is about to own) the synthesis step.
            return None
        if not force and current.draft_skill_name and current.draft_id:
            # A draft is already attached; only a forced claim may redo it.
            return None

        fields = current.to_dict()
        fields["status"] = "synthesizing"
        fields["last_error"] = None
        fields["updated_at"] = _utc_now()
        claimed = SkillLearningCandidate.from_dict(fields)
        all_candidates[slot] = claimed

        # Persist the full candidate list as JSON Lines, one candidate per line.
        self.candidates_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = "".join(
            json.dumps(item.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
            for item in all_candidates
        )
        self.candidates_path.write_text(serialized, encoding="utf-8")

        self.append_audit_event(
            candidate_id,
            "draft_synthesis_started",
            {"status": "synthesizing", "force": force},
        )
        return claimed
|
||||
|
||||
def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
|
||||
results: list[SkillLearningCandidate] = []
|
||||
for payload in self._read_jsonl(self.candidates_path):
|
||||
|
||||
@ -351,8 +351,8 @@ class SessionProcessProjector:
|
||||
)
|
||||
|
||||
elif record.event_type == "task_evidence_recorded":
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
root["status"] = "done"
|
||||
root["finished_at"] = created_at
|
||||
add_event(
|
||||
event_id=_event_id(record, "evidence"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
|
||||
@ -9,7 +9,7 @@ from .missing_skill import (
|
||||
MissingSkillDraftResult,
|
||||
MissingSkillSynthesizer,
|
||||
)
|
||||
from .pipeline import SkillLearningPipelineService
|
||||
from .pipeline import DraftHasNoChanges, DraftSynthesisInProgress, SkillLearningPipelineService
|
||||
from .preservation import check_preservation
|
||||
from .replay import ReplayArmRequest, ReplayRunner, ReplayToolExecutor, ReplayToolPolicy, classify_tool_mode
|
||||
from .service import RunReceiptContext, SkillLearningService
|
||||
@ -27,6 +27,8 @@ __all__ = [
|
||||
"MissingSkillDraftResult",
|
||||
"MissingSkillSynthesizer",
|
||||
"RunReceiptContext",
|
||||
"DraftHasNoChanges",
|
||||
"DraftSynthesisInProgress",
|
||||
"SkillLearningPipelineService",
|
||||
"check_preservation",
|
||||
"ReplayToolExecutor",
|
||||
|
||||
@ -9,7 +9,7 @@ from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, S
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning.eval import SkillDraftEvaluator
|
||||
from beaver.skills.learning.replay import ReplayRunner
|
||||
from beaver.skills.learning.service import SkillLearningService
|
||||
from beaver.skills.learning.service import NoDraftChanges, SkillLearningService
|
||||
from beaver.skills.learning.safety import SkillDraftSafetyChecker
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
@ -22,6 +22,14 @@ _REJECTABLE_DRAFT_STATUSES = {
|
||||
}
|
||||
|
||||
|
||||
class DraftSynthesisInProgress(RuntimeError):
    """Signals that a different request has already claimed the candidate for synthesis."""
|
||||
|
||||
|
||||
class DraftHasNoChanges(RuntimeError):
    """Signals that synthesis yielded nothing effectively different from the base skill."""
|
||||
|
||||
|
||||
class SkillLearningPipelineService:
|
||||
"""Coordinates candidate -> draft -> review -> publish lifecycle."""
|
||||
|
||||
@ -60,8 +68,23 @@ class SkillLearningPipelineService:
|
||||
candidate_id: str,
|
||||
*,
|
||||
provider_bundle: ProviderBundle,
|
||||
force: bool = False,
|
||||
) -> SkillDraft:
|
||||
draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
|
||||
if not force:
|
||||
existing = self._draft_for_candidate(candidate_id)
|
||||
if existing is not None:
|
||||
return existing
|
||||
claimed = self.learning_store.claim_learning_candidate_for_synthesis(candidate_id, force=force)
|
||||
if claimed is None:
|
||||
existing = self._draft_for_candidate(candidate_id)
|
||||
if existing is not None:
|
||||
return existing
|
||||
raise DraftSynthesisInProgress(f"Draft synthesis is already in progress for candidate: {candidate_id}")
|
||||
try:
|
||||
draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
|
||||
except NoDraftChanges as exc:
|
||||
self.mark_candidate_superseded(candidate_id, str(exc))
|
||||
raise DraftHasNoChanges(str(exc)) from exc
|
||||
self.mark_draft_synthesized(candidate_id, draft)
|
||||
return draft
|
||||
|
||||
@ -71,13 +94,7 @@ class SkillLearningPipelineService:
|
||||
*,
|
||||
provider_bundle: ProviderBundle,
|
||||
) -> SkillDraft:
|
||||
self.learning_store.transition_learning_candidate(
|
||||
candidate_id,
|
||||
"synthesizing",
|
||||
event_type="draft_synthesis_started",
|
||||
last_error=None,
|
||||
)
|
||||
return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle)
|
||||
return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle, force=True)
|
||||
|
||||
def mark_candidate_queued(self, candidate_id: str) -> SkillLearningCandidate:
|
||||
return self._require_updated(
|
||||
@ -162,6 +179,12 @@ class SkillLearningPipelineService:
|
||||
raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
|
||||
return draft
|
||||
|
||||
def _draft_for_candidate(self, candidate_id: str) -> SkillDraft | None:
    """Look up the draft a candidate points at, if it points at one.

    Returns ``None`` when the candidate lacks a ``draft_skill_name`` or a
    ``draft_id``; otherwise returns whatever ``draft_service.get_draft`` yields
    for that pair.
    """
    linked = self.get_candidate(candidate_id)
    if linked.draft_skill_name and linked.draft_id:
        return self.draft_service.get_draft(linked.draft_skill_name, linked.draft_id)
    return None
|
||||
|
||||
def submit_review(
|
||||
self,
|
||||
skill_name: str,
|
||||
|
||||
@ -20,8 +20,9 @@ from beaver.plugins.tree_merge import merge_supporting_file_trees
|
||||
from beaver.skills.drafts.service import DraftService
|
||||
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
|
||||
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
|
||||
from beaver.skills.catalog.utils import parse_frontmatter
|
||||
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
|
||||
from beaver.skills.specs import SkillActivationReceipt
|
||||
from beaver.skills.specs.serialization import normalize_frontmatter
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -30,6 +31,10 @@ class RunReceiptContext:
|
||||
effect_records: list[SkillEffectRecord] = field(default_factory=list)
|
||||
|
||||
|
||||
class NoDraftChanges(ValueError):
    """Signals that the synthesized skill content is effectively equal to the base version."""
|
||||
|
||||
|
||||
class SkillLearningService:
|
||||
def __init__(
|
||||
self,
|
||||
@ -231,13 +236,18 @@ class SkillLearningService:
|
||||
)
|
||||
target_skill = candidate.related_skill_names[0]
|
||||
base_version = candidate.evidence.get("skill_version")
|
||||
base_skill = self._base_skill_snapshot(target_skill, base_version)
|
||||
payload = await self.synthesizer.synthesize_revision(
|
||||
candidate,
|
||||
packet,
|
||||
provider,
|
||||
model,
|
||||
base_skill=self._base_skill_snapshot(target_skill, base_version),
|
||||
base_skill=base_skill,
|
||||
)
|
||||
if self._is_noop_revision(payload, base_skill):
|
||||
raise NoDraftChanges(
|
||||
f"Synthesis produced no changes for {target_skill}/{base_version or 'current'}"
|
||||
)
|
||||
return self.draft_service.create_revision_draft(
|
||||
skill_name=target_skill,
|
||||
base_version=base_version,
|
||||
@ -340,6 +350,16 @@ class SkillLearningService:
|
||||
"tool_hints": list(loaded.version.tool_hints),
|
||||
}
|
||||
|
||||
@staticmethod
def _is_noop_revision(payload: dict[str, Any], base_skill: dict[str, Any] | None) -> bool:
    """Tell whether a proposed revision equals the base skill after normalization.

    Frontmatter from both sides goes through ``normalize_frontmatter`` and the
    ``content`` of both sides through ``_normalize_skill_body`` before the
    comparison. Without a base skill there is nothing to compare against, so
    the revision is never treated as a no-op.
    """
    if base_skill is None:
        return False

    frontmatter_before = normalize_frontmatter(dict(base_skill.get("frontmatter") or {}))
    frontmatter_after = normalize_frontmatter(dict(payload.get("frontmatter") or {}))
    body_before = _normalize_skill_body(str(base_skill.get("content") or ""))
    body_after = _normalize_skill_body(str(payload.get("content") or ""))

    if frontmatter_before != frontmatter_after:
        return False
    return body_before == body_after
|
||||
|
||||
def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None:
|
||||
snapshots = [
|
||||
snapshot
|
||||
@ -602,6 +622,10 @@ class SkillLearningService:
|
||||
return parsed.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _normalize_skill_body(content: str) -> str:
    """Reduce skill text to a comparable body.

    Frontmatter is removed via ``strip_frontmatter``, trailing whitespace is
    dropped from every line, lines are re-joined with ``\\n`` and the result is
    stripped at both ends.
    """
    body = strip_frontmatter(content).strip()
    trimmed_lines = [line.rstrip() for line in body.splitlines()]
    return "\n".join(trimmed_lines).strip()
|
||||
|
||||
|
||||
def _digest_map(root: Path) -> dict[str, dict[str, Any]]:
|
||||
digest = hash_plugin_skill_tree(root)
|
||||
return {
|
||||
|
||||
@ -9,7 +9,7 @@ from typing import Callable
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.skills import SkillLearningCandidate
|
||||
from beaver.skills.learning.pipeline import SkillLearningPipelineService
|
||||
from beaver.skills.learning.pipeline import DraftHasNoChanges, SkillLearningPipelineService
|
||||
from beaver.skills.learning.replay import ReplayRunner
|
||||
|
||||
|
||||
@ -114,13 +114,13 @@ class SkillLearningWorker:
|
||||
if self._has_active_draft(candidate):
|
||||
self.pipeline.mark_candidate_superseded(candidate.candidate_id, "active draft already exists for this skill")
|
||||
return False
|
||||
self.pipeline.mark_candidate_queued(candidate.candidate_id)
|
||||
self.pipeline.mark_candidate_synthesizing(candidate.candidate_id)
|
||||
draft = await self.pipeline.synthesize_draft(
|
||||
candidate.candidate_id,
|
||||
provider_bundle=self.provider_bundle_factory(),
|
||||
)
|
||||
self.pipeline.mark_draft_synthesized(candidate.candidate_id, draft)
|
||||
try:
|
||||
draft = await self.pipeline.synthesize_draft(
|
||||
candidate.candidate_id,
|
||||
provider_bundle=self.provider_bundle_factory(),
|
||||
)
|
||||
except DraftHasNoChanges:
|
||||
return False
|
||||
safety = self.pipeline.check_safety(draft.skill_name, draft.draft_id)
|
||||
if not safety.passed or safety.risk_level == "critical":
|
||||
return True
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
@ -44,7 +45,45 @@ class ToolExecutor:
|
||||
tool_name=tool_name,
|
||||
error="tool_not_found",
|
||||
)
|
||||
return await tool.invoke(arguments or {}, context or ToolContext())
|
||||
normalized_arguments = dict(arguments or {})
|
||||
tool_context = context or ToolContext()
|
||||
write_key = _external_write_key(tool_name, normalized_arguments)
|
||||
if write_key is None:
|
||||
return await tool.invoke(normalized_arguments, tool_context)
|
||||
|
||||
external_writes = _external_write_state(tool_context)
|
||||
previous = external_writes.get(write_key)
|
||||
if previous is not None:
|
||||
previous_content = str(previous.get("content") or "").strip()
|
||||
detail = f" Previous result: {previous_content}" if previous_content else ""
|
||||
return ToolResult(
|
||||
success=True,
|
||||
content=(
|
||||
f"Duplicate external write suppressed for {tool_name}. "
|
||||
"A matching write was already attempted in this run."
|
||||
f"{detail}"
|
||||
),
|
||||
tool_name=tool_name,
|
||||
error="duplicate_external_write_suppressed",
|
||||
raw_output={"duplicate": True, "previous": previous},
|
||||
)
|
||||
|
||||
external_writes[write_key] = {
|
||||
"tool_name": tool_name,
|
||||
"arguments": normalized_arguments,
|
||||
"status": "attempted",
|
||||
"content": "",
|
||||
"error": None,
|
||||
}
|
||||
result = await tool.invoke(normalized_arguments, tool_context)
|
||||
external_writes[write_key] = {
|
||||
"tool_name": tool_name,
|
||||
"arguments": normalized_arguments,
|
||||
"status": "done" if result.success else "error",
|
||||
"content": result.content,
|
||||
"error": result.error,
|
||||
}
|
||||
return result
|
||||
|
||||
async def execute_tool_call(
|
||||
self,
|
||||
@ -115,3 +154,42 @@ class ToolExecutor:
|
||||
if tool_call.get("name"):
|
||||
return str(tool_call["name"])
|
||||
return "unknown"
|
||||
|
||||
|
||||
_EXTERNAL_WRITE_TOOL_TERMS = (
|
||||
"mail_send_email",
|
||||
"mail_reply_to_message",
|
||||
"mail_forward_message",
|
||||
"mail_move_message",
|
||||
"calendar_create_event",
|
||||
"calendar_update_event",
|
||||
)
|
||||
|
||||
|
||||
def _external_write_state(context: ToolContext) -> dict[str, dict[str, Any]]:
    """Return the dict kept under ``context.metadata["external_write_attempts"]``.

    The entry is created when missing and replaced by a fresh empty dict when
    the stored value is not a dict, so the returned object is always the one
    stored in the metadata.
    """
    attempts = context.metadata.get("external_write_attempts")
    if not isinstance(attempts, dict):
        attempts = {}
        context.metadata["external_write_attempts"] = attempts
    return attempts
|
||||
|
||||
|
||||
def _external_write_key(tool_name: str, arguments: dict[str, Any]) -> str | None:
    """Build the dedup key for an external-write tool call.

    Returns ``None`` when the lower-cased tool name contains none of the terms
    in ``_EXTERNAL_WRITE_TOOL_TERMS``. Otherwise the key is the lower-cased tool
    name, a colon, and the SHA-256 hex digest of the compact, key-sorted JSON
    form of the normalized arguments.
    """
    name = tool_name.lower()
    if all(term not in name for term in _EXTERNAL_WRITE_TOOL_TERMS):
        return None

    canonical = json.dumps(
        _normalize_for_key(arguments),
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
    )
    fingerprint = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"{name}:{fingerprint}"
|
||||
|
||||
|
||||
def _normalize_for_key(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return {str(key): _normalize_for_key(value[key]) for key in sorted(value, key=str)}
|
||||
if isinstance(value, list):
|
||||
return [_normalize_for_key(item) for item in value]
|
||||
if isinstance(value, tuple):
|
||||
return [_normalize_for_key(item) for item in value]
|
||||
if isinstance(value, (str, int, float, bool)) or value is None:
|
||||
return value
|
||||
return str(value)
|
||||
|
||||
@ -0,0 +1,69 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def test_create_instance_writes_default_max_tool_iterations(tmp_path) -> None:
    """Run ``create-instance.sh`` with a fake ``docker`` on PATH and check the config default.

    The generated ``config.json`` must set ``agents.defaults.maxToolIterations`` to 100.
    """
    app_instance_dir = Path(__file__).resolve().parents[3]

    # Fake `docker` executable; its directory is prepended to PATH below.
    docker_stub = """#!/usr/bin/env bash
set -euo pipefail
case "${1:-}" in
image)
[[ "${2:-}" == "inspect" ]]
exit 0
;;
container)
[[ "${2:-}" == "inspect" ]]
exit 1
;;
run)
exit 0
;;
*)
echo "unexpected docker command: $*" >&2
exit 1
;;
esac
"""
    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    docker = fake_bin / "docker"
    docker.write_text(docker_stub, encoding="utf-8")
    docker.chmod(0o755)

    env = os.environ.copy()
    env["PATH"] = f"{fake_bin}:{env['PATH']}"
    instances_root = tmp_path / "instances"
    command = [
        str(app_instance_dir / "create-instance.sh"),
        "--instance-id",
        "default-tools",
        "--auth-username",
        "steven",
        "--auth-password",
        "secret",
        "--skip-provider-config",
        "--host-port",
        "29001",
        "--instances-root",
        str(instances_root),
        "--registry",
        str(tmp_path / "registry.json"),
        "--skip-initial-skills",
    ]
    result = subprocess.run(
        command,
        cwd=app_instance_dir,
        env=env,
        text=True,
        capture_output=True,
        check=False,
    )

    assert result.returncode == 0, result.stderr
    config_path = instances_root / "default-tools" / "beaver-home" / "config.json"
    config = json.loads(config_path.read_text(encoding="utf-8"))

    assert config["agents"]["defaults"]["maxToolIterations"] == 100
|
||||
@ -363,6 +363,52 @@ def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -
|
||||
assert tool_result["metadata"]["success"] is True
|
||||
|
||||
|
||||
def test_process_projection_marks_root_done_when_result_is_ready(tmp_path: Path) -> None:
    """A session ending in ``task_evidence_recorded`` projects a ``done`` root run with ``finished_at``."""
    session_key = "web:test"
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
    main_run = RunRecord(
        run_id="main-run",
        session_id=session_key,
        task_id="task-1",
        attempt_index=1,
        task_text="send email",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(main_run)

    # (event type, event payload, extra keyword arguments) in emission order;
    # only the evidence event carries a run_id.
    system_events = [
        (
            "task_execution_planned",
            {"task_id": "task-1", "attempt_index": 1, "plan_mode": "single", "strategy": "single"},
            {},
        ),
        (
            "task_synthesis_completed",
            {"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
            {},
        ),
        (
            "task_evidence_recorded",
            {"task_id": "task-1", "attempt_index": 1, "evidence_status": "recorded"},
            {"run_id": "main-run"},
        ),
    ]
    for event_type, payload, extra in system_events:
        session.append_message(
            session_key,
            role="system",
            event_type=event_type,
            event_payload=payload,
            context_visible=False,
            **extra,
        )

    projection = SessionProcessProjector(session, run_store).project(session_key)

    root_run = next(run for run in projection["runs"] if run["run_id"] == "task:task-1:attempt:1")
    assert root_run["status"] == "done"
    assert root_run["finished_at"] is not None
|
||||
|
||||
|
||||
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
|
||||
session = SessionManager(tmp_path)
|
||||
run_store = RunMemoryStore(tmp_path / "memory" / "runs")
|
||||
|
||||
@ -5,6 +5,8 @@ import json
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.engine.session import SessionManager
|
||||
@ -13,6 +15,8 @@ from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
|
||||
from beaver.skills.authoring.format import is_canonical_skill_body
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import (
|
||||
DraftHasNoChanges,
|
||||
DraftSynthesisInProgress,
|
||||
EvidenceSelector,
|
||||
SkillDraftSynthesizer,
|
||||
SkillLearningPipelineService,
|
||||
@ -22,7 +26,7 @@ from beaver.skills.learning import (
|
||||
)
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
from beaver.skills.specs import SkillSpecStore, SkillVersion
|
||||
|
||||
|
||||
class JsonProvider(LLMProvider):
|
||||
@ -44,6 +48,20 @@ class JsonProvider(LLMProvider):
|
||||
return "stub"
|
||||
|
||||
|
||||
class BlockingJsonProvider(JsonProvider):
    """``JsonProvider`` whose ``chat`` pauses until the test releases it.

    Every ``chat`` call bumps ``calls``, sets the ``started`` event, waits on
    the ``release`` event and only then delegates to ``JsonProvider.chat``.
    """

    def __init__(self, *, started: asyncio.Event, release: asyncio.Event) -> None:
        super().__init__()
        self.started = started
        self.release = release
        self.calls = 0

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        self.calls += 1
        self.started.set()
        await self.release.wait()
        response = await super().chat(
            messages,
            tools=tools,
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response
|
||||
|
||||
|
||||
def _bundle(provider: LLMProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ``ProviderBundle`` with a stub runtime (model and provider name ``"stub"``)."""
    stub_runtime = SimpleNamespace(model="stub", provider_name="stub")
    bundle = ProviderBundle(main_runtime=stub_runtime, main_provider=provider)  # type: ignore[arg-type]
    return bundle
|
||||
@ -120,6 +138,69 @@ def _pipeline(tmp_path: Path) -> SkillLearningPipelineService:
|
||||
)
|
||||
|
||||
|
||||
def _revision_pipeline(tmp_path: Path, content: str, frontmatter: dict) -> SkillLearningPipelineService:
    """Assemble a pipeline holding one ``web-operation`` skill version and one revision candidate.

    Writes version ``v0001`` of ``web-operation`` (review state ``published``)
    with the given ``content``/``frontmatter`` and makes it current, records run
    ``run-1`` and the ``revise_skill`` candidate ``candidate-revision`` that
    references it, then wires the services into a pipeline.
    """
    skill_name = "web-operation"
    spec_store = SkillSpecStore(tmp_path)
    base_version = SkillVersion(
        skill_name=skill_name,
        version="v0001",
        content_hash="hash-v1",
        summary_hash="summary-v1",
        created_at="2026-06-01T00:00:00+00:00",
        created_by="test",
        change_reason="initial",
        parent_version=None,
        review_state="published",
        frontmatter=frontmatter,
        summary="web operation",
        tool_hints=list(frontmatter.get("tools") or []),
    )
    spec_store.write_skill_version(base_version, content)
    spec_store.set_current_version(skill_name, "v0001")

    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
    learning_store = SkillLearningStore(tmp_path / "memory" / "skills")
    source_run = RunRecord(
        run_id="run-1",
        session_id="session-1",
        task_text="check detailed weather",
        started_at="start",
        ended_at="end",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(source_run)
    revision_candidate = SkillLearningCandidate(
        candidate_id="candidate-revision",
        kind="revise_skill",
        source_run_ids=["run-1"],
        source_session_ids=["session-1"],
        related_skill_names=[skill_name],
        reason="revise web guidance",
        evidence={"skill_version": "v0001"},
        priority=10,
        confidence=0.9,
    )
    learning_store.record_learning_candidate(revision_candidate)

    draft_service = DraftService(spec_store)
    learning_service = SkillLearningService(
        run_store=run_store,
        learning_store=learning_store,
        draft_service=draft_service,
        evidence_selector=EvidenceSelector(run_store),
        synthesizer=SkillDraftSynthesizer(),
    )
    pipeline = SkillLearningPipelineService(
        learning_store=learning_store,
        learning_service=learning_service,
        draft_service=draft_service,
        review_service=ReviewService(spec_store),
        publisher=SkillPublisher(spec_store),
    )
    return pipeline
|
||||
|
||||
|
||||
def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
worker = SkillLearningWorker(
|
||||
@ -137,6 +218,104 @@ def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> No
|
||||
assert pipeline.list_drafts(candidate.draft_skill_name)[0].status == "draft"
|
||||
|
||||
|
||||
def test_concurrent_draft_synthesis_is_claimed_once(tmp_path: Path) -> None:
    """A second synthesis request made while the first is blocked in the provider is rejected.

    The first request must still finish, leaving a single draft and exactly one
    call on the blocking provider.
    """
    pipeline = _pipeline(tmp_path)

    async def scenario():
        provider_entered = asyncio.Event()
        let_provider_finish = asyncio.Event()
        blocking_provider = BlockingJsonProvider(started=provider_entered, release=let_provider_finish)
        in_flight = asyncio.create_task(
            pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(blocking_provider))
        )
        # Wait until the first request is inside the provider call.
        await asyncio.wait_for(provider_entered.wait(), timeout=1)
        with pytest.raises(DraftSynthesisInProgress):
            await pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider()))
        let_provider_finish.set()
        finished_draft = await in_flight
        return finished_draft, blocking_provider

    draft, provider = asyncio.run(scenario())
    candidate = pipeline.get_candidate("candidate-1")

    assert provider.calls == 1
    assert candidate.status == "draft_ready"
    assert candidate.draft_id == draft.draft_id
    assert len(pipeline.list_drafts(candidate.draft_skill_name)) == 1
|
||||
|
||||
|
||||
def test_existing_draft_synthesis_request_returns_same_draft(tmp_path: Path) -> None:
    """Requesting synthesis again for a candidate that has a draft returns that draft.

    The second request is given a provider built with ``fail=True`` and must
    still return the first draft without adding another one.
    """
    pipeline = _pipeline(tmp_path)

    working_bundle = _bundle(JsonProvider())
    first = asyncio.run(pipeline.synthesize_draft("candidate-1", provider_bundle=working_bundle))
    failing_bundle = _bundle(JsonProvider(fail=True))
    second = asyncio.run(pipeline.synthesize_draft("candidate-1", provider_bundle=failing_bundle))

    assert second.draft_id == first.draft_id
    assert len(pipeline.list_drafts(first.skill_name)) == 1
|
||||
|
||||
|
||||
def test_revision_synthesis_with_no_content_changes_supersedes_candidate(tmp_path: Path) -> None:
    """A revision whose payload equals the base skill raises ``DraftHasNoChanges``.

    Afterwards the candidate is ``superseded`` with a "no changes" error and no
    draft exists for ``web-operation``.
    """
    # Every entry is one line of the skill document; each gets a trailing newline.
    skill_lines = (
        "---",
        "name: web-operation",
        "description: Web search and fetch.",
        "tools:",
        " - web_fetch",
        " - web_search",
        "---",
        "",
        "# Web Operation",
        "",
        "## Overview",
        "",
        "Web search and fetch.",
        "",
        "## When to Use",
        "",
        "- Use when web information is required.",
        "",
        "## Required Tools",
        "",
        "- `web_fetch`",
        "- `web_search`",
        "",
        "## Workflow",
        "",
        "- Use web_search, then web_fetch.",
        "",
        "## Validation",
        "",
        "- Verify sources.",
        "",
        "## Boundaries",
        "",
        "- Stay within the request.",
        "",
        "## Anti-Patterns",
        "",
        "- Do not cite unsupported claims.",
    )
    content = "".join(f"{line}\n" for line in skill_lines)
    frontmatter = {
        "name": "web-operation",
        "description": "Web search and fetch.",
        "tools": ["web_fetch", "web_search"],
    }
    pipeline = _revision_pipeline(tmp_path, content, frontmatter)
    unchanged_payload = {
        "frontmatter": frontmatter,
        "content": content,
        "change_reason": "No changes are required.",
    }
    provider = JsonProvider(payload=unchanged_payload)

    with pytest.raises(DraftHasNoChanges):
        asyncio.run(pipeline.synthesize_draft("candidate-revision", provider_bundle=_bundle(provider)))
    candidate = pipeline.get_candidate("candidate-revision")

    assert candidate.status == "superseded"
    assert "no changes" in (candidate.last_error or "").lower()
    assert pipeline.list_drafts("web-operation") == []
|
||||
|
||||
|
||||
def test_worker_evaluates_draft_with_replay_runner_when_available(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
replay_runner = FakeReplayRunner()
|
||||
|
||||
@ -28,12 +28,14 @@ class DummyTool(BaseTool):
|
||||
toolset=toolset,
|
||||
always_available=always_available,
|
||||
)
|
||||
self.calls: list[dict] = []
|
||||
|
||||
@property
def spec(self) -> ToolSpec:
    """Expose the tool spec held in ``self._spec``."""
    return self._spec
|
||||
|
||||
async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
    """Record a copy of ``arguments`` in ``self.calls`` and return a successful ``"ok"`` result."""
    recorded = dict(arguments)
    self.calls.append(recorded)
    return ToolResult(success=True, content="ok", tool_name=self.spec.name)
|
||||
|
||||
|
||||
@ -198,3 +200,30 @@ def test_tool_executor_parses_object_tool_call_string_arguments() -> None:
|
||||
|
||||
assert name == "echo"
|
||||
assert arguments == {"text": "hello"}
|
||||
|
||||
|
||||
def test_tool_executor_suppresses_duplicate_external_write_in_same_run() -> None:
    """Sending the same mail twice through one context invokes the tool only once.

    The second result is reported as successful but tagged
    ``duplicate_external_write_suppressed``.
    """
    tool_name = "mcp_outlook_mcp_mail_send_email"
    registry = ToolRegistry()
    send_tool = DummyTool(tool_name, toolset="mcp")
    registry.register(send_tool)
    executor = ToolExecutor(registry)
    context = ToolContext(metadata={"task_id": "task-1", "run_id": "run-1"})
    arguments = {
        "to_recipients": ["jay.chen@boardware.com"],
        "subject": "请回复今天下午的日程安排",
        "body": "Hi Jay",
    }

    first = asyncio.run(executor.execute(tool_name, arguments, context=context))
    # An equal-but-distinct dict must still be recognised as the same write.
    second = asyncio.run(executor.execute(tool_name, dict(arguments), context=context))

    assert first.success is True
    assert second.success is True
    assert second.error == "duplicate_external_write_suppressed"
    assert "Duplicate external write suppressed" in second.content
    assert len(send_tool.calls) == 1
|
||||
|
||||
Reference in New Issue
Block a user