feat(learning): 添加技能学习候选者合成锁定机制

添加了 DraftSynthesisInProgress 和 DraftHasNoChanges 异常来处理并发场景,
确保同一技能学习候选者的合成过程不会重复执行。实现了 claim_learning_candidate_for_synthesis
方法来原子性地锁定候选者进行合成。

fix(web): 为技能草案创建端点添加适当的HTTP状态码

当草案没有变化或正在合成时,现在正确返回409状态码而不是内部错误。

feat(skills): 实现技能修订内容比较以检测无变化情况

添加了 _is_noop_revision 方法来比较基础技能和提议的修订,
如果内容没有实际变化则抛出 DraftHasNoChanges 异常。

refactor(process): 修复任务证据记录后根运行状态更新逻辑

将任务证据记录事件后的状态从 waiting 更改为 done,并设置 finished_at 时间戳。

feat(tools): 防止在同一运行中重复执行外部写入操作

为邮件发送、日历创建等外部写入工具添加去重机制,避免重复的外部操作。

test: 添加技能学习和工具执行的单元测试

增加测试用例验证并发草案合成、重复外部写入抑制和无变化修订检测等功能。
```
This commit is contained in:
2026-06-16 15:58:42 +08:00
parent f07ce019fe
commit 83d9d8c200
15 changed files with 615 additions and 29 deletions

View File

@ -0,0 +1,69 @@
import json
import os
import subprocess
from pathlib import Path
def test_create_instance_writes_default_max_tool_iterations(tmp_path) -> None:
    """Run create-instance.sh with a stub ``docker`` and check the written config.

    A fake ``docker`` executable is placed first on PATH, the script is invoked
    with a fixed set of flags, and the resulting ``config.json`` must carry
    ``agents.defaults.maxToolIterations == 100``.
    """
    script_dir = Path(__file__).resolve().parents[3]

    # Stub docker: `image inspect` exits 0, `container ...` exits 1,
    # `run` exits 0, anything else is reported on stderr and exits 1.
    stub_script = """#!/usr/bin/env bash
set -euo pipefail
case "${1:-}" in
image)
[[ "${2:-}" == "inspect" ]]
exit 0
;;
container)
[[ "${2:-}" == "inspect" ]]
exit 1
;;
run)
exit 0
;;
*)
echo "unexpected docker command: $*" >&2
exit 1
;;
esac
"""
    stub_bin_dir = tmp_path / "bin"
    stub_bin_dir.mkdir()
    stub_docker = stub_bin_dir / "docker"
    stub_docker.write_text(stub_script, encoding="utf-8")
    stub_docker.chmod(0o755)

    # Prepend the stub directory so it shadows any docker already on PATH.
    child_env = os.environ.copy()
    child_env["PATH"] = f"{stub_bin_dir}:{child_env['PATH']}"

    instances_root = tmp_path / "instances"
    registry_file = tmp_path / "registry.json"
    command = [
        str(script_dir / "create-instance.sh"),
        "--instance-id",
        "default-tools",
        "--auth-username",
        "steven",
        "--auth-password",
        "secret",
        "--skip-provider-config",
        "--host-port",
        "29001",
        "--instances-root",
        str(instances_root),
        "--registry",
        str(registry_file),
        "--skip-initial-skills",
    ]
    completed = subprocess.run(
        command,
        cwd=script_dir,
        env=child_env,
        text=True,
        capture_output=True,
        check=False,
    )
    # Surface the script's stderr when it fails, instead of a bare exit code.
    assert completed.returncode == 0, completed.stderr

    written_config_path = instances_root / "default-tools" / "beaver-home" / "config.json"
    written_config = json.loads(written_config_path.read_text(encoding="utf-8"))
    assert written_config["agents"]["defaults"]["maxToolIterations"] == 100

View File

@ -363,6 +363,52 @@ def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -
assert tool_result["metadata"]["success"] is True
def test_process_projection_marks_root_done_when_result_is_ready(tmp_path: Path) -> None:
    """Check the projected root run is ``done`` once task evidence is recorded.

    Seeds one successful run record plus the planned / synthesis-completed /
    evidence-recorded session events for task-1 attempt 1, then asserts the
    root run ``task:task-1:attempt:1`` has status ``done`` and a non-None
    ``finished_at``.
    """
    session_key = "web:test"
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    main_run = RunRecord(
        run_id="main-run",
        session_id=session_key,
        task_id="task-1",
        attempt_index=1,
        task_text="send email",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(main_run)

    # (event type, payload, extra keyword arguments) in emission order;
    # only the evidence event is attached to the main run.
    timeline = [
        (
            "task_execution_planned",
            {"task_id": "task-1", "attempt_index": 1, "plan_mode": "single", "strategy": "single"},
            {},
        ),
        (
            "task_synthesis_completed",
            {"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
            {},
        ),
        (
            "task_evidence_recorded",
            {"task_id": "task-1", "attempt_index": 1, "evidence_status": "recorded"},
            {"run_id": "main-run"},
        ),
    ]
    for event_type, payload, extra in timeline:
        session.append_message(
            session_key,
            role="system",
            event_type=event_type,
            event_payload=payload,
            context_visible=False,
            **extra,
        )

    projector = SessionProcessProjector(session, run_store)
    projection = projector.project(session_key)
    root_run = next(
        entry for entry in projection["runs"] if entry["run_id"] == "task:task-1:attempt:1"
    )
    assert root_run["status"] == "done"
    assert root_run["finished_at"] is not None
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
session = SessionManager(tmp_path)
run_store = RunMemoryStore(tmp_path / "memory" / "runs")

View File

@ -5,6 +5,8 @@ import json
from pathlib import Path
from types import SimpleNamespace
import pytest
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.engine.session import SessionManager
@ -13,6 +15,8 @@ from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
from beaver.skills.authoring.format import is_canonical_skill_body
from beaver.skills.drafts import DraftService
from beaver.skills.learning import (
DraftHasNoChanges,
DraftSynthesisInProgress,
EvidenceSelector,
SkillDraftSynthesizer,
SkillLearningPipelineService,
@ -22,7 +26,7 @@ from beaver.skills.learning import (
)
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
from beaver.skills.specs import SkillSpecStore, SkillVersion
class JsonProvider(LLMProvider):
@ -44,6 +48,20 @@ class JsonProvider(LLMProvider):
return "stub"
class BlockingJsonProvider(JsonProvider):
    """JsonProvider whose ``chat`` waits on an event before answering.

    Each ``chat`` call increments ``calls``, sets ``started`` and then waits
    for ``release`` before delegating to the parent implementation.
    """

    def __init__(self, *, started: asyncio.Event, release: asyncio.Event) -> None:
        super().__init__()
        self.calls = 0
        self.started = started
        self.release = release

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Count the call, signal ``started``, wait for ``release``, then delegate."""
        self.calls += 1
        self.started.set()
        await self.release.wait()
        response = await super().chat(
            messages,
            tools=tools,
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response
def _bundle(provider: LLMProvider) -> ProviderBundle:
    """Wrap *provider* in a ProviderBundle with a stub main runtime."""
    return ProviderBundle(
        main_runtime=SimpleNamespace(model="stub", provider_name="stub"),  # type: ignore[arg-type]
        main_provider=provider,
    )
@ -120,6 +138,69 @@ def _pipeline(tmp_path: Path) -> SkillLearningPipelineService:
)
def _revision_pipeline(tmp_path: Path, content: str, frontmatter: dict) -> SkillLearningPipelineService:
    """Build a pipeline seeded with a published skill and one revision candidate.

    Writes ``web-operation`` version ``v0001`` (using *content* and
    *frontmatter*) as the current version, records one successful run
    ``run-1`` and a ``revise_skill`` candidate ``candidate-revision`` that
    points at that skill, then wires the learning, draft, review and
    publisher services together under *tmp_path*.
    """
    skill_name = "web-operation"
    base_version = "v0001"

    specs = SkillSpecStore(tmp_path)
    published = SkillVersion(
        skill_name=skill_name,
        version=base_version,
        content_hash="hash-v1",
        summary_hash="summary-v1",
        created_at="2026-06-01T00:00:00+00:00",
        created_by="test",
        change_reason="initial",
        parent_version=None,
        review_state="published",
        frontmatter=frontmatter,
        summary="web operation",
        # A missing or empty "tools" entry yields an empty hint list.
        tool_hints=list(frontmatter.get("tools") or []),
    )
    specs.write_skill_version(published, content)
    specs.set_current_version(skill_name, base_version)

    runs = RunMemoryStore(tmp_path / "memory" / "runs")
    candidates = SkillLearningStore(tmp_path / "memory" / "skills")

    source_run = RunRecord(
        run_id="run-1",
        session_id="session-1",
        task_text="check detailed weather",
        started_at="start",
        ended_at="end",
        success=True,
        finish_reason="stop",
    )
    runs.append_run_record(source_run)

    revision_candidate = SkillLearningCandidate(
        candidate_id="candidate-revision",
        kind="revise_skill",
        source_run_ids=["run-1"],
        source_session_ids=["session-1"],
        related_skill_names=[skill_name],
        reason="revise web guidance",
        evidence={"skill_version": base_version},
        priority=10,
        confidence=0.9,
    )
    candidates.record_learning_candidate(revision_candidate)

    drafts = DraftService(specs)
    learning = SkillLearningService(
        run_store=runs,
        learning_store=candidates,
        draft_service=drafts,
        evidence_selector=EvidenceSelector(runs),
        synthesizer=SkillDraftSynthesizer(),
    )
    return SkillLearningPipelineService(
        learning_store=candidates,
        learning_service=learning,
        draft_service=drafts,
        review_service=ReviewService(specs),
        publisher=SkillPublisher(specs),
    )
def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
worker = SkillLearningWorker(
@ -137,6 +218,104 @@ def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> No
assert pipeline.list_drafts(candidate.draft_skill_name)[0].status == "draft"
def test_concurrent_draft_synthesis_is_claimed_once(tmp_path: Path) -> None:
    """A second synthesis request for a candidate already being synthesized is rejected.

    The first request is parked inside a blocking provider; while it is
    pending, a second request for the same candidate must raise
    ``DraftSynthesisInProgress``. After release, exactly one provider call
    and exactly one draft must exist.
    """
    pipeline = _pipeline(tmp_path)
    candidate_id = "candidate-1"

    async def run_race():
        started, release = asyncio.Event(), asyncio.Event()
        blocking = BlockingJsonProvider(started=started, release=release)
        in_flight = asyncio.create_task(
            pipeline.synthesize_draft(candidate_id, provider_bundle=_bundle(blocking))
        )
        # Proceed only once the first request has reached the provider call.
        await asyncio.wait_for(started.wait(), timeout=1)

        with pytest.raises(DraftSynthesisInProgress):
            await pipeline.synthesize_draft(
                candidate_id,
                provider_bundle=_bundle(JsonProvider()),
            )

        release.set()
        finished = await in_flight
        return finished, blocking

    draft, provider = asyncio.run(run_race())

    candidate = pipeline.get_candidate(candidate_id)
    assert provider.calls == 1
    assert candidate.status == "draft_ready"
    assert candidate.draft_id == draft.draft_id
    drafts_for_skill = pipeline.list_drafts(candidate.draft_skill_name)
    assert len(drafts_for_skill) == 1
def test_existing_draft_synthesis_request_returns_same_draft(tmp_path: Path) -> None:
    """Re-requesting synthesis for a candidate returns the draft created the first time.

    The second request is given a provider built with ``fail=True``; the test
    still expects the first draft's id back and a single stored draft.
    """
    pipeline = _pipeline(tmp_path)
    candidate_id = "candidate-1"

    working_bundle = _bundle(JsonProvider())
    original = asyncio.run(pipeline.synthesize_draft(candidate_id, provider_bundle=working_bundle))

    failing_bundle = _bundle(JsonProvider(fail=True))
    repeated = asyncio.run(pipeline.synthesize_draft(candidate_id, provider_bundle=failing_bundle))

    assert repeated.draft_id == original.draft_id
    stored = pipeline.list_drafts(original.skill_name)
    assert len(stored) == 1
def test_revision_synthesis_with_no_content_changes_supersedes_candidate(tmp_path: Path) -> None:
    """A revision identical to the published skill raises and supersedes the candidate.

    The provider payload repeats the base skill's frontmatter and content, so
    synthesis must raise ``DraftHasNoChanges``, leave the candidate in status
    ``superseded`` with a "no changes" error, and store no draft.
    """
    skill_name = "web-operation"
    candidate_id = "candidate-revision"

    # Kept as adjacent literals: the exact bytes of this document are what
    # the pipeline compares against the proposed revision.
    base_content = (
        "---\n"
        "name: web-operation\n"
        "description: Web search and fetch.\n"
        "tools:\n"
        " - web_fetch\n"
        " - web_search\n"
        "---\n"
        "\n"
        "# Web Operation\n"
        "\n"
        "## Overview\n"
        "\n"
        "Web search and fetch.\n"
        "\n"
        "## When to Use\n"
        "\n"
        "- Use when web information is required.\n"
        "\n"
        "## Required Tools\n"
        "\n"
        "- `web_fetch`\n"
        "- `web_search`\n"
        "\n"
        "## Workflow\n"
        "\n"
        "- Use web_search, then web_fetch.\n"
        "\n"
        "## Validation\n"
        "\n"
        "- Verify sources.\n"
        "\n"
        "## Boundaries\n"
        "\n"
        "- Stay within the request.\n"
        "\n"
        "## Anti-Patterns\n"
        "\n"
        "- Do not cite unsupported claims.\n"
    )
    base_frontmatter = {
        "name": "web-operation",
        "description": "Web search and fetch.",
        "tools": ["web_fetch", "web_search"],
    }
    pipeline = _revision_pipeline(tmp_path, base_content, base_frontmatter)

    # The proposed revision is byte-for-byte the published skill.
    unchanged_payload = {
        "frontmatter": base_frontmatter,
        "content": base_content,
        "change_reason": "No changes are required.",
    }
    bundle = _bundle(JsonProvider(payload=unchanged_payload))

    with pytest.raises(DraftHasNoChanges):
        asyncio.run(pipeline.synthesize_draft(candidate_id, provider_bundle=bundle))

    candidate = pipeline.get_candidate(candidate_id)
    assert candidate.status == "superseded"
    recorded_error = (candidate.last_error or "").lower()
    assert "no changes" in recorded_error
    assert pipeline.list_drafts(skill_name) == []
def test_worker_evaluates_draft_with_replay_runner_when_available(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
replay_runner = FakeReplayRunner()

View File

@ -28,12 +28,14 @@ class DummyTool(BaseTool):
toolset=toolset,
always_available=always_available,
)
self.calls: list[dict] = []
@property
def spec(self) -> ToolSpec:
    """Return the ToolSpec stored on this tool as ``_spec``."""
    return self._spec
async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
    """Record the call and return a fixed successful result.

    A shallow copy of *arguments* is appended to ``self.calls`` so tests can
    count and inspect invocations; *context* is not used.
    """
    recorded = dict(arguments)
    self.calls.append(recorded)
    result = ToolResult(success=True, content="ok", tool_name=self.spec.name)
    return result
@ -198,3 +200,30 @@ def test_tool_executor_parses_object_tool_call_string_arguments() -> None:
assert name == "echo"
assert arguments == {"text": "hello"}
def test_tool_executor_suppresses_duplicate_external_write_in_same_run() -> None:
    """A repeated mail-send call with equal arguments in one run is not re-invoked.

    Both executions report success, but the second carries the
    ``duplicate_external_write_suppressed`` error marker and the underlying
    tool is invoked only once.
    """
    tool_name = "mcp_outlook_mcp_mail_send_email"
    mail_tool = DummyTool(tool_name, toolset="mcp")

    registry = ToolRegistry()
    registry.register(mail_tool)
    executor = ToolExecutor(registry)

    run_context = ToolContext(metadata={"task_id": "task-1", "run_id": "run-1"})
    mail_arguments = {
        "to_recipients": ["jay.chen@boardware.com"],
        "subject": "请回复今天下午的日程安排",
        "body": "Hi Jay",
    }

    first = asyncio.run(executor.execute(tool_name, mail_arguments, context=run_context))
    # The repeat passes an equal but distinct dict object.
    second = asyncio.run(executor.execute(tool_name, dict(mail_arguments), context=run_context))

    assert first.success is True
    assert second.success is True
    assert second.error == "duplicate_external_write_suppressed"
    assert "Duplicate external write suppressed" in second.content
    assert len(mail_tool.calls) == 1