from __future__ import annotations import asyncio from pathlib import Path from types import SimpleNamespace import pytest from beaver.memory.curated.snapshot import MemorySnapshot from beaver.services.memory_service import MemoryService from beaver.coordinator import AgentDescriptor, DelegationEnvelope, ExecutionGraph, ExecutionNode from beaver.coordinator.local import LocalAgentRunner from beaver.engine import AgentLoop, EngineLoader from beaver.engine.context import SkillContext from beaver.engine.providers.base import LLMProvider, LLMResponse from beaver.engine.providers.factory import ProviderBundle from beaver.services.team_service import TeamService from beaver.skills.assembler import SkillAssemblyResult from beaver.skills.drafts import DraftService from beaver.skills.publisher import SkillPublisher from beaver.skills.reviews import ReviewService from beaver.skills.specs import SkillSpecStore class RecordingProvider(LLMProvider): def __init__(self, responses: list[LLMResponse]) -> None: super().__init__() self.responses = list(responses) self.calls: list[list[dict]] = [] async def chat( self, messages: list[dict], tools: list[dict] | None = None, model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7, ) -> LLMResponse: self.calls.append(messages) if not self.responses: raise AssertionError("No stubbed provider responses left") return self.responses.pop(0) def get_default_model(self) -> str: return "stub-model" class StubSkillAssembler: def __init__(self, activated_skills: list[SkillContext] | None = None) -> None: self.activated_skills = list(activated_skills or []) async def assemble(self, **kwargs) -> SkillAssemblyResult: return SkillAssemblyResult(activated_skills=list(self.activated_skills)) class BlockingSkillAssembler: def __init__(self) -> None: self.first_started = asyncio.Event() self.release_first = asyncio.Event() async def assemble(self, **kwargs) -> SkillAssemblyResult: if "task first" in kwargs["task_description"]: self.first_started.set() await self.release_first.wait() return SkillAssemblyResult() class PerRunSnapshotMemoryService(MemoryService): def __init__(self, root: Path) -> None: super().__init__(root) self.count = 0 def capture_snapshot_for_run(self) -> MemorySnapshot: self.count += 1 return MemorySnapshot(memory_block=f"# Memory\n\nsnapshot-{self.count}", user_block=None) def get_snapshot(self) -> MemorySnapshot: return MemorySnapshot(memory_block="# Memory\n\nshared-snapshot", user_block=None) def _bundle(provider: RecordingProvider) -> ProviderBundle: return ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), main_provider=provider, ) def _loop(tmp_path: Path) -> AgentLoop: return AgentLoop( loader=EngineLoader( workspace=tmp_path, skill_assembler=StubSkillAssembler(), ) ) def _loop_with_services( tmp_path: Path, *, skill_assembler, memory_service: MemoryService | None = None, ) -> AgentLoop: return AgentLoop( loader=EngineLoader( workspace=tmp_path, skill_assembler=skill_assembler, memory_service=memory_service, ) ) def _response(content: str, *, finish_reason: str = "stop") -> LLMResponse: return LLMResponse( content=content, finish_reason=finish_reason, provider_name="stub", model="stub-model", ) def _publish_skill(workspace: Path, *, skill_name: str, body: str) -> None: store = SkillSpecStore(workspace) draft = DraftService(store).create_new_skill_draft( skill_name=skill_name, proposed_content=body, proposed_frontmatter={"description": f"{skill_name} test skill", "tools": []}, created_by="tester", reason="test", ) ReviewService(store).approve(skill_name, draft.draft_id, reviewer="tester", notes="ok") SkillPublisher(store).publish(skill_name, draft.draft_id, publisher="tester", notes="publish") def test_local_agent_runner_uses_shared_loop_and_records_parent_task(tmp_path: Path) -> None: loop = _loop(tmp_path) provider = RecordingProvider([_response("sub-agent result")]) envelope = DelegationEnvelope( parent_task_id="task-parent", parent_session_id="session-root", parent_run_id="run-root", agent=AgentDescriptor(name="researcher", role="research"), task="research the requested topic", node_id="research", ) result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) loaded = loop.boot() run_record = loaded.run_memory_store.list_runs()[-1] # type: ignore[union-attr] child_session = loaded.session_manager.get_session(result.session_id) # type: ignore[union-attr,arg-type] assert result.success is True assert run_record.task_id == "task-parent" assert child_session["parent_session_id"] == "session-root" def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None: _publish_skill( tmp_path, skill_name="review-check", body="# Review Check\n\nAlways mention the pinned review checklist.\n", ) loop = _loop(tmp_path) provider = RecordingProvider([_response("done")]) envelope = DelegationEnvelope( parent_task_id="task-parent", parent_session_id="session-root", parent_run_id="run-root", agent=AgentDescriptor(name="reviewer"), task="review the work", inherited_pinned_skills=["review-check"], node_id="review", ) result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) loaded = loop.boot() events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id) # type: ignore[union-attr,arg-type] skill_events = [event for event in events if event.event_type == "skill_activation_snapshotted"] assert "Always mention the pinned review checklist" in provider.calls[0][1]["content"] assert skill_events receipts = skill_events[0].event_payload["receipts"] assert receipts[0]["skill_name"] == "review-check" assert receipts[0]["activation_reason"] == "pinned_delegation" def test_ephemeral_pinned_skill_context_is_injected_into_delegated_run(tmp_path: Path) -> None: loop = _loop(tmp_path) provider = RecordingProvider([_response("done")]) envelope = DelegationEnvelope( parent_task_id="task-parent", parent_session_id="session-root", parent_run_id="run-root", agent=AgentDescriptor(name="api_review"), task="review the API", inherited_pinned_skill_contexts=[ SkillContext( name="draft:api-review", content="Always mention schema compatibility.", version="draft:draft-1", content_hash="hash", activation_reason="generated_missing_skill", ) ], node_id="api_review", ) result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) loaded = loop.boot() events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id) # type: ignore[union-attr,arg-type] skill_events = [event for event in events if event.event_type == "skill_activation_snapshotted"] assert "Always mention schema compatibility" in provider.calls[0][1]["content"] receipts = skill_events[0].event_payload["receipts"] assert receipts[0]["skill_name"] == "draft:api-review" assert receipts[0]["skill_version"] == "draft:draft-1" assert receipts[0]["activation_reason"] == "generated_missing_skill" def test_team_sequence_passes_prior_outputs(tmp_path: Path) -> None: loop = _loop(tmp_path) providers = { "first": RecordingProvider([_response("first output")]), "second": RecordingProvider([_response("second output")]), } graph = ExecutionGraph( strategy="sequence", nodes=[ ExecutionNode("first", "step one", AgentDescriptor(name="a")), ExecutionNode("second", "step two", AgentDescriptor(name="b")), ], ) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", parent_run_id="run-root", provider_bundle_factory=lambda node: _bundle(providers[node.node_id]), ) ) assert result.success is True assert result.summary == "first output\n\nsecond output" assert "Dependency first output:\nfirst output" in providers["second"].calls[0][0]["content"] def test_team_parallel_runs_all_nodes(tmp_path: Path) -> None: loop = _loop(tmp_path) providers = { "one": RecordingProvider([_response("one")]), "two": RecordingProvider([_response("two")]), "three": RecordingProvider([_response("three")]), } factory_calls: list[str] = [] graph = ExecutionGraph( strategy="parallel", nodes=[ ExecutionNode("one", "task one", AgentDescriptor(name="one")), ExecutionNode("two", "task two", AgentDescriptor(name="two")), ExecutionNode("three", "task three", AgentDescriptor(name="three")), ], ) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", parent_run_id="run-root", provider_bundle_factory=lambda node: (factory_calls.append(node.node_id) or _bundle(providers[node.node_id])), ) ) assert result.success is True assert sorted(factory_calls) == ["one", "three", "two"] assert result.run_ids and len(result.run_ids) == 3 assert [item.output_text for item in result.node_results] == ["one", "two", "three"] def test_parallel_node_factory_error_is_normalized_and_keeps_completed_runs(tmp_path: Path) -> None: loop = _loop(tmp_path) loaded = loop.boot() parent = loaded.task_service.create_task(session_id="session-root", description="parent task") # type: ignore[union-attr] providers = { "ok": RecordingProvider([_response("ok output")]), } graph = ExecutionGraph( strategy="parallel", nodes=[ ExecutionNode("ok", "task ok", AgentDescriptor(name="ok")), ExecutionNode("bad", "task bad", AgentDescriptor(name="bad")), ], ) def factory(node: ExecutionNode) -> ProviderBundle: if node.node_id == "bad": raise RuntimeError("factory failed") return _bundle(providers[node.node_id]) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=parent.task_id, parent_session_id=parent.session_id, parent_run_id="run-root", provider_bundle_factory=factory, ) ) bad = [item for item in result.node_results if item.node_id == "bad"][0] task = loaded.task_service.get_task(parent.task_id) # type: ignore[union-attr] assert result.success is False assert bad.finish_reason == "error" assert bad.error == "factory failed" assert result.run_ids and len(result.run_ids) == 1 assert task is not None assert task.run_ids == result.run_ids assert "ok output" in result.summary assert "Failed nodes:\n- bad: factory failed" in result.summary def test_team_dag_blocks_dependents_after_failure(tmp_path: Path) -> None: loop = _loop(tmp_path) providers = { "prepare": RecordingProvider([_response("ok")]), "validate": RecordingProvider([_response("failed", finish_reason="error")]), } graph = ExecutionGraph( strategy="dag", nodes=[ ExecutionNode("prepare", "prepare", AgentDescriptor(name="prep")), ExecutionNode("validate", "validate", AgentDescriptor(name="validator"), depends_on=["prepare"]), ExecutionNode("publish", "publish", AgentDescriptor(name="publisher"), depends_on=["validate"]), ], ) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", parent_run_id="run-root", provider_bundle_factory=lambda node: _bundle(providers[node.node_id]), ) ) publish = [item for item in result.node_results if item.node_id == "publish"][0] assert result.success is False assert publish.finish_reason == "blocked" assert publish.run_id is None assert publish.error == "Blocked by failed dependency: validate" assert "failed" not in result.summary.split("Failed nodes:")[0] assert "- validate: failed" in result.summary assert "- publish: Blocked by failed dependency: validate" in result.summary def test_dag_node_factory_error_blocks_dependents(tmp_path: Path) -> None: loop = _loop(tmp_path) providers = { "prepare": RecordingProvider([_response("prepared")]), } graph = ExecutionGraph( strategy="dag", nodes=[ ExecutionNode("prepare", "prepare", AgentDescriptor(name="prep")), ExecutionNode("validate", "validate", AgentDescriptor(name="validator"), depends_on=["prepare"]), ExecutionNode("publish", "publish", AgentDescriptor(name="publisher"), depends_on=["validate"]), ], ) def factory(node: ExecutionNode) -> ProviderBundle: if node.node_id == "validate": raise RuntimeError("validator unavailable") return _bundle(providers[node.node_id]) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", parent_run_id="run-root", provider_bundle_factory=factory, ) ) validate = [item for item in result.node_results if item.node_id == "validate"][0] publish = [item for item in result.node_results if item.node_id == "publish"][0] assert result.success is False assert validate.finish_reason == "error" assert validate.error == "validator unavailable" assert publish.finish_reason == "blocked" assert publish.error == "Blocked by failed dependency: validate" def test_provider_bundle_with_node_model_override_is_normalized_by_team_service(tmp_path: Path) -> None: loop = _loop(tmp_path) provider = RecordingProvider([_response("unused")]) graph = ExecutionGraph( strategy="sequence", nodes=[ExecutionNode("specialist", "work", AgentDescriptor(name="specialist", model="special-model"))], ) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", provider_bundle=_bundle(provider), ) ) assert result.success is False assert result.node_results[0].finish_reason == "error" assert "provider_bundle cannot be combined" in (result.node_results[0].error or "") def test_team_summary_lists_only_failed_nodes_when_all_nodes_fail(tmp_path: Path) -> None: loop = _loop(tmp_path) graph = ExecutionGraph( strategy="parallel", nodes=[ ExecutionNode("one", "task one", AgentDescriptor(name="one")), ExecutionNode("two", "task two", AgentDescriptor(name="two")), ], ) def factory(node: ExecutionNode) -> ProviderBundle: raise RuntimeError(f"{node.node_id} down") result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", provider_bundle_factory=factory, ) ) assert result.success is False assert result.summary == "Failed nodes:\n- one: one down\n- two: two down" def test_graph_structure_errors_still_raise(tmp_path: Path) -> None: loop = _loop(tmp_path) reserved = ExecutionGraph( strategy="moa", nodes=[ExecutionNode("node", "task", AgentDescriptor(name="node"))], ) unknown_dependency = ExecutionGraph( strategy="dag", nodes=[ExecutionNode("node", "task", AgentDescriptor(name="node"), depends_on=["missing"])], ) cyclic = ExecutionGraph( strategy="dag", nodes=[ ExecutionNode("a", "task a", AgentDescriptor(name="a"), depends_on=["b"]), ExecutionNode("b", "task b", AgentDescriptor(name="b"), depends_on=["a"]), ], ) with pytest.raises(NotImplementedError, match="reserved"): asyncio.run(TeamService(loop).run_team(reserved, parent_task_id=None, parent_session_id="session-root")) with pytest.raises(ValueError, match="unknown node"): asyncio.run(TeamService(loop).run_team(unknown_dependency, parent_task_id=None, parent_session_id="session-root")) with pytest.raises(ValueError, match="cyclic or unresolved dependencies"): asyncio.run(TeamService(loop).run_team(cyclic, parent_task_id=None, parent_session_id="session-root")) def test_team_run_does_not_create_independent_team_task(tmp_path: Path) -> None: loop = _loop(tmp_path) loaded = loop.boot() parent = loaded.task_service.create_task(session_id="session-root", description="parent task") # type: ignore[union-attr] provider = RecordingProvider([_response("child output")]) graph = ExecutionGraph( strategy="sequence", nodes=[ExecutionNode("child", "child task", AgentDescriptor(name="child"))], ) result = asyncio.run( TeamService(loop).run_team( graph, parent_task_id=parent.task_id, parent_session_id=parent.session_id, parent_run_id="run-root", provider_bundle=_bundle(provider), ) ) tasks = loaded.task_service.store.list_tasks() # type: ignore[union-attr] run_record = loaded.run_memory_store.list_runs()[-1] # type: ignore[union-attr] assert result.task_id == parent.task_id assert [task.task_id for task in tasks] == [parent.task_id] assert tasks[0].run_ids == result.run_ids assert run_record.task_id == parent.task_id def test_parallel_nodes_use_independent_memory_snapshots(tmp_path: Path) -> None: skill_assembler = BlockingSkillAssembler() memory_service = PerRunSnapshotMemoryService(tmp_path / "memory" / "curated") memory_service.initialize() loop = _loop_with_services(tmp_path, skill_assembler=skill_assembler, memory_service=memory_service) providers = { "first": RecordingProvider([_response("first")]), "second": RecordingProvider([_response("second")]), } graph = ExecutionGraph( strategy="parallel", nodes=[ ExecutionNode("first", "task first", AgentDescriptor(name="first")), ExecutionNode("second", "task second", AgentDescriptor(name="second")), ], ) async def run_team() -> None: task = asyncio.create_task( TeamService(loop).run_team( graph, parent_task_id=None, parent_session_id="session-root", provider_bundle_factory=lambda node: _bundle(providers[node.node_id]), ) ) await skill_assembler.first_started.wait() skill_assembler.release_first.set() await task asyncio.run(run_team()) first_system = providers["first"].calls[0][0]["content"] second_system = providers["second"].calls[0][0]["content"] assert "snapshot-1" in first_system assert "snapshot-2" in second_system assert "shared-snapshot" not in first_system assert "shared-snapshot" not in second_system def test_provider_bundle_with_node_model_override_is_rejected(tmp_path: Path) -> None: loop = _loop(tmp_path) provider = RecordingProvider([_response("unused")]) envelope = DelegationEnvelope( parent_task_id=None, parent_session_id="session-root", parent_run_id=None, agent=AgentDescriptor(name="specialist", model="special-model"), task="work", node_id="specialist", ) with pytest.raises(ValueError, match="provider_bundle cannot be combined"): asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) def test_node_level_model_without_bundle_reaches_provider_resolution(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: captured: dict[str, str | None] = {} provider = RecordingProvider([_response("node model used")]) def fake_make_provider_bundle(**kwargs): captured["model"] = kwargs.get("model") captured["provider_name"] = kwargs.get("provider_name") return _bundle(provider) monkeypatch.setattr("beaver.engine.loop.make_provider_bundle", fake_make_provider_bundle) loop = _loop(tmp_path) envelope = DelegationEnvelope( parent_task_id=None, parent_session_id="session-root", parent_run_id=None, agent=AgentDescriptor(name="specialist", model="special-model", provider_name="custom"), task="work", node_id="specialist", ) result = asyncio.run(LocalAgentRunner(loop).run(envelope)) assert result.success is True assert captured == {"model": "special-model", "provider_name": "custom"} def test_unknown_parent_task_is_rejected_before_any_run(tmp_path: Path) -> None: loop = _loop(tmp_path) provider = RecordingProvider([_response("unused")]) graph = ExecutionGraph( strategy="sequence", nodes=[ExecutionNode("child", "child task", AgentDescriptor(name="child"))], ) with pytest.raises(ValueError, match="Unknown parent_task_id"): asyncio.run( TeamService(loop).run_team( graph, parent_task_id="missing-task", parent_session_id="session-root", provider_bundle=_bundle(provider), ) ) loaded = loop.boot() assert loaded.run_memory_store.list_runs() == [] # type: ignore[union-attr] def test_parent_task_session_mismatch_is_rejected(tmp_path: Path) -> None: loop = _loop(tmp_path) loaded = loop.boot() parent = loaded.task_service.create_task(session_id="session-root", description="parent task") # type: ignore[union-attr] provider = RecordingProvider([_response("unused")]) graph = ExecutionGraph( strategy="sequence", nodes=[ExecutionNode("child", "child task", AgentDescriptor(name="child"))], ) with pytest.raises(ValueError, match="belongs to session"): asyncio.run( TeamService(loop).run_team( graph, parent_task_id=parent.task_id, parent_session_id="other-session", provider_bundle=_bundle(provider), ) )