from __future__ import annotations import asyncio from pathlib import Path from types import SimpleNamespace from beaver.engine import AgentRunResult, EngineLoader from beaver.engine.context import SkillContext from beaver.engine.providers.base import LLMProvider, LLMResponse from beaver.engine.providers.factory import ProviderBundle from beaver.services.agent_service import AgentService from beaver.skills.assembler import SkillAssemblyResult from beaver.tasks import TaskExecutionPlan, TaskService class StubProvider(LLMProvider): def __init__(self, responses: list[LLMResponse]) -> None: super().__init__() self._responses = list(responses) self.seen_messages: list[list[dict]] = [] async def chat( self, messages: list[dict], tools: list[dict] | None = None, model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7, ) -> LLMResponse: if not self._responses: raise AssertionError("No stubbed provider responses left") self.seen_messages.append(messages) return self._responses.pop(0) def get_default_model(self) -> str: return "stub-model" class StubTaskExecutionPlanner: async def plan(self, **kwargs) -> TaskExecutionPlan: return TaskExecutionPlan.single("test-single") class RecordingTaskExecutionPlanner: def __init__(self) -> None: self.calls: list[dict] = [] async def plan(self, **kwargs) -> TaskExecutionPlan: self.calls.append(dict(kwargs)) return TaskExecutionPlan.single("test-single") class RecordingSkillAssembler: def __init__(self, skills: list[SkillContext]) -> None: self.skills = list(skills) self.calls: list[dict] = [] async def assemble(self, **kwargs) -> SkillAssemblyResult: self.calls.append(dict(kwargs)) return SkillAssemblyResult(activated_skills=list(self.skills)) class RecordingTaskAttemptOrchestrator: def __init__(self) -> None: self.calls: list[dict] = [] async def run(self, **kwargs) -> AgentRunResult: self.calls.append(dict(kwargs)) task = kwargs["task"] task.task_id = "task-from-orchestrator" return AgentRunResult( session_id=kwargs["kwargs"]["session_id"], run_id="run-from-orchestrator", output_text="orchestrated", finish_reason="stop", tool_iterations=0, task_id=task.task_id, task_status=task.status, ) class FakeLearningCandidate: def to_dict(self) -> dict: return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"} def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse: return LLMResponse( content=f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}', finish_reason="stop", provider_name="stub", model="stub-model", ) def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle: return ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), main_provider=StubProvider( [ LLMResponse( content=response, finish_reason="stop", provider_name="stub", model="stub-model", ) for response in responses ] ), auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), auxiliary_provider=StubProvider([_route_response(route_action)]), ) def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) result = asyncio.run( service.process_direct( "draft release notes", session_id="web:test", provider_bundle=_bundle("Done"), ) ) task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(result.task_id or "") assert task is not None assert task.status == "awaiting_acceptance" assert task.validation_result is None assert result.validation_result is None event_types = [event.event_type for event in task_service.list_events(task.task_id)] assert "evidence_recorded" in event_types assert "validated" not in event_types def test_agent_service_records_router_latency(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) result = asyncio.run( service.process_direct( "draft release notes", session_id="web:latency", provider_bundle=_bundle("Done"), ) ) latency = result.usage["latency_ms"] assert latency["router_ms"] > 0 def test_task_mode_preselects_skills_for_planner_and_reuses_them_in_main_run(tmp_path: Path) -> None: skill = SkillContext( name="docker-debug", content="Use docker logs before editing config.", version="v1", content_hash="hash-v1", activation_reason="llm_selected", tool_hints=["terminal"], ) skill_assembler = RecordingSkillAssembler([skill]) planner = RecordingTaskExecutionPlanner() service = AgentService( loader=EngineLoader( workspace=tmp_path, skill_assembler=skill_assembler, task_execution_planner=planner, ) ) result = asyncio.run( service.process_direct( "debug this workflow", session_id="web:skill-aware-task", provider_bundle=_bundle("Done"), ) ) assert result.task_id assert len(skill_assembler.calls) == 1 assert planner.calls assert planner.calls[0]["skill_summaries"] == ["docker-debug: Use docker logs before editing config."] assert planner.calls[0]["tool_hints"] == ["terminal"] task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(result.task_id) assert task is not None assert task.skill_names == ["docker-debug"] def test_task_mode_delegates_attempt_execution_to_orchestrator(tmp_path: Path) -> None: orchestrator = RecordingTaskAttemptOrchestrator() service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) service._build_task_attempt_orchestrator = lambda loaded: orchestrator # type: ignore[attr-defined] result = asyncio.run( service.process_direct( "draft release notes", session_id="web:orchestrator", provider_bundle=_bundle("main runner should not be used"), ) ) assert result.output_text == "orchestrated" assert result.run_id == "run-from-orchestrator" assert len(orchestrator.calls) == 1 assert orchestrator.calls[0]["message"] == "draft release notes" assert orchestrator.calls[0]["task"].description == "draft release notes" def test_task_mode_injects_prompt_locale_output_language(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) main_provider = StubProvider( [ LLMResponse( content="Done", finish_reason="stop", provider_name="stub", model="stub-model", ) ] ) bundle = ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), main_provider=main_provider, auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), auxiliary_provider=StubProvider([_route_response("new_task", "Product summary")]), ) result = asyncio.run( service.process_direct( "Summarize the uploaded report in English", session_id="web:locale-task", prompt_locale="en", provider_bundle=bundle, ) ) assert result.task_id assert main_provider.seen_messages system_prompt = main_provider.seen_messages[-1][0]["content"] assert "Use English for user-facing replies" in system_prompt assert "Output language: English." in system_prompt task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(result.task_id) assert task is not None assert task.metadata["prompt_locale"] == "en" def test_unrelated_simple_chat_auto_accepts_active_task(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) first = asyncio.run( service.process_direct( "recommend food in Hengqin", session_id="web:new-topic-chat", provider_bundle=_bundle("Food recommendations"), ) ) second = asyncio.run( service.process_direct( "have you eaten?", session_id="web:new-topic-chat", provider_bundle=_bundle("I do not eat.", route_action="simple_chat"), ) ) task_service = service.create_loop().boot().task_service assert task_service is not None previous = task_service.get_task(first.task_id or "") assert previous is not None assert previous.status == "closed" assert previous.run_ids == [first.run_id] assert previous.feedback[-1]["acceptance_type"] == "accept" assert previous.metadata["final_accepted_run_id"] == first.run_id assert second.task_id is None def test_unrelated_new_task_auto_accepts_previous_task(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) first = asyncio.run( service.process_direct( "recommend food in Hengqin", session_id="web:new-topic-task", provider_bundle=_bundle("Food recommendations"), ) ) second = asyncio.run( service.process_direct( "check today's weather in Iceland", session_id="web:new-topic-task", provider_bundle=_bundle("Weather result", route_action="new_task"), ) ) task_service = service.create_loop().boot().task_service assert task_service is not None previous = task_service.get_task(first.task_id or "") current = task_service.get_task(second.task_id or "") assert previous is not None assert current is not None assert previous.status == "closed" assert previous.run_ids == [first.run_id] assert previous.feedback[-1]["acceptance_type"] == "accept" assert current.task_id != previous.task_id assert current.status == "awaiting_acceptance" assert current.run_ids == [second.run_id] def test_standalone_realtime_repeat_creates_new_task_in_same_session(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) session_id = "feishu:group-weather" first = asyncio.run( service.process_direct( "珠海天气怎样", session_id=session_id, provider_bundle=_bundle("Weather result"), ) ) second = asyncio.run( service.process_direct( "珠海天气怎么样", session_id=session_id, provider_bundle=_bundle("Fresh weather result", route_action="continue_task"), ) ) task_service = service.create_loop().boot().task_service assert task_service is not None previous = task_service.get_task(first.task_id or "") current = task_service.get_task(second.task_id or "") assert previous is not None assert current is not None assert previous.session_id == session_id assert current.session_id == session_id assert current.task_id != previous.task_id assert previous.status == "closed" assert previous.run_ids == [first.run_id] assert current.status == "awaiting_acceptance" assert current.run_ids == [second.run_id] def test_related_follow_up_continues_active_task_without_accepting_it(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) first = asyncio.run( service.process_direct( "recommend food in Hengqin", session_id="web:continue-topic", provider_bundle=_bundle("Food recommendations"), ) ) second = asyncio.run( service.process_direct( "include restaurants near the port", session_id="web:continue-topic", provider_bundle=_bundle("More recommendations", route_action="continue_task"), ) ) task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(first.task_id or "") assert task is not None assert second.task_id == first.task_id assert task.status == "awaiting_acceptance" assert task.run_ids == [first.run_id, second.run_id] assert task.feedback == [] def test_requested_revision_keeps_active_task_without_accepting_it(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) first = asyncio.run( service.process_direct( "recommend food in Hengqin", session_id="web:revise-topic", provider_bundle=_bundle("Food recommendations"), ) ) second = asyncio.run( service.process_direct( "remove expensive restaurants", session_id="web:revise-topic", provider_bundle=_bundle("Revised recommendations", route_action="revise_task"), ) ) task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(first.task_id or "") assert task is not None assert second.task_id == first.task_id assert task.status == "awaiting_acceptance" assert task.run_ids == [first.run_id, second.run_id] assert [item["acceptance_type"] for item in task.feedback] == ["revise"] def test_router_failure_fallback_does_not_auto_accept_active_task(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) first = asyncio.run( service.process_direct( "recommend food in Hengqin", session_id="web:router-fallback", provider_bundle=_bundle("Food recommendations"), ) ) fallback_bundle = ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), main_provider=StubProvider( [ LLMResponse( content="Continued response", finish_reason="stop", provider_name="stub", model="stub-model", ) ] ), auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), auxiliary_provider=StubProvider([]), ) second = asyncio.run( service.process_direct( "continue after router failure", session_id="web:router-fallback", provider_bundle=fallback_bundle, ) ) task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(first.task_id or "") assert task is not None assert second.task_id == first.task_id assert task.status == "awaiting_acceptance" assert task.run_ids == [first.run_id, second.run_id] assert task.feedback == [] def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) result = asyncio.run( service.process_direct( "write implementation plan", session_id="web:acceptance", provider_bundle=_bundle("Plan"), ) ) loaded = service.create_loop().boot() generated: list[tuple[str, str]] = [] def build_learning_candidates_for_task( task_id: str, *, final_accepted_run_id: str | None = None, trigger_run_id: str | None = None, ) -> list[FakeLearningCandidate]: generated.append((task_id, final_accepted_run_id or trigger_run_id or "")) return [FakeLearningCandidate()] loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task response = asyncio.run( service.submit_acceptance( session_id="web:acceptance", run_id=result.run_id, acceptance_type="accept", ) ) assert response["task_status"] == "closed" assert response["acceptance_type"] == "accept" assert response["learning_candidates"] == [ {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"} ] assert generated == [(result.task_id, result.run_id)] task_service = loaded.task_service assert task_service is not None task = task_service.get_task(result.task_id or "") assert task is not None assert task.metadata["final_accepted_run_id"] == result.run_id def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) result = asyncio.run( service.process_direct( "summarize notes", session_id="web:revise", provider_bundle=_bundle("Summary"), ) ) response = asyncio.run( service.submit_acceptance( session_id="web:revise", run_id=result.run_id, acceptance_type="revise", comment="Add decisions", ) ) assert response["task_status"] == "needs_revision" assert response["learning_candidates"] == [] task_service = service.create_loop().boot().task_service assert task_service is not None task = task_service.get_task(result.task_id or "") assert task is not None assert task.feedback[0]["acceptance_type"] == "revise" def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( workspace=tmp_path, task_execution_planner=StubTaskExecutionPlanner(), ) ) result = asyncio.run( service.process_direct( "prepare checklist", session_id="web:legacy", provider_bundle=_bundle("Checklist"), ) ) response = asyncio.run( service.submit_feedback( session_id="web:legacy", run_id=result.run_id, feedback_type="satisfied", ) ) assert response["acceptance_type"] == "accept" assert response["feedback_type"] == "satisfied" assert response["task_status"] == "closed" def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None: service = TaskService(tmp_path) task = service.create_task(session_id="s", description="legacy") task.status = "awaiting_feedback" task.feedback.append({"feedback_type": "satisfied", "run_id": "run-1"}) service.store.upsert_task(task) loaded = service.get_task(task.task_id) assert loaded is not None assert loaded.status == "awaiting_acceptance" assert loaded.feedback[0]["acceptance_type"] == "accept"