Files
beaver_project/app-instance/backend/tests/unit/test_task_mode_feedback.py

624 lines
20 KiB
Python

from __future__ import annotations

import asyncio
import json
from pathlib import Path
from types import SimpleNamespace

from beaver.engine import AgentRunResult, EngineLoader
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.services.agent_service import AgentService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.tasks import TaskExecutionPlan, TaskService
class StubProvider(LLMProvider):
    """Scripted ``LLMProvider`` that replays canned responses in order.

    Every successful ``chat`` call consumes the next queued response and
    records the messages it was invoked with in ``seen_messages``.
    """

    def __init__(self, responses: list[LLMResponse]) -> None:
        """Queue ``responses`` for replay.

        Args:
            responses: Replies to hand out, first call first.
        """
        super().__init__()
        # Copy so draining the queue never mutates the caller's list.
        self._responses = list(responses)
        # One entry per successful chat() call, oldest first.
        self.seen_messages: list[list[dict]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Return the next queued response and record ``messages``.

        Args:
            messages: Conversation passed by the caller; a snapshot is recorded.
            tools: Accepted for interface compatibility; unused.
            model: Accepted for interface compatibility; unused.
            max_tokens: Accepted for interface compatibility; unused.
            temperature: Accepted for interface compatibility; unused.

        Returns:
            The oldest response still in the queue.

        Raises:
            AssertionError: If the queue is already empty.
        """
        if not self._responses:
            raise AssertionError("No stubbed provider responses left")
        # Record a snapshot rather than the caller's list object: if the caller
        # keeps appending to the same list after this call, earlier entries in
        # seen_messages must still show what was sent at the time.
        self.seen_messages.append(list(messages))
        return self._responses.pop(0)

    def get_default_model(self) -> str:
        """Return the fixed model name used by this stub."""
        return "stub-model"
class StubTaskExecutionPlanner:
    """Planner stub that returns the same plan for every request."""

    async def plan(self, **kwargs) -> TaskExecutionPlan:
        """Ignore all planning inputs and return ``TaskExecutionPlan.single("test-single")``."""
        fixed_plan = TaskExecutionPlan.single("test-single")
        return fixed_plan
class RecordingTaskExecutionPlanner:
    """Planner stub that remembers the keyword arguments of every ``plan`` call."""

    def __init__(self) -> None:
        # One shallow copy of the kwargs per plan() call, oldest first.
        self.calls: list[dict] = []

    async def plan(self, **kwargs) -> TaskExecutionPlan:
        """Record ``kwargs`` and return ``TaskExecutionPlan.single("test-single")``."""
        received = {**kwargs}
        self.calls.append(received)
        return TaskExecutionPlan.single("test-single")
class RecordingSkillAssembler:
    """Skill assembler stub that always activates a preset list of skills."""

    def __init__(self, skills: list[SkillContext]) -> None:
        """Remember ``skills`` (copied) as the result of every assembly."""
        self.skills = [*skills]
        # One shallow copy of the kwargs per assemble() call, oldest first.
        self.calls: list[dict] = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Record ``kwargs`` and return a result activating a copy of the preset skills."""
        received = {**kwargs}
        self.calls.append(received)
        activated = [*self.skills]
        return SkillAssemblyResult(activated_skills=activated)
class RecordingTaskAttemptOrchestrator:
    """Orchestrator stub that records each ``run`` call and returns a fixed result."""

    def __init__(self) -> None:
        # One shallow copy of the kwargs per run() call, oldest first.
        self.calls: list[dict] = []

    async def run(self, **kwargs) -> AgentRunResult:
        """Record the call, stamp the task id, and return a canned ``AgentRunResult``.

        Reads ``kwargs["task"]`` and ``kwargs["kwargs"]["session_id"]``; the task
        object is mutated in place to carry the id ``"task-from-orchestrator"``.
        """
        received = {**kwargs}
        self.calls.append(received)

        task = kwargs["task"]
        task.task_id = "task-from-orchestrator"

        # The caller's forwarded keyword arguments arrive nested under "kwargs".
        forwarded = kwargs["kwargs"]
        return AgentRunResult(
            session_id=forwarded["session_id"],
            run_id="run-from-orchestrator",
            output_text="orchestrated",
            finish_reason="stop",
            tool_iterations=0,
            task_id=task.task_id,
            task_status=task.status,
        )
class FakeLearningCandidate:
    """Minimal learning-candidate stand-in that only supports ``to_dict``."""

    def to_dict(self) -> dict[str, str]:
        """Return a fresh dict describing one fixed, open ``new_skill`` candidate."""
        return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
    """Build the canned router reply whose content is a JSON routing decision.

    Args:
        action: Value for the ``"action"`` field of the payload.
        short_title: Value for the ``"short_title"`` field of the payload.

    Returns:
        An ``LLMResponse`` attributed to the ``"stub"`` provider whose content is
        a compact JSON object with ``action``, ``reason`` and ``short_title`` keys.
    """
    # Serialize with json.dumps instead of interpolating into a template so
    # quotes, backslashes or newlines in either argument are escaped and the
    # payload is always valid JSON. Compact separators plus ensure_ascii=False
    # keep the text byte-identical to the old template for plain inputs.
    payload = json.dumps(
        {"action": action, "reason": "test route", "short_title": short_title},
        separators=(",", ":"),
        ensure_ascii=False,
    )
    return LLMResponse(
        content=payload,
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
    """Assemble a ``ProviderBundle`` backed entirely by stub providers.

    Args:
        *responses: Texts the main provider replies with, in call order.
        route_action: Action placed in the single reply queued on the auxiliary provider.

    Returns:
        A bundle whose main provider replays ``responses`` and whose auxiliary
        provider holds one routing reply.
    """
    main_replies = [
        LLMResponse(
            content=text,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        for text in responses
    ]
    router_reply = _route_response(route_action)
    return ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider(main_replies),
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=StubProvider([router_reply]),
    )
def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
    """A completed run leaves the task awaiting acceptance with evidence but no validation."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)

    run = service.process_direct(
        "draft release notes",
        session_id="web:test",
        provider_bundle=_bundle("Done"),
    )
    result = asyncio.run(run)

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(result.task_id or "")
    assert task is not None
    assert task.status == "awaiting_acceptance"

    # Neither the stored task nor the run result carries a validation verdict.
    assert task.validation_result is None
    assert result.validation_result is None

    event_types = []
    for event in task_service.list_events(task.task_id):
        event_types.append(event.event_type)
    assert "evidence_recorded" in event_types
    assert "validated" not in event_types
def test_agent_service_records_router_latency(tmp_path: Path) -> None:
    """The run result's usage reports a positive ``router_ms`` under ``latency_ms``."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)

    run = service.process_direct(
        "draft release notes",
        session_id="web:latency",
        provider_bundle=_bundle("Done"),
    )
    result = asyncio.run(run)

    router_ms = result.usage["latency_ms"]["router_ms"]
    assert router_ms > 0
def test_task_mode_preselects_skills_for_planner_and_reuses_them_in_main_run(tmp_path: Path) -> None:
    """Skills are assembled once, summarised for the planner, and stored on the task."""
    docker_skill = SkillContext(
        name="docker-debug",
        content="Use docker logs before editing config.",
        version="v1",
        content_hash="hash-v1",
        activation_reason="llm_selected",
        tool_hints=["terminal"],
    )
    skill_assembler = RecordingSkillAssembler([docker_skill])
    planner = RecordingTaskExecutionPlanner()
    loader = EngineLoader(
        workspace=tmp_path,
        skill_assembler=skill_assembler,
        task_execution_planner=planner,
    )
    service = AgentService(loader=loader)

    run = service.process_direct(
        "debug this workflow",
        session_id="web:skill-aware-task",
        provider_bundle=_bundle("Done"),
    )
    result = asyncio.run(run)

    assert result.task_id
    # Exactly one assembly call was recorded for the whole request.
    assert len(skill_assembler.calls) == 1
    assert planner.calls
    first_plan_call = planner.calls[0]
    assert first_plan_call["skill_summaries"] == ["docker-debug: Use docker logs before editing config."]
    assert first_plan_call["tool_hints"] == ["terminal"]

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(result.task_id)
    assert task is not None
    assert task.skill_names == ["docker-debug"]
def test_task_mode_delegates_attempt_execution_to_orchestrator(tmp_path: Path) -> None:
    """With the orchestrator factory replaced, the run result comes from the orchestrator."""
    orchestrator = RecordingTaskAttemptOrchestrator()
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)

    def _recording_orchestrator(loaded):
        # Hand back the recording stub regardless of the loaded engine.
        return orchestrator

    service._build_task_attempt_orchestrator = _recording_orchestrator  # type: ignore[attr-defined]

    run = service.process_direct(
        "draft release notes",
        session_id="web:orchestrator",
        provider_bundle=_bundle("main runner should not be used"),
    )
    result = asyncio.run(run)

    assert result.output_text == "orchestrated"
    assert result.run_id == "run-from-orchestrator"
    assert len(orchestrator.calls) == 1
    only_call = orchestrator.calls[0]
    assert only_call["message"] == "draft release notes"
    assert only_call["task"].description == "draft release notes"
def test_task_mode_injects_prompt_locale_output_language(tmp_path: Path) -> None:
    """``prompt_locale="en"`` reaches the system prompt and the task metadata."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)

    # Built by hand (not via _bundle) so the main provider can be inspected afterwards.
    done_reply = LLMResponse(
        content="Done",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    main_provider = StubProvider([done_reply])
    router_provider = StubProvider([_route_response("new_task", "Product summary")])
    bundle = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=main_provider,
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=router_provider,
    )

    run = service.process_direct(
        "Summarize the uploaded report in English",
        session_id="web:locale-task",
        prompt_locale="en",
        provider_bundle=bundle,
    )
    result = asyncio.run(run)

    assert result.task_id
    assert main_provider.seen_messages
    # First message of the most recent main-provider call is read as the system prompt.
    last_call_messages = main_provider.seen_messages[-1]
    system_prompt = last_call_messages[0]["content"]
    assert "Use English for user-facing replies" in system_prompt
    assert "Output language: English." in system_prompt

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(result.task_id)
    assert task is not None
    assert task.metadata["prompt_locale"] == "en"
def test_unrelated_simple_chat_auto_accepts_active_task(tmp_path: Path) -> None:
    """A follow-up routed as ``simple_chat`` closes the earlier task as accepted."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:new-topic-chat"

    first = asyncio.run(
        service.process_direct(
            "recommend food in Hengqin",
            session_id=session_id,
            provider_bundle=_bundle("Food recommendations"),
        )
    )
    second = asyncio.run(
        service.process_direct(
            "have you eaten?",
            session_id=session_id,
            provider_bundle=_bundle("I do not eat.", route_action="simple_chat"),
        )
    )

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    previous = task_service.get_task(first.task_id or "")
    assert previous is not None
    assert previous.status == "closed"
    assert previous.run_ids == [first.run_id]
    latest_feedback = previous.feedback[-1]
    assert latest_feedback["acceptance_type"] == "accept"
    assert previous.metadata["final_accepted_run_id"] == first.run_id
    # The chat turn itself is not attached to any task.
    assert second.task_id is None
def test_unrelated_new_task_auto_accepts_previous_task(tmp_path: Path) -> None:
    """A follow-up routed as ``new_task`` closes the earlier task and opens a separate one."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:new-topic-task"

    first = asyncio.run(
        service.process_direct(
            "recommend food in Hengqin",
            session_id=session_id,
            provider_bundle=_bundle("Food recommendations"),
        )
    )
    second = asyncio.run(
        service.process_direct(
            "check today's weather in Iceland",
            session_id=session_id,
            provider_bundle=_bundle("Weather result", route_action="new_task"),
        )
    )

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    previous = task_service.get_task(first.task_id or "")
    current = task_service.get_task(second.task_id or "")
    assert previous is not None
    assert current is not None

    # Earlier task: closed, accepted, and still owning only its own run.
    assert previous.status == "closed"
    assert previous.run_ids == [first.run_id]
    latest_feedback = previous.feedback[-1]
    assert latest_feedback["acceptance_type"] == "accept"

    # New task: distinct id, awaiting acceptance, owning only the second run.
    assert current.task_id != previous.task_id
    assert current.status == "awaiting_acceptance"
    assert current.run_ids == [second.run_id]
def test_standalone_realtime_repeat_creates_new_task_in_same_session(tmp_path: Path) -> None:
    """Re-asking a near-identical question starts a new task even when routed as ``continue_task``."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "feishu:group-weather"

    first_run = service.process_direct(
        "珠海天气怎样",
        session_id=session_id,
        provider_bundle=_bundle("Weather result"),
    )
    first = asyncio.run(first_run)

    second_run = service.process_direct(
        "珠海天气怎么样",
        session_id=session_id,
        provider_bundle=_bundle("Fresh weather result", route_action="continue_task"),
    )
    second = asyncio.run(second_run)

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    previous = task_service.get_task(first.task_id or "")
    current = task_service.get_task(second.task_id or "")
    assert previous is not None
    assert current is not None

    # Both tasks live in the same session but are distinct tasks.
    assert previous.session_id == session_id
    assert current.session_id == session_id
    assert current.task_id != previous.task_id

    assert previous.status == "closed"
    assert previous.run_ids == [first.run_id]
    assert current.status == "awaiting_acceptance"
    assert current.run_ids == [second.run_id]
def test_related_follow_up_continues_active_task_without_accepting_it(tmp_path: Path) -> None:
    """A ``continue_task`` follow-up adds a run to the same task and records no feedback."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:continue-topic"

    first = asyncio.run(
        service.process_direct(
            "recommend food in Hengqin",
            session_id=session_id,
            provider_bundle=_bundle("Food recommendations"),
        )
    )
    second = asyncio.run(
        service.process_direct(
            "include restaurants near the port",
            session_id=session_id,
            provider_bundle=_bundle("More recommendations", route_action="continue_task"),
        )
    )

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(first.task_id or "")
    assert task is not None
    assert second.task_id == first.task_id
    assert task.status == "awaiting_acceptance"
    expected_runs = [first.run_id, second.run_id]
    assert task.run_ids == expected_runs
    assert task.feedback == []
def test_requested_revision_keeps_active_task_without_accepting_it(tmp_path: Path) -> None:
    """A ``revise_task`` follow-up reuses the task and records a single ``revise`` feedback."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:revise-topic"

    first = asyncio.run(
        service.process_direct(
            "recommend food in Hengqin",
            session_id=session_id,
            provider_bundle=_bundle("Food recommendations"),
        )
    )
    second = asyncio.run(
        service.process_direct(
            "remove expensive restaurants",
            session_id=session_id,
            provider_bundle=_bundle("Revised recommendations", route_action="revise_task"),
        )
    )

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(first.task_id or "")
    assert task is not None
    assert second.task_id == first.task_id
    assert task.status == "awaiting_acceptance"
    expected_runs = [first.run_id, second.run_id]
    assert task.run_ids == expected_runs
    acceptance_types = [entry["acceptance_type"] for entry in task.feedback]
    assert acceptance_types == ["revise"]
def test_router_failure_fallback_does_not_auto_accept_active_task(tmp_path: Path) -> None:
    """When the router provider has no reply, the follow-up joins the active task unaccepted."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:router-fallback"

    first = asyncio.run(
        service.process_direct(
            "recommend food in Hengqin",
            session_id=session_id,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    continued_reply = LLMResponse(
        content="Continued response",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    # The auxiliary provider is given no responses, so its chat() raises AssertionError.
    exhausted_router = StubProvider([])
    fallback_bundle = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider([continued_reply]),
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=exhausted_router,
    )

    second = asyncio.run(
        service.process_direct(
            "continue after router failure",
            session_id=session_id,
            provider_bundle=fallback_bundle,
        )
    )

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(first.task_id or "")
    assert task is not None
    assert second.task_id == first.task_id
    assert task.status == "awaiting_acceptance"
    expected_runs = [first.run_id, second.run_id]
    assert task.run_ids == expected_runs
    assert task.feedback == []
def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
    """Submitting ``accept`` closes the task and returns the learning candidates that were built."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:acceptance"

    result = asyncio.run(
        service.process_direct(
            "write implementation plan",
            session_id=session_id,
            provider_bundle=_bundle("Plan"),
        )
    )

    loaded = service.create_loop().boot()
    # (task_id, run_id) pairs passed to the replacement candidate builder below.
    generated: list[tuple[str, str]] = []

    def build_learning_candidates_for_task(
        task_id: str,
        *,
        final_accepted_run_id: str | None = None,
        trigger_run_id: str | None = None,
    ) -> list[FakeLearningCandidate]:
        # Prefer the accepted run id, then the trigger run id, else an empty string.
        run_id = final_accepted_run_id or trigger_run_id or ""
        generated.append((task_id, run_id))
        return [FakeLearningCandidate()]

    # Swap the real candidate builder for the recording fake.
    loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task

    response = asyncio.run(
        service.submit_acceptance(
            session_id=session_id,
            run_id=result.run_id,
            acceptance_type="accept",
        )
    )

    assert response["task_status"] == "closed"
    assert response["acceptance_type"] == "accept"
    expected_candidates = [
        {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
    ]
    assert response["learning_candidates"] == expected_candidates
    assert generated == [(result.task_id, result.run_id)]

    task_service = loaded.task_service
    assert task_service is not None
    task = task_service.get_task(result.task_id or "")
    assert task is not None
    assert task.metadata["final_accepted_run_id"] == result.run_id
def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
    """Submitting ``revise`` moves the task to ``needs_revision`` with no learning candidates.

    NOTE(review): despite the name, only the ``revise`` path is exercised here;
    no abandon submission is made in this test.
    """
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:revise"

    result = asyncio.run(
        service.process_direct(
            "summarize notes",
            session_id=session_id,
            provider_bundle=_bundle("Summary"),
        )
    )
    response = asyncio.run(
        service.submit_acceptance(
            session_id=session_id,
            run_id=result.run_id,
            acceptance_type="revise",
            comment="Add decisions",
        )
    )

    assert response["task_status"] == "needs_revision"
    assert response["learning_candidates"] == []

    task_service = service.create_loop().boot().task_service
    assert task_service is not None
    task = task_service.get_task(result.task_id or "")
    assert task is not None
    first_feedback = task.feedback[0]
    assert first_feedback["acceptance_type"] == "revise"
def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
    """``submit_feedback`` with ``satisfied`` reports ``accept`` and closes the task."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    service = AgentService(loader=loader)
    session_id = "web:legacy"

    result = asyncio.run(
        service.process_direct(
            "prepare checklist",
            session_id=session_id,
            provider_bundle=_bundle("Checklist"),
        )
    )
    response = asyncio.run(
        service.submit_feedback(
            session_id=session_id,
            run_id=result.run_id,
            feedback_type="satisfied",
        )
    )

    # The response carries both the new acceptance type and the echoed legacy feedback type.
    assert response["acceptance_type"] == "accept"
    assert response["feedback_type"] == "satisfied"
    assert response["task_status"] == "closed"
def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
    """A task stored with legacy status and feedback fields is read back in the new vocabulary."""
    service = TaskService(tmp_path)
    task = service.create_task(session_id="s", description="legacy")

    # Persist the task with the legacy status and a legacy-shaped feedback entry.
    task.status = "awaiting_feedback"
    legacy_feedback = {"feedback_type": "satisfied", "run_id": "run-1"}
    task.feedback.append(legacy_feedback)
    service.store.upsert_task(task)

    loaded = service.get_task(task.task_id)
    assert loaded is not None
    assert loaded.status == "awaiting_acceptance"
    first_feedback = loaded.feedback[0]
    assert first_feedback["acceptance_type"] == "accept"