Files
beaver_project/app-instance/backend/tests/unit/test_task_mode_feedback.py
steven_li 8aeb97a5fc feat(app): 移除内置agents并添加CORS支持和技能上传优化
移除了agents/registry.json中的所有内置agents配置,将agents数组清空。
为web应用添加了CORS中间件支持,允许指定的前端地址跨域访问。
重构了技能上传功能,增加了LLM重写机制,自动规范化上传的技能格式。
新增了工具名称提取逻辑,从技能正文中自动识别Required Tools段落。
更新了技能学习候选者和草稿的载荷结构,添加评估报告统计信息。
修改了意图路由技能的说明,改进任务状态管理逻辑。
2026-06-12 13:25:20 +08:00

499 lines
16 KiB
Python

from __future__ import annotations

import asyncio
import json
from pathlib import Path
from types import SimpleNamespace

from beaver.engine import EngineLoader
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.services.agent_service import AgentService
from beaver.tasks import TaskExecutionPlan, TaskService
class StubProvider(LLMProvider):
    """Scripted ``LLMProvider`` that replays canned responses in order.

    Each ``chat`` call consumes the next queued response and records the
    messages it was called with in ``seen_messages`` so tests can inspect
    the prompts that were sent.
    """

    def __init__(self, responses: list[LLMResponse]) -> None:
        """Queue ``responses`` for replay, first item first."""
        super().__init__()
        # Copy so draining the queue never mutates the caller's list.
        self._responses = list(responses)
        self.seen_messages: list[list[dict]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Record ``messages`` and return the next scripted response.

        Only ``messages`` is used; the remaining parameters are accepted
        and ignored.

        Raises:
            AssertionError: If every scripted response was already consumed.
        """
        if not self._responses:
            raise AssertionError("No stubbed provider responses left")
        # Store a snapshot rather than the caller's list object: a caller
        # that keeps appending to the same list between calls would
        # otherwise rewrite the entries recorded for earlier calls.
        self.seen_messages.append(list(messages))
        return self._responses.pop(0)

    def get_default_model(self) -> str:
        """Return the fixed model name reported by the stub."""
        return "stub-model"
class StubTaskExecutionPlanner:
    """Planner double that returns the same plan for every request."""

    async def plan(self, **kwargs) -> TaskExecutionPlan:
        """Ignore all keyword arguments and return ``TaskExecutionPlan.single("test-single")``."""
        fixed_plan = TaskExecutionPlan.single("test-single")
        return fixed_plan
class FakeLearningCandidate:
    """Minimal learning-candidate double that only implements ``to_dict``."""

    def to_dict(self) -> dict:
        """Return a fresh dict holding the fixed candidate payload."""
        payload = dict(candidate_id="candidate-1", kind="new_skill", status="open")
        return payload
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
    """Build the stubbed router reply carrying a JSON routing decision.

    Args:
        action: Value stored under the ``"action"`` key.
        short_title: Value stored under the ``"short_title"`` key.

    Returns:
        An ``LLMResponse`` whose ``content`` is a compact JSON object with
        the keys ``action``, ``reason`` (always ``"test route"``) and
        ``short_title``.
    """
    # Serialise with json.dumps instead of string interpolation so quotes
    # or backslashes in the arguments are escaped and the content stays
    # valid JSON. Compact separators and ensure_ascii=False keep the text
    # byte-identical to the previous f-string for plain inputs.
    content = json.dumps(
        {"action": action, "reason": "test route", "short_title": short_title},
        separators=(",", ":"),
        ensure_ascii=False,
    )
    return LLMResponse(
        content=content,
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
    """Assemble a ``ProviderBundle`` whose providers are both stubs.

    Args:
        *responses: Contents the main provider returns, one per ``chat``
            call, in the order given.
        route_action: Action placed in the single routing reply queued on
            the auxiliary provider.
    """
    main_runtime = SimpleNamespace(model="stub-model", provider_name="stub")

    # One scripted LLMResponse per requested main-provider reply.
    scripted: list[LLMResponse] = []
    for text in responses:
        scripted.append(
            LLMResponse(
                content=text,
                finish_reason="stop",
                provider_name="stub",
                model="stub-model",
            )
        )
    main_provider = StubProvider(scripted)

    auxiliary_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    # The auxiliary provider gets exactly one reply: the routing decision.
    auxiliary_provider = StubProvider([_route_response(route_action)])

    return ProviderBundle(
        main_runtime=main_runtime,
        main_provider=main_provider,
        auxiliary_runtime=auxiliary_runtime,
        auxiliary_provider=auxiliary_provider,
    )
def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
    """A first run leaves the task awaiting acceptance with evidence but no validation."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)

    turn = agent.process_direct(
        "draft release notes",
        session_id="web:test",
        provider_bundle=_bundle("Done"),
    )
    outcome = asyncio.run(turn)

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id or "")
    assert stored is not None

    # The task is parked for the user's verdict and nothing was validated.
    assert stored.status == "awaiting_acceptance"
    assert stored.validation_result is None
    assert outcome.validation_result is None

    recorded_types = [entry.event_type for entry in tasks.list_events(stored.task_id)]
    assert "evidence_recorded" in recorded_types
    assert "validated" not in recorded_types
def test_task_mode_injects_prompt_locale_output_language(tmp_path: Path) -> None:
    """``prompt_locale="en"`` reaches the system prompt and the task metadata."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)

    # Keep a handle on the main provider so the prompts it saw can be inspected.
    done_reply = LLMResponse(
        content="Done",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    main_provider = StubProvider([done_reply])
    router = StubProvider([_route_response("new_task", "Product summary")])
    providers = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=main_provider,
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=router,
    )

    outcome = asyncio.run(
        agent.process_direct(
            "Summarize the uploaded report in English",
            session_id="web:locale-task",
            prompt_locale="en",
            provider_bundle=providers,
        )
    )

    assert outcome.task_id
    assert main_provider.seen_messages
    # First message of the most recent main-provider call.
    latest_call = main_provider.seen_messages[-1]
    system_prompt = latest_call[0]["content"]
    for expected in ("Use English for user-facing replies", "Output language: English."):
        assert expected in system_prompt

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id)
    assert stored is not None
    assert stored.metadata["prompt_locale"] == "en"
def test_unrelated_simple_chat_auto_accepts_active_task(tmp_path: Path) -> None:
    """A follow-up routed as ``simple_chat`` closes the pending task as accepted."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:new-topic-chat"

    def send(text: str, providers: ProviderBundle):
        # Run one turn of the async entry point in the shared session.
        return asyncio.run(
            agent.process_direct(text, session_id=session, provider_bundle=providers)
        )

    task_turn = send("recommend food in Hengqin", _bundle("Food recommendations"))
    chat_turn = send("have you eaten?", _bundle("I do not eat.", route_action="simple_chat"))

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    earlier = tasks.get_task(task_turn.task_id or "")
    assert earlier is not None
    assert earlier.status == "closed"
    assert earlier.run_ids == [task_turn.run_id]
    assert earlier.feedback[-1]["acceptance_type"] == "accept"
    assert earlier.metadata["final_accepted_run_id"] == task_turn.run_id
    # The chat turn itself is not attached to any task.
    assert chat_turn.task_id is None
def test_unrelated_new_task_auto_accepts_previous_task(tmp_path: Path) -> None:
    """A follow-up routed as ``new_task`` closes the old task and opens a separate one."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:new-topic-task"

    def send(text: str, providers: ProviderBundle):
        # Run one turn of the async entry point in the shared session.
        return asyncio.run(
            agent.process_direct(text, session_id=session, provider_bundle=providers)
        )

    food_turn = send("recommend food in Hengqin", _bundle("Food recommendations"))
    weather_turn = send(
        "check today's weather in Iceland",
        _bundle("Weather result", route_action="new_task"),
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    earlier = tasks.get_task(food_turn.task_id or "")
    later = tasks.get_task(weather_turn.task_id or "")
    assert earlier is not None
    assert later is not None

    # The earlier task was closed with an accept entry and kept only its own run.
    assert earlier.status == "closed"
    assert earlier.run_ids == [food_turn.run_id]
    assert earlier.feedback[-1]["acceptance_type"] == "accept"

    # The later task is distinct and still waits for a verdict.
    assert later.task_id != earlier.task_id
    assert later.status == "awaiting_acceptance"
    assert later.run_ids == [weather_turn.run_id]
def test_standalone_realtime_repeat_creates_new_task_in_same_session(tmp_path: Path) -> None:
    """A repeated weather question gets a new task even when routed as ``continue_task``."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session_id = "feishu:group-weather"

    def send(text: str, providers: ProviderBundle):
        # Run one turn of the async entry point in the shared session.
        return asyncio.run(
            agent.process_direct(text, session_id=session_id, provider_bundle=providers)
        )

    first_turn = send("珠海天气怎样", _bundle("Weather result"))
    repeat_turn = send(
        "珠海天气怎么样",
        _bundle("Fresh weather result", route_action="continue_task"),
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    earlier = tasks.get_task(first_turn.task_id or "")
    later = tasks.get_task(repeat_turn.task_id or "")
    assert earlier is not None
    assert later is not None

    # Both tasks belong to the same session but are different tasks.
    assert earlier.session_id == session_id
    assert later.session_id == session_id
    assert later.task_id != earlier.task_id

    assert earlier.status == "closed"
    assert earlier.run_ids == [first_turn.run_id]
    assert later.status == "awaiting_acceptance"
    assert later.run_ids == [repeat_turn.run_id]
def test_related_follow_up_continues_active_task_without_accepting_it(tmp_path: Path) -> None:
    """A ``continue_task`` follow-up adds a run to the same task and records no feedback."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:continue-topic"

    def send(text: str, providers: ProviderBundle):
        # Run one turn of the async entry point in the shared session.
        return asyncio.run(
            agent.process_direct(text, session_id=session, provider_bundle=providers)
        )

    opening = send("recommend food in Hengqin", _bundle("Food recommendations"))
    follow_up = send(
        "include restaurants near the port",
        _bundle("More recommendations", route_action="continue_task"),
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(opening.task_id or "")
    assert stored is not None

    assert follow_up.task_id == opening.task_id
    assert stored.status == "awaiting_acceptance"
    assert stored.run_ids == [opening.run_id, follow_up.run_id]
    assert stored.feedback == []
def test_requested_revision_keeps_active_task_without_accepting_it(tmp_path: Path) -> None:
    """A ``revise_task`` follow-up stays on the same task and logs one ``revise`` entry."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:revise-topic"

    def send(text: str, providers: ProviderBundle):
        # Run one turn of the async entry point in the shared session.
        return asyncio.run(
            agent.process_direct(text, session_id=session, provider_bundle=providers)
        )

    opening = send("recommend food in Hengqin", _bundle("Food recommendations"))
    revision = send(
        "remove expensive restaurants",
        _bundle("Revised recommendations", route_action="revise_task"),
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(opening.task_id or "")
    assert stored is not None

    assert revision.task_id == opening.task_id
    assert stored.status == "awaiting_acceptance"
    assert stored.run_ids == [opening.run_id, revision.run_id]
    acceptance_types = [entry["acceptance_type"] for entry in stored.feedback]
    assert acceptance_types == ["revise"]
def test_router_failure_fallback_does_not_auto_accept_active_task(tmp_path: Path) -> None:
    """When the router provider has no reply, the turn continues the task without feedback."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:router-fallback"

    opening = asyncio.run(
        agent.process_direct(
            "recommend food in Hengqin",
            session_id=session,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    continued_reply = LLMResponse(
        content="Continued response",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    # The auxiliary stub has an empty queue, so its chat() raises on first use.
    broken_router = StubProvider([])
    fallback_bundle = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider([continued_reply]),
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=broken_router,
    )

    follow_up = asyncio.run(
        agent.process_direct(
            "continue after router failure",
            session_id=session,
            provider_bundle=fallback_bundle,
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(opening.task_id or "")
    assert stored is not None

    assert follow_up.task_id == opening.task_id
    assert stored.status == "awaiting_acceptance"
    assert stored.run_ids == [opening.run_id, follow_up.run_id]
    assert stored.feedback == []
def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
    """Submitting ``accept`` closes the task and returns the built learning candidates."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:acceptance"

    outcome = asyncio.run(
        agent.process_direct(
            "write implementation plan",
            session_id=session,
            provider_bundle=_bundle("Plan"),
        )
    )

    booted = agent.create_loop().boot()
    # (task_id, run_id) pairs the replacement builder was invoked with.
    builder_calls: list[tuple[str, str]] = []

    def spy_build_candidates(
        task_id: str,
        *,
        final_accepted_run_id: str | None = None,
        trigger_run_id: str | None = None,
    ) -> list[FakeLearningCandidate]:
        # Prefer the accepted run id, then the trigger run id, then "".
        run_ref = final_accepted_run_id or trigger_run_id or ""
        builder_calls.append((task_id, run_ref))
        return [FakeLearningCandidate()]

    # Swap the real candidate builder for the recording fake.
    booted.skill_learning_service.build_learning_candidates_for_task = spy_build_candidates

    verdict = asyncio.run(
        agent.submit_acceptance(
            session_id=session,
            run_id=outcome.run_id,
            acceptance_type="accept",
        )
    )

    assert verdict["task_status"] == "closed"
    assert verdict["acceptance_type"] == "accept"
    expected_candidates = [
        {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
    ]
    assert verdict["learning_candidates"] == expected_candidates
    assert builder_calls == [(outcome.task_id, outcome.run_id)]

    tasks = booted.task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id or "")
    assert stored is not None
    assert stored.metadata["final_accepted_run_id"] == outcome.run_id
def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
    """Submitting ``revise`` marks the task ``needs_revision`` and returns no candidates.

    NOTE(review): despite the name, only the ``revise`` verdict is exercised
    here; no abandon-style verdict is submitted. Confirm whether that case
    is covered elsewhere.
    """
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:revise"

    outcome = asyncio.run(
        agent.process_direct(
            "summarize notes",
            session_id=session,
            provider_bundle=_bundle("Summary"),
        )
    )

    verdict = asyncio.run(
        agent.submit_acceptance(
            session_id=session,
            run_id=outcome.run_id,
            acceptance_type="revise",
            comment="Add decisions",
        )
    )

    assert verdict["task_status"] == "needs_revision"
    assert verdict["learning_candidates"] == []

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id or "")
    assert stored is not None
    first_entry = stored.feedback[0]
    assert first_entry["acceptance_type"] == "revise"
def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
    """``submit_feedback`` with ``satisfied`` reports ``accept`` and closes the task."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=loader)
    session = "web:legacy"

    outcome = asyncio.run(
        agent.process_direct(
            "prepare checklist",
            session_id=session,
            provider_bundle=_bundle("Checklist"),
        )
    )

    reply = asyncio.run(
        agent.submit_feedback(
            session_id=session,
            run_id=outcome.run_id,
            feedback_type="satisfied",
        )
    )

    # The response carries both the mapped and the original vocabulary.
    expected_fields = (
        ("acceptance_type", "accept"),
        ("feedback_type", "satisfied"),
        ("task_status", "closed"),
    )
    for field, expected in expected_fields:
        assert reply[field] == expected
def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
    """A task stored with legacy status and feedback fields is read back in the new vocabulary."""
    tasks = TaskService(tmp_path)
    legacy = tasks.create_task(session_id="s", description="legacy")

    # Persist the record using the old status and feedback field names.
    legacy.status = "awaiting_feedback"
    old_feedback = {"feedback_type": "satisfied", "run_id": "run-1"}
    legacy.feedback.append(old_feedback)
    tasks.store.upsert_task(legacy)

    reloaded = tasks.get_task(legacy.task_id)
    assert reloaded is not None
    assert reloaded.status == "awaiting_acceptance"
    first_entry = reloaded.feedback[0]
    assert first_entry["acceptance_type"] == "accept"