移除了agents/registry.json中的所有内置agents配置,将agents数组清空。 为web应用添加了CORS中间件支持,允许指定的前端地址跨域访问。 重构了技能上传功能,增加了LLM重写机制,自动规范化上传的技能格式。 新增了工具名称提取逻辑,从技能正文中自动识别Required Tools段落。 更新了技能学习候选者和草稿的载荷结构,添加评估报告统计信息。 修改了意图路由技能的说明,改进任务状态管理逻辑。
499 lines
16 KiB
Python
499 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
|
|
from beaver.engine import EngineLoader
|
|
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
|
from beaver.engine.providers.factory import ProviderBundle
|
|
from beaver.services.agent_service import AgentService
|
|
from beaver.tasks import TaskExecutionPlan, TaskService
|
|
|
|
|
|
class StubProvider(LLMProvider):
    """Scripted ``LLMProvider`` test double that replays canned responses.

    Responses are handed out first-in, first-out, one per ``chat`` call. The
    message list of every answered call is kept in ``seen_messages`` so a test
    can inspect the prompts that were sent.
    """

    def __init__(self, responses: list[LLMResponse]) -> None:
        """Keep a private copy of ``responses`` to replay in order."""
        super().__init__()
        # Copy so that consuming responses never mutates the caller's list.
        self._responses = list(responses)
        self.seen_messages: list[list[dict]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Return the next scripted response and record ``messages``.

        ``tools``, ``model``, ``max_tokens`` and ``temperature`` are accepted
        for signature compatibility and otherwise ignored.

        Raises:
            AssertionError: when every scripted response has already been
                consumed. The call is not recorded in that case.
        """
        if not self._responses:
            raise AssertionError("No stubbed provider responses left")
        # Record a shallow snapshot rather than the caller's list object: if
        # the caller keeps appending to ``messages`` after this call, the
        # recorded history must still show what was sent at call time.
        self.seen_messages.append(list(messages))
        return self._responses.pop(0)

    def get_default_model(self) -> str:
        """Return the fixed model name used by this stub."""
        return "stub-model"
|
|
|
|
|
|
class StubTaskExecutionPlanner:
    """Planner double whose ``plan`` result never depends on its input."""

    async def plan(self, **kwargs) -> TaskExecutionPlan:
        """Ignore all keyword arguments and return ``TaskExecutionPlan.single("test-single")``."""
        fixed_plan = TaskExecutionPlan.single("test-single")
        return fixed_plan
|
|
|
|
|
|
class FakeLearningCandidate:
    """Minimal learning-candidate double that only knows how to serialize itself."""

    def to_dict(self) -> dict:
        """Return a fresh, fixed payload describing an open ``new_skill`` candidate."""
        payload = dict(
            candidate_id="candidate-1",
            kind="new_skill",
            status="open",
        )
        return payload
|
|
|
|
|
|
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
    """Build a stub ``LLMResponse`` whose content is a JSON routing decision.

    The content is a compact JSON object with the keys ``action``, ``reason``
    (always ``"test route"``) and ``short_title``, in that order.

    Args:
        action: Value stored under the ``action`` key.
        short_title: Value stored under the ``short_title`` key.

    Returns:
        An ``LLMResponse`` with ``finish_reason="stop"`` attributed to the
        ``stub`` provider and ``stub-model`` model.
    """
    # Imported locally so this helper does not depend on the module's import block.
    import json

    # Serialize with json.dumps instead of string interpolation so quotes or
    # backslashes in the arguments cannot produce invalid JSON. Compact
    # separators and ensure_ascii=False keep the output byte-identical to the
    # previous hand-written template for plain inputs.
    content = json.dumps(
        {"action": action, "reason": "test route", "short_title": short_title},
        separators=(",", ":"),
        ensure_ascii=False,
    )
    return LLMResponse(
        content=content,
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
|
|
|
|
|
|
def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
    """Assemble a ``ProviderBundle`` made entirely of stub providers.

    Args:
        *responses: Texts the main provider replies with, one per ``chat`` call.
        route_action: Action placed in the single routing reply queued on the
            auxiliary provider.

    Returns:
        A bundle whose main and auxiliary runtimes are simple namespaces
        naming the ``stub`` provider and ``stub-model`` model.
    """
    scripted_replies = [
        LLMResponse(
            content=text,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        for text in responses
    ]
    main_stub = StubProvider(scripted_replies)
    # The auxiliary provider gets exactly one reply: the routing decision.
    routing_stub = StubProvider([_route_response(route_action)])

    return ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=main_stub,
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=routing_stub,
    )
|
|
|
|
|
|
def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
    """A completed run records evidence and leaves the task awaiting acceptance, unvalidated."""
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    outcome = asyncio.run(
        agent.process_direct(
            "draft release notes",
            session_id="web:test",
            provider_bundle=_bundle("Done"),
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id or "")
    assert stored is not None
    assert stored.status == "awaiting_acceptance"
    # No validation result is expected on either the task or the run result.
    assert stored.validation_result is None
    assert outcome.validation_result is None

    recorded_types = [event.event_type for event in tasks.list_events(stored.task_id)]
    assert "evidence_recorded" in recorded_types
    assert "validated" not in recorded_types
|
|
|
|
|
|
def test_task_mode_injects_prompt_locale_output_language(tmp_path: Path) -> None:
    """``prompt_locale="en"`` reaches the system prompt and the stored task metadata."""
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    # Built by hand (not via _bundle) so the main provider stays reachable
    # for inspecting the messages it received.
    done_reply = LLMResponse(
        content="Done",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    recording_provider = StubProvider([done_reply])
    routing_provider = StubProvider([_route_response("new_task", "Product summary")])
    providers = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=recording_provider,
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=routing_provider,
    )

    outcome = asyncio.run(
        agent.process_direct(
            "Summarize the uploaded report in English",
            session_id="web:locale-task",
            prompt_locale="en",
            provider_bundle=providers,
        )
    )

    assert outcome.task_id
    assert recording_provider.seen_messages
    # The first message of the last recorded call is read as the system prompt.
    last_call = recording_provider.seen_messages[-1]
    system_prompt = last_call[0]["content"]
    assert "Use English for user-facing replies" in system_prompt
    assert "Output language: English." in system_prompt

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id)
    assert stored is not None
    assert stored.metadata["prompt_locale"] == "en"
|
|
|
|
|
|
def test_unrelated_simple_chat_auto_accepts_active_task(tmp_path: Path) -> None:
    """A follow-up routed as ``simple_chat`` closes the open task as accepted and gets no task."""
    chat_session = "web:new-topic-chat"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    opening = asyncio.run(
        agent.process_direct(
            "recommend food in Hengqin",
            session_id=chat_session,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    small_talk = asyncio.run(
        agent.process_direct(
            "have you eaten?",
            session_id=chat_session,
            provider_bundle=_bundle("I do not eat.", route_action="simple_chat"),
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    earlier_task = tasks.get_task(opening.task_id or "")
    assert earlier_task is not None
    assert earlier_task.status == "closed"
    assert earlier_task.run_ids == [opening.run_id]
    assert earlier_task.feedback[-1]["acceptance_type"] == "accept"
    assert earlier_task.metadata["final_accepted_run_id"] == opening.run_id
    # The chat turn itself must not be attached to any task.
    assert small_talk.task_id is None
|
|
|
|
|
|
def test_unrelated_new_task_auto_accepts_previous_task(tmp_path: Path) -> None:
    """A second ``new_task`` in the same session closes the first as accepted and opens another."""
    shared_session = "web:new-topic-task"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    opening = asyncio.run(
        agent.process_direct(
            "recommend food in Hengqin",
            session_id=shared_session,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    follow_up = asyncio.run(
        agent.process_direct(
            "check today's weather in Iceland",
            session_id=shared_session,
            provider_bundle=_bundle("Weather result", route_action="new_task"),
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    earlier_task = tasks.get_task(opening.task_id or "")
    later_task = tasks.get_task(follow_up.task_id or "")
    assert earlier_task is not None
    assert later_task is not None

    # The first task is closed as accepted with only its own run attached.
    assert earlier_task.status == "closed"
    assert earlier_task.run_ids == [opening.run_id]
    assert earlier_task.feedback[-1]["acceptance_type"] == "accept"

    # The second request lives in a distinct task that still awaits acceptance.
    assert later_task.task_id != earlier_task.task_id
    assert later_task.status == "awaiting_acceptance"
    assert later_task.run_ids == [follow_up.run_id]
|
|
|
|
|
|
def test_standalone_realtime_repeat_creates_new_task_in_same_session(tmp_path: Path) -> None:
    """A near-identical repeat question gets its own task even when routed as ``continue_task``."""
    session_id = "feishu:group-weather"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    opening = asyncio.run(
        agent.process_direct(
            "珠海天气怎样",
            session_id=session_id,
            provider_bundle=_bundle("Weather result"),
        )
    )

    # The stubbed router answers "continue_task" here; the assertions below
    # nevertheless expect a separate task for the repeated question.
    repeat = asyncio.run(
        agent.process_direct(
            "珠海天气怎么样",
            session_id=session_id,
            provider_bundle=_bundle("Fresh weather result", route_action="continue_task"),
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    earlier_task = tasks.get_task(opening.task_id or "")
    later_task = tasks.get_task(repeat.task_id or "")
    assert earlier_task is not None
    assert later_task is not None

    # Both tasks belong to the same session but are distinct.
    assert earlier_task.session_id == session_id
    assert later_task.session_id == session_id
    assert later_task.task_id != earlier_task.task_id

    assert earlier_task.status == "closed"
    assert earlier_task.run_ids == [opening.run_id]
    assert later_task.status == "awaiting_acceptance"
    assert later_task.run_ids == [repeat.run_id]
|
|
|
|
|
|
def test_related_follow_up_continues_active_task_without_accepting_it(tmp_path: Path) -> None:
    """A ``continue_task`` follow-up adds a run to the open task and records no feedback."""
    topic_session = "web:continue-topic"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    opening = asyncio.run(
        agent.process_direct(
            "recommend food in Hengqin",
            session_id=topic_session,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    follow_up = asyncio.run(
        agent.process_direct(
            "include restaurants near the port",
            session_id=topic_session,
            provider_bundle=_bundle("More recommendations", route_action="continue_task"),
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(opening.task_id or "")
    assert stored is not None
    # Both turns share one task, which stays open with both runs attached.
    assert follow_up.task_id == opening.task_id
    assert stored.status == "awaiting_acceptance"
    assert stored.run_ids == [opening.run_id, follow_up.run_id]
    assert stored.feedback == []
|
|
|
|
|
|
def test_requested_revision_keeps_active_task_without_accepting_it(tmp_path: Path) -> None:
    """A ``revise_task`` follow-up stays on the open task and logs exactly one ``revise`` feedback."""
    topic_session = "web:revise-topic"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    opening = asyncio.run(
        agent.process_direct(
            "recommend food in Hengqin",
            session_id=topic_session,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    revision = asyncio.run(
        agent.process_direct(
            "remove expensive restaurants",
            session_id=topic_session,
            provider_bundle=_bundle("Revised recommendations", route_action="revise_task"),
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(opening.task_id or "")
    assert stored is not None
    assert revision.task_id == opening.task_id
    assert stored.status == "awaiting_acceptance"
    assert stored.run_ids == [opening.run_id, revision.run_id]

    feedback_kinds = [entry["acceptance_type"] for entry in stored.feedback]
    assert feedback_kinds == ["revise"]
|
|
|
|
|
|
def test_router_failure_fallback_does_not_auto_accept_active_task(tmp_path: Path) -> None:
    """With no routing reply available, the follow-up continues the open task unaccepted."""
    fallback_session = "web:router-fallback"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    opening = asyncio.run(
        agent.process_direct(
            "recommend food in Hengqin",
            session_id=fallback_session,
            provider_bundle=_bundle("Food recommendations"),
        )
    )

    continued_reply = LLMResponse(
        content="Continued response",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    # An auxiliary stub with no scripted replies raises AssertionError on its
    # first chat call; that is the "router failure" simulated here.
    exhausted_router = StubProvider([])
    fallback_bundle = ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=StubProvider([continued_reply]),
        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        auxiliary_provider=exhausted_router,
    )

    follow_up = asyncio.run(
        agent.process_direct(
            "continue after router failure",
            session_id=fallback_session,
            provider_bundle=fallback_bundle,
        )
    )

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(opening.task_id or "")
    assert stored is not None
    assert follow_up.task_id == opening.task_id
    assert stored.status == "awaiting_acceptance"
    assert stored.run_ids == [opening.run_id, follow_up.run_id]
    assert stored.feedback == []
|
|
|
|
|
|
def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
    """Accepting a run closes its task and returns the candidates built by the learning service."""
    acceptance_session = "web:acceptance"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    outcome = asyncio.run(
        agent.process_direct(
            "write implementation plan",
            session_id=acceptance_session,
            provider_bundle=_bundle("Plan"),
        )
    )

    booted = agent.create_loop().boot()
    builder_calls: list[tuple[str, str]] = []

    def build_learning_candidates_for_task(
        task_id: str,
        *,
        final_accepted_run_id: str | None = None,
        trigger_run_id: str | None = None,
    ) -> list[FakeLearningCandidate]:
        """Record the call and return a single fake candidate."""
        run_ref = final_accepted_run_id or trigger_run_id or ""
        builder_calls.append((task_id, run_ref))
        return [FakeLearningCandidate()]

    # Swap in the recording fake. NOTE(review): this presumes submit_acceptance
    # uses the same skill_learning_service instance that boot() returned here.
    booted.skill_learning_service.build_learning_candidates_for_task = (
        build_learning_candidates_for_task
    )

    reply = asyncio.run(
        agent.submit_acceptance(
            session_id=acceptance_session,
            run_id=outcome.run_id,
            acceptance_type="accept",
        )
    )

    assert reply["task_status"] == "closed"
    assert reply["acceptance_type"] == "accept"
    expected_candidates = [
        {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
    ]
    assert reply["learning_candidates"] == expected_candidates
    assert builder_calls == [(outcome.task_id, outcome.run_id)]

    tasks = booted.task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id or "")
    assert stored is not None
    assert stored.metadata["final_accepted_run_id"] == outcome.run_id
|
|
|
|
|
|
def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
    """A ``revise`` acceptance marks the task ``needs_revision`` and yields no learning candidates.

    Only the revise path is exercised here; no abandon submission is made.
    """
    revise_session = "web:revise"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    outcome = asyncio.run(
        agent.process_direct(
            "summarize notes",
            session_id=revise_session,
            provider_bundle=_bundle("Summary"),
        )
    )

    reply = asyncio.run(
        agent.submit_acceptance(
            session_id=revise_session,
            run_id=outcome.run_id,
            acceptance_type="revise",
            comment="Add decisions",
        )
    )

    assert reply["task_status"] == "needs_revision"
    assert reply["learning_candidates"] == []

    tasks = agent.create_loop().boot().task_service
    assert tasks is not None
    stored = tasks.get_task(outcome.task_id or "")
    assert stored is not None
    first_feedback = stored.feedback[0]
    assert first_feedback["acceptance_type"] == "revise"
|
|
|
|
|
|
def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
    """``submit_feedback`` with ``"satisfied"`` reports an ``accept`` and a closed task."""
    legacy_session = "web:legacy"
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=StubTaskExecutionPlanner(),
    )
    agent = AgentService(loader=engine_loader)

    outcome = asyncio.run(
        agent.process_direct(
            "prepare checklist",
            session_id=legacy_session,
            provider_bundle=_bundle("Checklist"),
        )
    )

    reply = asyncio.run(
        agent.submit_feedback(
            session_id=legacy_session,
            run_id=outcome.run_id,
            feedback_type="satisfied",
        )
    )

    # The reply carries both the new acceptance type and the legacy feedback type.
    assert reply["acceptance_type"] == "accept"
    assert reply["feedback_type"] == "satisfied"
    assert reply["task_status"] == "closed"
|
|
|
|
|
|
def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
    """A task stored with legacy status and feedback fields is read back in the current vocabulary."""
    tasks = TaskService(tmp_path)
    legacy_task = tasks.create_task(session_id="s", description="legacy")

    # Write the legacy values straight through the store, then re-read the
    # task through the service.
    legacy_task.status = "awaiting_feedback"
    legacy_entry = {"feedback_type": "satisfied", "run_id": "run-1"}
    legacy_task.feedback.append(legacy_entry)
    tasks.store.upsert_task(legacy_task)

    reloaded = tasks.get_task(legacy_task.task_id)

    assert reloaded is not None
    assert reloaded.status == "awaiting_acceptance"
    assert reloaded.feedback[0]["acceptance_type"] == "accept"
|