feat(task): 添加任务修订功能和超时处理机制
添加了 `revise_task` 路由动作类型,允许用户修改、纠正或重新执行最新活动任务结果。
实现了工具失败指导原则,防止相同类别工具重复失败。
为任务规划器添加了超时处理机制,避免长时间等待。

BREAKING CHANGE: 任务路由逻辑已更新,新增 `revise_task` 动作类型。

fix(api): 修复任务详情API返回完整流程投影

修复了任务详情API端点,现在会包含过滤后的流程运行、事件和工件信息,并确保时间戳字段正确序列化。

refactor(engine): 优化任务技能解析器摘要节点处理

改进了任务技能解析器对摘要节点的处理逻辑,对于仅依赖文本生成功能的摘要节点不再分配具体技能,直接使用依赖项输出进行汇总。

test: 增加任务修订和超时处理测试用例

添加了测试用例验证任务修订输入记录反馈、超时回退到单模式以及摘要节点技能解析等新功能。
This commit is contained in:
@ -78,3 +78,81 @@ def test_task_delete_api_removes_backend_task(tmp_path: Path) -> None:
|
||||
assert deleted.json()["task_id"] == task.task_id
|
||||
assert all(item["task_id"] != task.task_id for item in listed.json())
|
||||
assert missing.status_code == 404
|
||||
|
||||
|
||||
def test_task_detail_api_includes_filtered_process_projection(tmp_path: Path) -> None:
    """Check that the task detail endpoint returns only the requested task's process data.

    Two tasks are created in the same session. A planning event and a team
    failure event are stored for the first task, and a planning event for the
    second. The detail response for the first task must list only its own
    runs and events.
    """
    session_key = "web:detail"
    service = AgentService(workspace=tmp_path)
    loaded = service.create_loop().boot()

    task = loaded.task_service.create_task(  # type: ignore[union-attr]
        session_id=session_key,
        description="补充赛事数据",
    )
    other_task = loaded.task_service.create_task(  # type: ignore[union-attr]
        session_id=session_key,
        description="不相关任务",
    )

    def record_event(event_type: str, event_payload: dict) -> None:
        # Store a system message carrying the event, with context_visible=False.
        loaded.session_manager.append_message(
            session_key,
            role="system",
            event_type=event_type,
            event_payload=event_payload,
            context_visible=False,
        )

    # Attempt 2 of the target task was planned as a parallel team run.
    record_event(
        "task_execution_planned",
        {
            "task_id": task.task_id,
            "attempt_index": 2,
            "plan_mode": "team",
            "strategy": "parallel",
            "node_ids": ["search_match_result", "search_match_stats"],
            "reason": "needs separate evidence gathering",
        },
    )

    # That team run failed because one node hit its tool-iteration limit.
    failed_node = {
        "node_id": "search_match_stats",
        "success": False,
        "output_text": "",
        "run_id": "sub-run",
        "finish_reason": "max_tool_iterations",
        "error": "max_tool_iterations",
    }
    record_event(
        "task_team_run_failed",
        {
            "task_id": task.task_id,
            "attempt_index": 2,
            "plan_mode": "team",
            "strategy": "parallel",
            "team_success": False,
            "team_run_ids": ["sub-run"],
            "node_results": [failed_node],
            "error": "one or more team nodes failed",
        },
    )

    # Event for the unrelated task; it must not appear in the response.
    record_event(
        "task_execution_planned",
        {
            "task_id": other_task.task_id,
            "attempt_index": 1,
            "plan_mode": "single",
            "strategy": None,
            "node_ids": [],
        },
    )

    app = create_app(service=service, manage_service_lifecycle=False)
    with TestClient(app) as client:
        response = client.get(f"/api/tasks/{task.task_id}")

    assert response.status_code == 200
    payload = response.json()

    run_ids = [run["run_id"] for run in payload["process_runs"]]
    assert run_ids == [f"task:{task.task_id}:attempt:2", "sub-run"]

    actor_names = {event["actor_name"] for event in payload["process_events"]}
    assert actor_names == {"Task Planner", "Task Team", "search_match_stats"}

    event_task_ids_match = all(
        event["metadata"]["task_id"] == task.task_id
        for event in payload["process_events"]
    )
    assert event_task_ids_match
|
||||
|
||||
@ -103,6 +103,20 @@ def test_router_continues_active_task_from_llm_decision() -> None:
|
||||
assert provider.calls[0]["max_tokens"] == 256
|
||||
|
||||
|
||||
def test_router_marks_revision_from_llm_decision() -> None:
    """Check that a `revise_task` reply from the provider yields a revision decision."""
    router = MainAgentRouter()
    llm_reply = '{"action":"revise_task","reason":"user requested changes","short_title":"任务连续性"}'

    pending = router.classify(
        "再详细一点,并加上表格",
        active_task=_task(),
        provider=RouterProvider(llm_reply),
    )
    decision = asyncio.run(pending)

    # A revision is a task action but does not start a new task.
    assert decision.is_task
    assert decision.starts_new_task is False
    assert decision.action == "revise_task"
|
||||
|
||||
|
||||
def test_router_receives_thinking_mode() -> None:
|
||||
provider = RouterProvider('{"action":"simple_chat","reason":"simple"}')
|
||||
decision = asyncio.run(
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
from beaver.engine.session.models import MessageRecord
|
||||
|
||||
|
||||
def test_conversation_message_preserves_timestamp() -> None:
    """Check that `to_conversation_message` carries the record's timestamp through unchanged."""
    record = MessageRecord(
        role="user",
        content="hello",
        timestamp=1_779_329_600.0,
        message_id=42,
    )

    conversation_message = record.to_conversation_message()

    assert conversation_message["timestamp"] == 1_779_329_600.0
|
||||
@ -27,6 +27,22 @@ class PlannerProvider(LLMProvider):
|
||||
return "stub-model"
|
||||
|
||||
|
||||
class HangingPlannerProvider(LLMProvider):
    """Provider stub whose chat call sleeps for ten seconds before replying."""

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Sleep, then return a canned team-mode reply; all arguments are ignored."""
        await asyncio.sleep(10)
        late_reply = LLMResponse(
            content='{"mode":"team"}',
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        return late_reply

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
|
||||
|
||||
|
||||
def _task() -> TaskRecord:
|
||||
return TaskRecord(
|
||||
task_id="task-1",
|
||||
@ -49,6 +65,13 @@ def _bundle(response: str) -> ProviderBundle:
|
||||
)
|
||||
|
||||
|
||||
def _hanging_bundle() -> ProviderBundle:
    """Build a provider bundle whose main provider is a `HangingPlannerProvider`."""
    runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    provider = HangingPlannerProvider()
    return ProviderBundle(main_runtime=runtime, main_provider=provider)
|
||||
|
||||
|
||||
def test_planner_selects_single_mode() -> None:
|
||||
plan = asyncio.run(
|
||||
TaskExecutionPlanner().plan(
|
||||
@ -95,6 +118,22 @@ def test_planner_builds_team_graph() -> None:
|
||||
assert plan.final_synthesis_instruction == "merge the findings"
|
||||
|
||||
|
||||
def test_planner_timeout_falls_back_to_single() -> None:
    """Check that a planner call exceeding its timeout falls back to single mode."""
    planner = TaskExecutionPlanner()

    # The provider sleeps far longer than the 0.01 s timeout passed here.
    pending_plan = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_hanging_bundle(),
        timeout_seconds=0.01,
    )
    plan = asyncio.run(pending_plan)

    assert plan.mode == "single"
    assert plan.reason == "planner_failed"
    recorded_error = plan.fallback_error or ""
    assert "TimeoutError" in recorded_error
|
||||
|
||||
|
||||
def test_planner_team_nodes_can_target_skills_without_agent_roles() -> None:
|
||||
plan = TaskExecutionPlanner().from_json(
|
||||
"""
|
||||
|
||||
@ -290,6 +290,109 @@ def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
|
||||
assert loaded.task_service.active_task_view("web:continue") is None
|
||||
|
||||
|
||||
def test_active_task_revision_input_records_feedback_and_reruns(tmp_path: Path) -> None:
    """Check that a `revise_task` message records feedback on the first run and reruns the task.

    The first message opens a task. The second is routed as `revise_task`,
    and the test expects it to reuse the same task, add a second run, and
    store the user's text as "revise" feedback against the first run.
    """
    session_key = "web:revise-direct"
    revision_request = "再详细一点,并加上明后天穿衣建议"

    planner = _single_planner()
    validation = StubValidationService(
        [
            ValidationResult(passed=True, score=0.9, validator="test"),
            ValidationResult(passed=True, score=0.95, validator="test"),
        ]
    )
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=planner,
        validation_service=validation,
    )
    service = AgentService(loader=loader)

    first_call = service.process_direct(
        "查询珠海天气",
        session_id=session_key,
        provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
    )
    first = asyncio.run(first_call)

    second_call = service.process_direct(
        revision_request,
        session_id=session_key,
        provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
    )
    second = asyncio.run(second_call)

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(first.task_id)
    messages = loaded.session_manager.get_messages_as_conversation(first.session_id)

    # Last assistant message produced by the first run.
    first_run_replies = [
        entry
        for entry in messages
        if entry.get("role") == "assistant" and entry.get("run_id") == first.run_id
    ]
    first_assistant = first_run_replies[-1]
    user_messages = [entry.get("content") for entry in messages if entry.get("role") == "user"]

    assert second.task_id == first.task_id
    assert task is not None
    assert task.status == "awaiting_feedback"
    assert len(task.run_ids) == 2

    # created_at is copied from the stored entry, so only the other fields are pinned.
    expected_feedback = [
        {
            "feedback_type": "revise",
            "comment": revision_request,
            "run_id": first.run_id,
            "created_at": task.feedback[0]["created_at"],
        }
    ]
    assert task.feedback == expected_feedback
    assert first_assistant["feedback_state"] == "revise"
    assert revision_request in user_messages
|
||||
|
||||
|
||||
def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback(tmp_path: Path) -> None:
    """Check that explicit "revise" feedback followed by a revision message stores one feedback entry.

    After the first run, feedback is submitted explicitly. The follow-up
    message is routed as `revise_task` and must rerun the same task without
    adding a second feedback record.
    """
    session_key = "web:explicit-revise"
    explicit_comment = "准备补充穿衣建议"

    planner = _single_planner()
    validation = StubValidationService(
        [
            ValidationResult(passed=True, score=0.9, validator="test"),
            ValidationResult(passed=True, score=0.95, validator="test"),
        ]
    )
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=planner,
        validation_service=validation,
    )
    service = AgentService(loader=loader)

    first_call = service.process_direct(
        "查询珠海天气",
        session_id=session_key,
        provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
    )
    first = asyncio.run(first_call)

    feedback_call = service.submit_feedback(
        session_id=first.session_id,
        run_id=first.run_id,
        feedback_type="revise",
        comment=explicit_comment,
    )
    feedback = asyncio.run(feedback_call)

    second_call = service.process_direct(
        "加上明后天穿衣建议",
        session_id=session_key,
        provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
    )
    second = asyncio.run(second_call)

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(first.task_id)

    assert feedback["task_status"] == "needs_revision"
    assert second.task_id == first.task_id
    assert task is not None
    assert task.status == "awaiting_feedback"
    assert len(task.run_ids) == 2

    # Only the explicitly submitted feedback is expected to be stored.
    assert len(task.feedback) == 1
    stored_feedback = task.feedback[0]
    assert stored_feedback["feedback_type"] == "revise"
    assert stored_feedback["comment"] == explicit_comment
|
||||
|
||||
|
||||
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
@ -545,6 +648,8 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
|
||||
assert result.output_text == "fallback synthesized answer"
|
||||
assert any(event.event_type == "task_team_run_failed" for event in events)
|
||||
assert "sub-agent unavailable" in main_provider.calls[0][0]["content"]
|
||||
assert "same class of tools fails repeatedly" in main_provider.calls[0][0]["content"]
|
||||
assert "user-visible fallback answer" in main_provider.calls[0][0]["content"]
|
||||
|
||||
|
||||
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
||||
|
||||
@ -65,8 +65,8 @@ def _publish_skill(workspace: Path, *, skill_name: str) -> None:
|
||||
store = SkillSpecStore(workspace)
|
||||
draft = DraftService(store).create_new_skill_draft(
|
||||
skill_name=skill_name,
|
||||
proposed_content="# API Contract Review\n\nCheck schema compatibility and breaking changes.",
|
||||
proposed_frontmatter={"description": "API contract compatibility review", "tools": []},
|
||||
proposed_content=f"# {skill_name}\n\nCheck schema compatibility and breaking changes.",
|
||||
proposed_frontmatter={"description": f"{skill_name} capability", "tools": []},
|
||||
created_by="tester",
|
||||
reason="test",
|
||||
)
|
||||
@ -174,3 +174,51 @@ def test_task_skill_resolver_generates_ephemeral_guidance_when_missing(tmp_path:
|
||||
assert reports[0].ephemeral_guidance_id is not None
|
||||
assert reports[0].ephemeral_guidance_name == "api-compatibility-review"
|
||||
assert reports[0].ephemeral_used is True
|
||||
|
||||
|
||||
def test_task_skill_resolver_keeps_summary_nodes_skillless(tmp_path: Path) -> None:
    """Check that a summary node is resolved with no skills and no provider call.

    The node asks only for "text generation" and arrives with an inherited
    pinned skill and skill context. After resolution both inherited lists are
    expected to be empty and the recording provider must not have been called.
    """
    published_skill = "multi-search-engine"
    _publish_skill(tmp_path, skill_name=published_skill)
    provider = RecordingProvider(['["multi-search-engine"]'])

    resolver = TaskSkillResolver(
        skills_loader=SkillsLoader(tmp_path),
        draft_service=DraftService(SkillSpecStore(tmp_path)),
    )

    summary_agent = AgentDescriptor(
        name="summarize",
        metadata={
            "skill_query": "Summarization",
            "required_capabilities": ["text generation"],
        },
    )
    inherited_context = SkillContext(name="ephemeral:search-guidance", content="Search again.")
    summary_node = ExecutionNode(
        "summarize",
        "Compile a clear, concise summary from dependency outputs for the user.",
        summary_agent,
        depends_on=["verify_result"],
        inherited_pinned_skills=[published_skill],
        inherited_pinned_skill_contexts=[inherited_context],
    )
    graph = ExecutionGraph(strategy="dag", nodes=[summary_node])

    pending = resolver.resolve_graph(
        graph,
        task=_task(),
        user_message="summarize result",
        attempt_index=2,
        provider_bundle=_bundle(provider),
    )
    resolved, reports = asyncio.run(pending)

    assert resolved.nodes[0].inherited_pinned_skills == []
    assert resolved.nodes[0].inherited_pinned_skill_contexts == []
    assert resolved.nodes[0].agent.metadata["selected_skill_names"] == []

    assert reports[0].selected_skill_names == []
    assert reports[0].ephemeral_used is False
    assert reports[0].reason == "summary node uses dependency outputs directly"

    # The recording provider saw no calls during resolution.
    assert provider.calls == []
|
||||
|
||||
Reference in New Issue
Block a user