feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核
新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证 (通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。 实现Agent Team v1协调器,支持sequence/parallel/dag执行策略, sub-agent复用主AgentLoop,每个run使用独立memory snapshot。 建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期, 通过Task验证通过且用户满意才生成学习候选。 重构目录结构,移除third_party依赖,建立统一engine内核, 所有agent共享运行时基础组件。 更新ContextBuilder清理provider消息字段,增强SkillContext版本管理, 集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
507
app-instance/backend/tests/unit/test_task_mode_feedback.py
Normal file
507
app-instance/backend/tests/unit/test_task_mode_feedback.py
Normal file
@ -0,0 +1,507 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine import EngineLoader
|
||||
from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.services.agent_service import AgentService
|
||||
from beaver.tasks import TaskExecutionPlan, TaskService, ValidationResult, ValidationService
|
||||
|
||||
|
||||
class StubProvider(LLMProvider):
    """Scripted provider that replays canned responses and records each call.

    Every ``chat`` invocation appends the received ``messages`` object to
    ``calls`` and hands back the next queued response, front first.
    """

    def __init__(self, responses: list[LLMResponse]) -> None:
        super().__init__()
        # Messages received by each chat() call, in call order.
        self.calls: list[list[dict]] = []
        # Private copy so the caller's list is never consumed.
        self._responses = [*responses]

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Record ``messages`` and return the next scripted response.

        Raises:
            AssertionError: when the scripted responses are exhausted.
        """
        self.calls.append(messages)
        if self._responses:
            return self._responses.pop(0)
        raise AssertionError("No stubbed provider responses left")

    def get_default_model(self) -> str:
        """Return the fixed model name used by this stub."""
        return "stub-model"
|
||||
|
||||
|
||||
class StubValidationService:
    """Validation stand-in that returns pre-scripted results in order."""

    def __init__(self, results: list[ValidationResult]) -> None:
        # Copy the input so popping never mutates the caller's list.
        self.results = [*results]

    async def validate_task_result(self, **kwargs) -> ValidationResult:
        """Return the next queued result; the keyword arguments are ignored.

        Raises:
            AssertionError: when no scripted result is left.
        """
        if self.results:
            return self.results.pop(0)
        raise AssertionError("No stubbed validation results left")
|
||||
|
||||
|
||||
class StubTaskExecutionPlanner:
    """Planner stand-in that serves scripted plans and records its calls.

    While more than one plan is queued, plans are consumed front first.
    Once a single plan remains it is returned on every later call without
    being removed.
    """

    def __init__(self, plans: list[TaskExecutionPlan] | None = None) -> None:
        if plans:
            self.plans = list(plans)
        else:
            # No plans supplied: fall back to one default plan.
            self.plans = [TaskExecutionPlan.single("test-single")]
        # Keyword arguments of every plan() call, in call order.
        self.calls = []

    async def plan(self, **kwargs) -> TaskExecutionPlan:
        """Record ``kwargs`` and return the next plan.

        Raises:
            AssertionError: when ``plans`` is empty.
        """
        self.calls.append(kwargs)
        remaining = len(self.plans)
        if remaining == 0:
            raise AssertionError("No stubbed execution plans left")
        if remaining == 1:
            # The last plan is sticky: return it without consuming it.
            return self.plans[0]
        return self.plans.pop(0)
|
||||
|
||||
|
||||
class FakeLearningCandidate:
    """Minimal learning-candidate double exposing only ``to_dict``."""

    def to_dict(self) -> dict:
        """Return the fixed serialized form the feedback tests compare against."""
        payload = {
            "candidate_id": "candidate-1",
            "kind": "new_skill",
            "status": "open",
        }
        return payload
|
||||
|
||||
|
||||
def _bundle(*responses: str) -> ProviderBundle:
    """Build a provider bundle whose main provider replays ``responses`` in order.

    Each text becomes one ``LLMResponse`` with ``finish_reason="stop"`` served
    by a fresh ``StubProvider``.
    """
    runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    scripted = [
        LLMResponse(
            content=text,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        for text in responses
    ]
    return ProviderBundle(main_runtime=runtime, main_provider=StubProvider(scripted))
|
||||
|
||||
|
||||
def _single_planner() -> StubTaskExecutionPlanner:
    """Return a planner stub scripted with one ``TaskExecutionPlan.single("test-single")`` plan."""
    only_plan = TaskExecutionPlan.single("test-single")
    return StubTaskExecutionPlanner([only_plan])
|
||||
|
||||
|
||||
def _team_plan(strategy: str = "sequence") -> TaskExecutionPlan:
    """Build a team-mode plan containing a single ``research`` node.

    Args:
        strategy: Execution strategy forwarded to ``ExecutionGraph``.
    """
    researcher = AgentDescriptor(name="researcher", role="research")
    research_node = ExecutionNode(
        node_id="research",
        task="research implementation options",
        agent=researcher,
    )
    graph = ExecutionGraph(
        strategy=strategy,  # type: ignore[arg-type]
        nodes=[research_node],
    )
    return TaskExecutionPlan(
        mode="team",
        reason="test-team",
        graph=graph,
        final_synthesis_instruction="Use the sub-agent result to produce the final answer.",
    )
|
||||
|
||||
|
||||
def _provider_bundle(provider: StubProvider) -> ProviderBundle:
    """Wrap an existing stub ``provider`` as the main provider of a bundle."""
    runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=runtime, main_provider=provider)
|
||||
|
||||
|
||||
def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
    """A plain ``"hello?"`` message yields no task id and leaves the task store empty."""
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=_single_planner(),
        # No validation results are scripted for this run.
        validation_service=StubValidationService([]),
    )
    service = AgentService(loader=loader)

    outcome = asyncio.run(
        service.process_direct(
            "hello?",
            session_id="web:simple",
            provider_bundle=_bundle("hi"),
        )
    )
    runtime = service.create_loop().boot()

    assert outcome.task_id is None
    assert runtime.task_service.store.list_tasks() == []
|
||||
|
||||
|
||||
def test_complex_request_creates_task_and_records_validation(tmp_path: Path) -> None:
    """An implementation request creates a task and records its validation.

    Checks the task status, the validation snapshot event, the run record's
    task link and validation result, and that learning candidates stay disabled.
    """
    passing = ValidationResult(passed=True, score=0.9, validator="test")
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=_single_planner(),
        validation_service=StubValidationService([passing]),
    )
    service = AgentService(loader=loader)

    outcome = asyncio.run(
        service.process_direct(
            "implement the new report workflow",
            session_id="web:task",
            provider_bundle=_bundle("implemented"),
        )
    )
    runtime = service.create_loop().boot()
    stored_task = runtime.task_service.get_task_by_run_id(outcome.run_id)
    run_events = runtime.session_manager.get_run_event_records(outcome.session_id, outcome.run_id)
    latest_run = runtime.run_memory_store.list_runs()[-1]
    effects_event = next(
        record for record in run_events if record.event_type == "skill_effects_snapshotted"
    )

    assert outcome.task_id is not None
    assert stored_task is not None
    assert stored_task.status == "awaiting_feedback"
    assert any(record.event_type == "task_validation_snapshotted" for record in run_events)
    assert latest_run.task_id == outcome.task_id
    assert latest_run.validation_result["accepted"] is True
    effects_payload = effects_event.event_payload
    assert effects_payload["learning_candidate_enabled"] is False
    assert effects_payload["learning_candidates"] == []
|
||||
|
||||
|
||||
def test_validation_failure_retries_once(tmp_path: Path) -> None:
    """A failed validation followed by a passing one ends with the revised output.

    The task must record two runs, and only the revised draft may appear in
    the visible conversation.
    """
    rejected = ValidationResult(
        passed=False,
        score=0.2,
        issues=["missing tests"],
        recommended_revision_prompt="Add tests before final response.",
        validator="test",
    )
    accepted = ValidationResult(passed=True, score=0.88, validator="test")
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=_single_planner(),
        validation_service=StubValidationService([rejected, accepted]),
    )
    service = AgentService(loader=loader)

    outcome = asyncio.run(
        service.process_direct(
            "implement and validate the task",
            session_id="web:retry",
            provider_bundle=_bundle("first draft", "revised draft"),
        )
    )
    runtime = service.create_loop().boot()
    stored_task = runtime.task_service.get_task(outcome.task_id)

    assert outcome.output_text == "revised draft"
    assert outcome.validation_result["accepted"] is True
    assert stored_task is not None
    assert len(stored_task.run_ids) == 2

    conversation = runtime.session_manager.get_messages_as_conversation(outcome.session_id)
    shown_texts = [entry.get("content") for entry in conversation]
    assert "first draft" not in shown_texts
    assert "revised draft" in shown_texts
|
||||
|
||||
|
||||
def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
    """Feedback drives the task to ``closed`` or ``abandoned``.

    Scenario 1: ``satisfied`` feedback closes the task and returns the learning
    candidates produced by the patched builder, which is called exactly once.
    Scenario 2: ``abandon`` feedback after two failed validations marks the
    task abandoned and returns no learning candidates.
    """
    # Scenario 1: satisfied feedback on a validated run.
    passing = ValidationResult(passed=True, score=0.9, validator="test")
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=_single_planner(),
            validation_service=StubValidationService([passing]),
        )
    )
    outcome = asyncio.run(
        service.process_direct(
            "implement feedback handling",
            session_id="web:feedback",
            provider_bundle=_bundle("done"),
        )
    )
    runtime = service.create_loop().boot()
    learning_calls = []

    def _fake_build_candidates() -> list[FakeLearningCandidate]:
        # Record the invocation so the test can assert it happened once.
        learning_calls.append("called")
        return [FakeLearningCandidate()]

    runtime.skill_learning_service.build_learning_candidates = _fake_build_candidates

    satisfied_reply = asyncio.run(
        service.submit_feedback(
            session_id=outcome.session_id,
            run_id=outcome.run_id,
            feedback_type="satisfied",
        )
    )

    assert satisfied_reply["task_status"] == "closed"
    expected_candidates = [
        {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
    ]
    assert satisfied_reply["learning_candidates"] == expected_candidates
    assert learning_calls == ["called"]

    # Scenario 2: abandon feedback, in a separate workspace.
    failing_results = [
        ValidationResult(passed=False, score=0.3, validator="test"),
        ValidationResult(passed=False, score=0.3, validator="test"),
    ]
    service2 = AgentService(
        loader=EngineLoader(
            workspace=tmp_path / "abandon",
            task_execution_planner=_single_planner(),
            validation_service=StubValidationService(failing_results),
        )
    )
    abandoned = asyncio.run(
        service2.process_direct(
            "implement another workflow",
            session_id="web:abandon",
            provider_bundle=_bundle("not enough", "still not enough"),
        )
    )
    abandon_reply = asyncio.run(
        service2.submit_feedback(
            session_id=abandoned.session_id,
            run_id=abandoned.run_id,
            feedback_type="abandon",
            comment="too costly",
        )
    )

    assert abandon_reply["task_status"] == "abandoned"
    assert abandon_reply["learning_candidates"] == []
|
||||
|
||||
|
||||
def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
    """Repeating the same feedback is a no-op; conflicting feedback is rejected.

    Two ``satisfied`` submissions must produce one ``task_feedback_recorded``
    event, and the run's assistant message must carry the feedback, task and
    validation states. A later ``abandon`` must raise ``ValueError`` and leave
    the task closed.
    """
    passing = ValidationResult(passed=True, score=0.9, validator="test")
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=_single_planner(),
            validation_service=StubValidationService([passing]),
        )
    )
    outcome = asyncio.run(
        service.process_direct(
            "implement feedback projection",
            session_id="web:feedback-projection",
            provider_bundle=_bundle("done"),
        )
    )
    runtime = service.create_loop().boot()

    first = asyncio.run(
        service.submit_feedback(
            session_id=outcome.session_id,
            run_id=outcome.run_id,
            feedback_type="satisfied",
        )
    )
    second = asyncio.run(
        service.submit_feedback(
            session_id=outcome.session_id,
            run_id=outcome.run_id,
            feedback_type="satisfied",
        )
    )

    run_events = runtime.session_manager.get_run_event_records(outcome.session_id, outcome.run_id)
    feedback_events = [
        record for record in run_events if record.event_type == "task_feedback_recorded"
    ]
    assistant_messages = []
    for entry in runtime.session_manager.get_messages_as_conversation(outcome.session_id):
        if entry.get("role") == "assistant" and entry.get("run_id") == outcome.run_id:
            assistant_messages.append(entry)
    # The last assistant message of the run is the one inspected.
    assistant = assistant_messages[-1]

    assert first["task_status"] == "closed"
    assert second["task_status"] == "closed"
    assert len(feedback_events) == 1
    assert assistant["feedback_state"] == "satisfied"
    assert assistant["task_status"] == "closed"
    assert assistant["validation_status"] == "passed"

    # A different feedback type after one is recorded must be refused.
    with pytest.raises(ValueError, match="already recorded"):
        asyncio.run(
            service.submit_feedback(
                session_id=outcome.session_id,
                run_id=outcome.run_id,
                feedback_type="abandon",
            )
        )

    stored_task = runtime.task_service.get_task(outcome.task_id)
    assert stored_task is not None
    assert stored_task.status == "closed"
|
||||
|
||||
|
||||
def test_task_mode_team_plan_runs_subagent_then_main_synthesis(tmp_path: Path) -> None:
    """A team plan runs the sub-agent, then the main provider writes the final answer.

    The sub-agent output must appear in the first message of the main
    provider's first call, and the task must record two runs with the
    returned run last.
    """
    synthesis_reply = LLMResponse(
        content="final synthesized answer",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    evidence_reply = LLMResponse(
        content="sub-agent evidence",
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
    main_provider = StubProvider([synthesis_reply])
    sub_provider = StubProvider([evidence_reply])
    passing = ValidationResult(passed=True, score=0.9, validator="test")
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
            validation_service=StubValidationService([passing]),
        )
    )

    outcome = asyncio.run(
        service.process_direct(
            "implement team-backed workflow",
            session_id="web:team",
            provider_bundle=_provider_bundle(main_provider),
            team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
        )
    )
    runtime = service.create_loop().boot()
    stored_task = runtime.task_service.get_task(outcome.task_id)
    session_events = runtime.session_manager.get_event_records(outcome.session_id)

    assert outcome.output_text == "final synthesized answer"
    assert stored_task is not None
    assert len(stored_task.run_ids) == 2
    assert outcome.run_id == stored_task.run_ids[-1]
    assert any(record.event_type == "task_execution_planned" for record in session_events)
    assert any(record.event_type == "task_team_run_completed" for record in session_events)
    first_main_message = main_provider.calls[0][0]
    assert "sub-agent evidence" in first_main_message["content"]
    assert "sub-agent evidence" != outcome.output_text
|
||||
|
||||
|
||||
def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> None:
    """A failing team provider factory must not prevent the main synthesis.

    The run must still return the main provider's answer, a
    ``task_team_run_failed`` event must be recorded, and the failure text must
    reach the first message of the main provider's first call.
    """

    def _unavailable_factory(node):
        # Explicit raising callable in place of the opaque
        # ``(_ for _ in ()).throw(...)`` lambda trick; same exception and message.
        raise RuntimeError("sub-agent unavailable")

    main_provider = StubProvider(
        [
            LLMResponse(
                content="fallback synthesized answer",
                finish_reason="stop",
                provider_name="stub",
                model="stub-model",
            )
        ]
    )
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
            validation_service=StubValidationService(
                [ValidationResult(passed=True, score=0.9, validator="test")]
            ),
        )
    )

    result = asyncio.run(
        service.process_direct(
            "implement workflow despite team failure",
            session_id="web:team-failure",
            provider_bundle=_provider_bundle(main_provider),
            team_provider_bundle_factory=_unavailable_factory,
        )
    )
    loaded = service.create_loop().boot()
    events = loaded.session_manager.get_event_records(result.session_id)

    assert result.output_text == "fallback synthesized answer"
    assert any(event.event_type == "task_team_run_failed" for event in events)
    assert "sub-agent unavailable" in main_provider.calls[0][0]["content"]
|
||||
|
||||
|
||||
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
    """A team run that fails validation once hides the first synthesized answer.

    After the retry the task must record four runs, only the revised answer
    may be visible, and every run's last ``skill_effects_snapshotted`` event
    must have learning candidates disabled.
    """

    def _stop_reply(text: str) -> LLMResponse:
        # All scripted replies share the same stop/stub metadata.
        return LLMResponse(
            content=text,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )

    main_provider = StubProvider(
        [_stop_reply("first synthesized answer"), _stop_reply("revised synthesized answer")]
    )
    # One sub-agent provider per factory call; consumed front first.
    sub_providers = [
        StubProvider([_stop_reply("first evidence")]),
        StubProvider([_stop_reply("second evidence")]),
    ]
    validations = [
        ValidationResult(passed=False, score=0.2, recommended_revision_prompt="revise", validator="test"),
        ValidationResult(passed=True, score=0.9, validator="test"),
    ]
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=StubTaskExecutionPlanner([_team_plan(), _team_plan()]),
            validation_service=StubValidationService(validations),
        )
    )

    outcome = asyncio.run(
        service.process_direct(
            "implement and validate with team",
            session_id="web:team-retry",
            provider_bundle=_provider_bundle(main_provider),
            team_provider_bundle_factory=lambda node: _provider_bundle(sub_providers.pop(0)),
        )
    )
    runtime = service.create_loop().boot()
    stored_task = runtime.task_service.get_task(outcome.task_id)
    conversation = runtime.session_manager.get_messages_as_conversation(outcome.session_id)
    shown_texts = [entry.get("content") for entry in conversation]
    runs_by_id = {}
    for run in runtime.run_memory_store.list_runs():
        runs_by_id[run.run_id] = run

    assert outcome.output_text == "revised synthesized answer"
    assert stored_task is not None
    assert len(stored_task.run_ids) == 4
    assert "first synthesized answer" not in shown_texts
    assert "revised synthesized answer" in shown_texts
    for run_id in stored_task.run_ids:
        run = runs_by_id[run_id]
        run_events = runtime.session_manager.get_run_event_records(run.session_id, run_id)
        snapshots = [
            record for record in run_events if record.event_type == "skill_effects_snapshotted"
        ]
        assert snapshots
        assert snapshots[-1].event_payload["learning_candidate_enabled"] is False
|
||||
|
||||
|
||||
def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
    """The built assistant message keeps only ``role`` and ``content``."""
    decorated_message = {
        "role": "assistant",
        "content": "done",
        "run_id": "run-1",
        "task_id": "task-1",
        "task_status": "closed",
        "validation_status": "passed",
        "feedback_state": "satisfied",
    }
    build_input = ContextBuildInput(history=[decorated_message])

    built = ContextBuilder().build_messages(build_input)

    last_message = built.messages[-1]
    assert last_message == {"role": "assistant", "content": "done"}
|
||||
|
||||
|
||||
def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
    """A provider reply of ``"not json"`` must not yield an accepted validation.

    The result must be unaccepted, attributed to the ``llm_error`` validator,
    and carry at least one issue.
    """
    tasks = TaskService(tmp_path / "tasks")
    pending_task = tasks.create_task(
        session_id="web:validator",
        description="implement validator handling",
    )
    validator = ValidationService()

    verdict = asyncio.run(
        validator.validate_task_result(
            task=pending_task,
            user_message="implement validator handling",
            final_output="done",
            provider_bundle=_bundle("not json"),
        )
    )

    assert verdict.accepted is False
    assert verdict.validator == "llm_error"
    assert verdict.issues
|
||||
Reference in New Issue
Block a user