"""Preservation checks for skill revision drafts."""

from __future__ import annotations

import re
from typing import Any

# ATX-style Markdown heading: one to six '#', whitespace, then the title text.
_HEADING_RE = re.compile(r"^#{1,6}\s+(.+?)\s*$")
# Fenced code block delimiter: up to three spaces of indent, then a run of at
# least three backticks or tildes. Group 2 is whatever follows the run (the
# info string on an opening fence; must be blank on a closing fence).
_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")
_WHITESPACE_RE = re.compile(r"\s+")


def check_preservation(*, base_content: str, draft_content: str) -> dict[str, Any]:
    """Report which Markdown sections of ``base_content`` survive in the draft.

    Both documents are split into sections keyed by heading text. Every
    section of the base is then looked up (exact heading match) in the draft.
    Sections that exist only in the draft are not reported.

    Args:
        base_content: The existing document the draft was derived from.
        draft_content: The proposed revision.

    Returns:
        A dict with the keys:

        * ``passed`` -- ``True`` when no base section is missing from the draft.
        * ``risk_level`` -- ``"high"`` if any section was dropped, else ``"low"``.
        * ``preserved_sections`` -- base headings still present in the draft,
          including those whose body changed.
        * ``changed_sections`` -- preserved headings whose body differs after
          whitespace/case normalization.
        * ``dropped_sections`` -- base headings with no counterpart in the draft.
    """
    base_sections = _sections(base_content)
    draft_sections = _sections(draft_content)
    preserved: list[str] = []
    changed: list[str] = []
    dropped: list[str] = []

    for heading, body in base_sections.items():
        draft_body = draft_sections.get(heading)
        if draft_body is None:
            dropped.append(heading)
            continue
        # "Preserved" means the heading is still there; a modified body is
        # additionally recorded in ``changed``.
        preserved.append(heading)
        if _normalize(body) != _normalize(draft_body):
            changed.append(heading)

    return {
        "passed": not dropped,
        "risk_level": "high" if dropped else "low",
        "preserved_sections": preserved,
        "changed_sections": changed,
        "dropped_sections": dropped,
    }


def _sections(content: str) -> dict[str, str]:
    """Split Markdown text into ``{heading: body}``.

    Text before the first heading is stored under the key ``"body"``. Lines
    from repeated headings are merged into one entry. Sections whose body is
    empty after stripping are omitted, so a heading with no text under it is
    not tracked. Lines inside fenced code blocks are never treated as
    headings, which keeps ``# comment`` lines in code samples from being
    mistaken for sections.
    """
    current = "body"
    sections: dict[str, list[str]] = {current: []}
    open_fence: str | None = None  # delimiter run that opened the active fence

    for line in (content or "").splitlines():
        fence = _FENCE_RE.match(line)

        if open_fence is not None:
            # Inside a code fence: everything is body text. Only a run of the
            # same character, at least as long as the opener and followed by
            # nothing but whitespace, closes it.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not fence.group(2).strip()
            ):
                open_fence = None
            sections[current].append(line)
            continue

        if fence:
            open_fence = fence.group(1)
            sections[current].append(line)
            continue

        heading = _HEADING_RE.match(line)
        if heading:
            current = heading.group(1).strip()
            sections.setdefault(current, [])
            continue

        sections[current].append(line)

    result: dict[str, str] = {}
    for heading, lines in sections.items():
        text = "\n".join(lines).strip()
        if text:
            result[heading] = text
    return result


def _normalize(value: str) -> str:
    """Collapse whitespace runs to single spaces, trim, and lowercase."""
    return _WHITESPACE_RE.sub(" ", value or "").strip().lower()