# memory-gateway/pipeline/transforms/normalize_kb.py

"""Normalize raw mock KB/playbook documents into a retrieval-friendly structure."""
from __future__ import annotations

import json
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any


@dataclass
class NormalizedKnowledge:
    """Retrieval-friendly record built from one raw KB or playbook document.

    The field order is part of the interface: it determines the positional
    constructor signature and the key order produced by ``asdict``.
    """

    # --- identity and classification ---
    id: str  # populated from the raw document's "doc_id"
    memory_type: str  # normalize_kb always sets this to "knowledge"
    doc_type: str  # raw "doc_type", copied through unchanged
    scenario: str  # raw "scenario", copied through unchanged

    # --- human-readable content ---
    title: str
    abstract: str  # populated from the raw document's "summary"
    key_points: list[str]
    investigation_guidance: list[str]
    decision_points: list[str]

    # --- cross-references and provenance ---
    related_refs: dict[str, list[str]]  # reference lists keyed by a string label
    source_path: str  # path the raw document was loaded from; may be ""
    tags: list[str]


def normalize_kb(raw_doc: dict[str, Any], source_path: str = "") -> NormalizedKnowledge:
    """Convert a raw KB or playbook document into the normalized knowledge model.

    Args:
        raw_doc: Parsed raw document. ``doc_id``, ``doc_type``, ``scenario``
            and ``title`` are required. ``summary``, ``key_points``,
            ``investigation_guidance``, ``decision_points``, ``related_refs``
            and ``tags`` are optional; a missing key or an explicit null
            falls back to an empty string, list or dict as appropriate.
        source_path: Origin of the raw document; stored verbatim.

    Returns:
        A ``NormalizedKnowledge`` whose ``memory_type`` is always
        ``"knowledge"``.

    Raises:
        KeyError: If a required key is absent from ``raw_doc``.
    """

    def optional(key: str, fallback: Any) -> Any:
        # dict.get(key, default) only uses the default when the key is absent.
        # A JSON null decodes to None and would otherwise end up in a field
        # typed as str/list/dict, so treat None the same as "missing".
        value = raw_doc.get(key)
        return fallback if value is None else value

    return NormalizedKnowledge(
        id=raw_doc["doc_id"],
        memory_type="knowledge",
        doc_type=raw_doc["doc_type"],
        scenario=raw_doc["scenario"],
        title=raw_doc["title"],
        abstract=optional("summary", ""),
        key_points=optional("key_points", []),
        investigation_guidance=optional("investigation_guidance", []),
        decision_points=optional("decision_points", []),
        related_refs=optional("related_refs", {}),
        source_path=source_path,
        tags=optional("tags", []),
    )


def load_and_normalize_kb(path: str | Path) -> NormalizedKnowledge:
    """Read a raw KB/playbook JSON file and return its normalized form.

    The file is decoded as UTF-8, and the path (converted to ``str``) is
    passed along as the result's ``source_path``.
    """
    source_file = Path(path)
    parsed = json.loads(source_file.read_text(encoding="utf-8"))
    return normalize_kb(parsed, source_path=str(source_file))


def main() -> None:
    """Command-line entry point: normalize one file and print it as JSON.

    Takes a single positional ``path`` argument and writes the normalized
    document to stdout as indented JSON with non-ASCII characters preserved.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Normalize a mock KB or playbook JSON file.")
    cli.add_argument("path", help="Path to a raw KB/playbook JSON file")
    options = cli.parse_args()

    record = load_and_normalize_kb(options.path)
    rendered = json.dumps(asdict(record), ensure_ascii=False, indent=2)
    print(rendered)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()