"""Batch-ingest mock KB/playbook files and emit normalized knowledge JSON documents."""

from __future__ import annotations

import json
from dataclasses import asdict
from pathlib import Path

from pipeline.transforms.normalize_kb import load_and_normalize_kb


def ingest_kb(input_dir: str | Path, output_dir: str | Path) -> list[Path]:
    """Normalize every ``*.json`` file under *input_dir* into *output_dir*.

    Each source file is passed to ``load_and_normalize_kb``; the resulting
    dataclass instance is serialized with ``dataclasses.asdict`` and written
    as UTF-8, 2-space-indented JSON to ``<output_dir>/<normalized.id>.json``.

    Args:
        input_dir: Directory searched recursively for ``*.json`` sources.
        output_dir: Destination directory; created (with parents) if missing.

    Returns:
        The destination paths, one per processed source file, in sorted
        source-path order. If two sources normalize to the same ``id`` the
        later one overwrites the earlier file and the same path appears more
        than once in the list.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # When the output directory lives strictly inside the input tree, the
    # recursive glob would also pick up documents written by a previous run
    # and try to normalize them again. Only that layout is filtered; identical
    # directories or an output that is an ancestor of the input keep the
    # original behavior.
    output_root = output_dir.resolve()
    output_is_nested = input_dir.resolve() in output_root.parents

    written: list[Path] = []
    # sorted() materializes the listing up front, so files written during
    # this run are never seen by the loop, and the order is deterministic.
    for src in sorted(input_dir.rglob("*.json")):
        if output_is_nested and output_root in src.resolve().parents:
            continue
        normalized = load_and_normalize_kb(src)
        dest = output_dir / f"{normalized.id}.json"
        with dest.open("w", encoding="utf-8") as f:
            json.dump(asdict(normalized), f, ensure_ascii=False, indent=2)
        written.append(dest)
    return written


def main() -> None:
    """Parse command-line arguments, run the ingest, and print a summary.

    Prints ``normalized_kb=<count>`` followed by one written path per line.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Normalize a directory of mock KB/playbook JSON files.")
    parser.add_argument("--input-dir", default="evaluation/datasets/mock_kb", help="Directory containing raw mock KB/playbook files")
    parser.add_argument("--output-dir", default="evaluation/datasets/normalized_kb", help="Directory to write normalized KB/playbook files")
    args = parser.parse_args()

    written = ingest_kb(args.input_dir, args.output_dir)
    print(f"normalized_kb={len(written)}")
    for path in written:
        print(path)


if __name__ == "__main__":
    main()