"""Batch-ingest mock KB/playbook files and emit normalized knowledge JSON documents."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import asdict
|
|
from pathlib import Path
|
|
|
|
from pipeline.transforms.normalize_kb import load_and_normalize_kb
|
|
|
|
|
|
def ingest_kb(input_dir: str | Path, output_dir: str | Path) -> list[Path]:
|
|
input_dir = Path(input_dir)
|
|
output_dir = Path(output_dir)
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
written: list[Path] = []
|
|
for src in sorted(input_dir.rglob("*.json")):
|
|
normalized = load_and_normalize_kb(src)
|
|
dest = output_dir / f"{normalized.id}.json"
|
|
with dest.open("w", encoding="utf-8") as f:
|
|
json.dump(asdict(normalized), f, ensure_ascii=False, indent=2)
|
|
written.append(dest)
|
|
return written
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: normalize a KB directory and list the results.

    Reads ``--input-dir`` and ``--output-dir`` from the command line, runs
    ``ingest_kb`` on them, then prints a ``normalized_kb=<count>`` summary
    line followed by one written path per line.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Normalize a directory of mock KB/playbook JSON files.")
    # Both options are optional and share the same shape, so register them
    # from a small table of (flag, default, help) entries.
    option_specs = (
        ("--input-dir", "evaluation/datasets/mock_kb", "Directory containing raw mock KB/playbook files"),
        ("--output-dir", "evaluation/datasets/normalized_kb", "Directory to write normalized KB/playbook files"),
    )
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, default=default_value, help=help_text)
    options = cli.parse_args()

    outputs = ingest_kb(options.input_dir, options.output_dir)

    print(f"normalized_kb={len(outputs)}")
    for out_path in outputs:
        print(out_path)
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script; importing the module has no side effects.
if __name__ == "__main__":
    main()
|