# Viewer metadata (not part of the module): 64 lines, 1.9 KiB, Python
"""Normalize raw mock KB/playbook documents into a retrieval-friendly structure."""
|
|
from __future__ import annotations

import copy
import json
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any
|
|
|
|
|
|
@dataclass
class NormalizedKnowledge:
    """Retrieval-friendly representation of one KB or playbook document.

    The dataclass generates a positional ``__init__`` in declaration order,
    so the order of the fields below is part of the public interface.
    """

    # Identity and classification.
    id: str
    memory_type: str
    doc_type: str
    scenario: str

    # Human-readable content.
    title: str
    abstract: str
    key_points: list[str]
    investigation_guidance: list[str]
    decision_points: list[str]

    # Cross-references, provenance and labels.
    related_refs: dict[str, list[str]]
    source_path: str
    tags: list[str]
|
|
|
|
|
|
def _copy_or_default(raw_doc: dict[str, Any], key: str, default: Any) -> Any:
    """Return a deep copy of ``raw_doc[key]``; *default* when absent or ``None``."""
    value = raw_doc.get(key)
    return default if value is None else copy.deepcopy(value)


def normalize_kb(raw_doc: dict[str, Any], source_path: str = "") -> NormalizedKnowledge:
    """Convert a raw KB or playbook document into the normalized knowledge model.

    Args:
        raw_doc: Parsed document. ``doc_id``, ``doc_type``, ``scenario`` and
            ``title`` are required; ``summary``, ``key_points``,
            ``investigation_guidance``, ``decision_points``, ``related_refs``
            and ``tags`` are optional.
        source_path: Where the document was loaded from; stored verbatim.

    Returns:
        A ``NormalizedKnowledge`` whose ``memory_type`` is always
        ``"knowledge"``. Optional fields that are absent or explicitly
        ``None`` fall back to an empty string/list/dict. Container fields are
        deep copies, so mutating the result never changes ``raw_doc``.

    Raises:
        KeyError: If any required field is missing; the message lists all of
            the missing fields at once.
    """
    required = ("doc_id", "doc_type", "scenario", "title")
    missing = [name for name in required if name not in raw_doc]
    if missing:
        origin = f" ({source_path})" if source_path else ""
        raise KeyError(
            f"raw document{origin} is missing required field(s): {', '.join(missing)}"
        )

    return NormalizedKnowledge(
        id=raw_doc["doc_id"],
        memory_type="knowledge",
        doc_type=raw_doc["doc_type"],
        scenario=raw_doc["scenario"],
        title=raw_doc["title"],
        # The raw documents call this field "summary"; the model calls it "abstract".
        abstract=_copy_or_default(raw_doc, "summary", ""),
        key_points=_copy_or_default(raw_doc, "key_points", []),
        investigation_guidance=_copy_or_default(raw_doc, "investigation_guidance", []),
        decision_points=_copy_or_default(raw_doc, "decision_points", []),
        related_refs=_copy_or_default(raw_doc, "related_refs", {}),
        source_path=source_path,
        tags=_copy_or_default(raw_doc, "tags", []),
    )
|
|
|
|
|
|
def load_and_normalize_kb(path: str | Path) -> NormalizedKnowledge:
    """Read a raw KB/playbook JSON file and return its normalized form.

    Args:
        path: Location of the JSON file; it is decoded as UTF-8.

    Returns:
        The result of ``normalize_kb`` for the parsed document, with
        ``source_path`` set to ``str(path)``.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        TypeError: If the top-level JSON value is not an object.
    """
    path = Path(path)
    raw_doc = json.loads(path.read_text(encoding="utf-8"))

    # Fail early with a clear message: a list/string/number at the top level
    # would otherwise surface later as an opaque subscript TypeError.
    if not isinstance(raw_doc, dict):
        raise TypeError(
            f"{path}: expected a JSON object at the top level, "
            f"got {type(raw_doc).__name__}"
        )

    return normalize_kb(raw_doc, source_path=str(path))
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: normalize one JSON file and print it as JSON."""
    import argparse

    cli = argparse.ArgumentParser(description="Normalize a mock KB or playbook JSON file.")
    cli.add_argument("path", help="Path to a raw KB/playbook JSON file")
    options = cli.parse_args()

    # Turn the dataclass into plain dicts/lists so it can be serialized.
    record = asdict(load_and_normalize_kb(options.path))
    print(json.dumps(record, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|