update memory gateway
This commit is contained in:
87
memory_gateway/document_ingest.py
Normal file
87
memory_gateway/document_ingest.py
Normal file
@ -0,0 +1,87 @@
|
||||
"""Document ingestion helpers for Memory Gateway."""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def slugify(value: str, fallback: str = "document") -> str:
    """Return a lowercase, hyphen-separated slug derived from *value*.

    Every run of characters other than ASCII letters, digits, CJK ideographs
    in U+4E00-U+9FFF, ``_`` and ``-`` becomes a single hyphen. Consecutive
    hyphens are then collapsed, the result is cut to at most 100 characters,
    and leading/trailing hyphens are removed.

    Args:
        value: Text to turn into a slug.
        fallback: Returned when nothing usable remains after cleaning.

    Returns:
        The slug, or *fallback* if the slug would be empty.
    """
    lowered = value.lower()
    hyphenated = re.sub(r"[^a-zA-Z0-9\u4e00-\u9fff_-]+", "-", lowered)
    trimmed = hyphenated.strip("-")
    # Hyphens are allowed characters, so runs of them can survive the first pass.
    collapsed = re.sub(r"-+", "-", trimmed)
    # Truncation may leave a hyphen at the cut point; strip once more.
    candidate = collapsed[:100].strip("-")
    if candidate:
        return candidate
    return fallback
|
||||
|
||||
|
||||
def convert_file_to_markdown(file_path: str | Path) -> str:
    """Convert a local document to Markdown using Microsoft MarkItDown.

    MarkItDown is imported lazily and instantiated with plugins disabled.
    The converter's ``convert_local`` method is used when the instance has
    one; otherwise ``convert`` is called instead.

    Args:
        file_path: Location of the document to convert.

    Returns:
        The ``text_content`` of the conversion result.

    Raises:
        RuntimeError: If ``markitdown`` cannot be imported, or if the
            conversion result has no non-whitespace ``text_content``.
    """
    try:
        from markitdown import MarkItDown
    except ModuleNotFoundError as exc:
        raise RuntimeError("markitdown is not installed. Install with: pip install 'markitdown[all]'") from exc

    source = str(Path(file_path))
    engine = MarkItDown(enable_plugins=False)

    # Pick the conversion entry point once, then call it.
    run_conversion = engine.convert_local if hasattr(engine, "convert_local") else engine.convert
    outcome = run_conversion(source)

    # A missing or None text_content is treated the same as an empty string.
    text = getattr(outcome, "text_content", "") or ""
    if text.strip():
        return text
    raise RuntimeError("Document conversion produced empty Markdown")
|
||||
|
||||
|
||||
# Characters that give a YAML plain scalar a special meaning when they lead it.
_YAML_LEADING_INDICATORS = frozenset("-?:,[]{}#&*!|>'\"%@`")
# Words that YAML parsers commonly resolve to booleans or null instead of strings.
_YAML_RESERVED_WORDS = frozenset({"true", "false", "yes", "no", "on", "off", "null", "~"})
# Sequences that end or corrupt a plain scalar: "key: " separators, " #" comments, control chars.
_YAML_UNSAFE = re.compile(r":(?:\s|$)|\s#|[\x00-\x1f]")
# Inside a flow sequence ("[a, b]") these characters additionally act as structure.
_YAML_FLOW_UNSAFE = re.compile(r"[,\[\]{}]")


def _yaml_scalar(value: str, *, flow: bool = False) -> str:
    """Render *value* as a YAML scalar, double-quoting it only when needed.

    Values that are safe as plain scalars are returned unchanged so existing
    notes keep their exact formatting. Anything else is emitted as a JSON
    string, which is also a valid YAML double-quoted scalar.
    """
    import json  # local import keeps this block independent of the file's import list

    text = str(value)
    needs_quotes = (
        not text
        or text != text.strip()
        or text[0] in _YAML_LEADING_INDICATORS
        or text.lower() in _YAML_RESERVED_WORDS
        or _YAML_UNSAFE.search(text) is not None
        or (flow and _YAML_FLOW_UNSAFE.search(text) is not None)
    )
    if not needs_quotes:
        # Number-like text would be loaded as a number rather than a string.
        try:
            float(text)
        except ValueError:
            pass
        else:
            needs_quotes = True
    if not needs_quotes:
        return text
    return json.dumps(text, ensure_ascii=False)


def build_markdown_note(
    *,
    title: str,
    markdown: str,
    source_filename: str,
    tags: list[str],
    knowledge_type: str,
    summary: str | None = None,
) -> str:
    """Assemble a Markdown note with YAML frontmatter.

    The frontmatter carries ``title``, ``knowledge_type``, ``source_filename``,
    ``created_at`` (current UTC time in ISO 8601 form), ``tags`` and, when a
    non-empty summary is given, ``summary``. It is followed by a level-one
    heading with the title and the stripped Markdown body.

    Frontmatter values that would break or be misread as plain YAML (for
    example a title containing ``": "`` or a tag containing a comma) are
    double-quoted; simple values are written exactly as given.

    Args:
        title: Note title, used in the frontmatter and as the heading.
        markdown: Body text; surrounding whitespace is removed.
        source_filename: Name of the file the note was produced from.
        tags: Tag names; each is stripped and blank entries are skipped.
        knowledge_type: Value for the ``knowledge_type`` field.
        summary: Optional summary; omitted from the frontmatter when falsy.

    Returns:
        The complete note text, ending with a newline.
    """
    stripped_tags = (tag.strip() for tag in tags)
    tag_text = ", ".join(_yaml_scalar(tag, flow=True) for tag in stripped_tags if tag)
    frontmatter = [
        "---",
        f"title: {_yaml_scalar(title)}",
        f"knowledge_type: {_yaml_scalar(knowledge_type)}",
        f"source_filename: {_yaml_scalar(source_filename)}",
        f"created_at: {datetime.now(timezone.utc).isoformat()}",
        # An empty tag_text yields "tags: []".
        f"tags: [{tag_text}]",
    ]
    if summary:
        # Always quoted; JSON escaping also covers backslashes and newlines.
        import json

        frontmatter.append(f"summary: {json.dumps(summary, ensure_ascii=False)}")
    frontmatter.extend(["---", "", f"# {title}", "", markdown.strip(), ""])
    return "\n".join(frontmatter)
|
||||
|
||||
|
||||
def save_markdown_to_obsidian(
    *,
    vault_path: str | Path,
    relative_dir: str,
    title: str,
    markdown: str,
    source_filename: str,
    tags: list[str],
    knowledge_type: str,
    summary: str | None = None,
) -> Path:
    """Write a frontmatter-annotated Markdown note into a vault directory.

    The note is stored at ``<vault_path>/<relative_dir>/<slug>.md``, where the
    slug comes from *title* and falls back to a slug of the source filename
    (without its last extension) when the title yields nothing usable. The
    target directory is created if it does not exist.

    Args:
        vault_path: Root directory of the vault.
        relative_dir: Directory inside the vault; leading and trailing ``/``
            are ignored.
        title: Note title; also determines the file name.
        markdown: Note body.
        source_filename: Name of the originating file.
        tags: Tags written to the frontmatter.
        knowledge_type: Value for the ``knowledge_type`` frontmatter field.
        summary: Optional summary for the frontmatter.

    Returns:
        Path of the written note.

    Raises:
        ValueError: If *relative_dir* is anchored (drive or root) or contains
            a ``..`` component, since the note could then be written outside
            the vault.
    """
    vault = Path(vault_path)
    subdir = Path(relative_dir.strip("/"))
    # Joining an anchored path replaces the vault prefix, and ".." climbs out of it.
    if subdir.anchor or ".." in subdir.parts:
        raise ValueError(f"relative_dir must stay inside the vault: {relative_dir!r}")
    target_dir = vault / subdir
    target_dir.mkdir(parents=True, exist_ok=True)

    # Despite the name, this is a slug of the source file's base name, not a hash.
    digest = slugify(source_filename.rsplit(".", 1)[0] or title)
    note_name = f"{slugify(title, digest)}.md"
    target = target_dir / note_name

    # NOTE(review): write_text replaces an existing note with the same name —
    # confirm that overwriting on re-ingestion is intended.
    target.write_text(
        build_markdown_note(
            title=title,
            markdown=markdown,
            source_filename=source_filename,
            tags=tags,
            knowledge_type=knowledge_type,
            summary=summary,
        ),
        encoding="utf-8",
    )
    return target
|
||||
Reference in New Issue
Block a user