update memory gateway

This commit is contained in:
2026-04-30 16:09:28 +08:00
parent e6b1520bce
commit ba84b1ddb3
98 changed files with 1341 additions and 6783 deletions

View File

@@ -0,0 +1,87 @@
"""Document ingestion helpers for Memory Gateway."""
from __future__ import annotations
import re
from datetime import datetime, timezone
from pathlib import Path
def slugify(value: str, fallback: str = "document") -> str:
    """Turn *value* into a lowercase, dash-separated slug.

    ASCII letters, digits, ``_``, ``-`` and characters in the
    ``\\u4e00``-``\\u9fff`` range are kept; every other run of characters
    becomes a single dash. The result is capped at 100 characters and never
    starts or ends with a dash.

    Args:
        value: Text to convert.
        fallback: Returned when nothing usable remains after cleaning.

    Returns:
        The slug, or *fallback* if the slug would be empty.
    """
    lowered = value.lower()
    dashed = re.sub(r"[^a-zA-Z0-9\u4e00-\u9fff_-]+", "-", lowered)
    # Literal dashes in the input survive the first pass, so runs of them
    # still have to be collapsed separately.
    collapsed = re.sub(r"-+", "-", dashed.strip("-"))
    # Truncation can leave a dash at the cut point; trim it again.
    trimmed = collapsed[:100].strip("-")
    return trimmed if trimmed else fallback
def convert_file_to_markdown(file_path: str | Path) -> str:
"""Convert a local document to Markdown using Microsoft MarkItDown."""
try:
from markitdown import MarkItDown
except ModuleNotFoundError as exc:
raise RuntimeError("markitdown is not installed. Install with: pip install 'markitdown[all]'") from exc
file_path = Path(file_path)
converter = MarkItDown(enable_plugins=False)
if hasattr(converter, "convert_local"):
result = converter.convert_local(str(file_path))
else:
result = converter.convert(str(file_path))
markdown = getattr(result, "text_content", "") or ""
if not markdown.strip():
raise RuntimeError("Document conversion produced empty Markdown")
return markdown
def build_markdown_note(
*,
title: str,
markdown: str,
source_filename: str,
tags: list[str],
knowledge_type: str,
summary: str | None = None,
) -> str:
tag_text = ", ".join(tags)
frontmatter = [
"---",
f"title: {title}",
f"knowledge_type: {knowledge_type}",
f"source_filename: {source_filename}",
f"created_at: {datetime.now(timezone.utc).isoformat()}",
f"tags: [{tag_text}]" if tag_text else "tags: []",
]
if summary:
escaped = summary.replace('"', '\\"')
frontmatter.append(f'summary: "{escaped}"')
frontmatter.extend(["---", "", f"# {title}", "", markdown.strip(), ""])
return "\n".join(frontmatter)
def save_markdown_to_obsidian(
    *,
    vault_path: str | Path,
    relative_dir: str,
    title: str,
    markdown: str,
    source_filename: str,
    tags: list[str],
    knowledge_type: str,
    summary: str | None = None,
) -> Path:
    """Write a Markdown note with frontmatter into a directory of a vault.

    The destination directory is created if missing. The file name is the
    slug of *title*; when that slug is empty, the slug of the source file
    name without its last extension (or of *title* if that stem is empty)
    is used instead. A file already at the target path is overwritten.

    Args:
        vault_path: Root directory of the vault.
        relative_dir: Directory under the vault root; leading and trailing
            ``/`` characters are ignored.
        title: Note title, also the basis of the file name.
        markdown: Note body.
        source_filename: Name of the file the note was derived from.
        tags: Tags recorded in the frontmatter.
        knowledge_type: Value of the ``knowledge_type`` frontmatter field.
        summary: Optional summary recorded in the frontmatter.

    Returns:
        Path of the written ``.md`` file.
    """
    note_dir = Path(vault_path) / relative_dir.strip("/")
    note_dir.mkdir(parents=True, exist_ok=True)

    # The filename stem is only the fallback for an unusable title slug.
    stem = source_filename.rsplit(".", 1)[0]
    fallback_slug = slugify(stem or title)
    note_path = note_dir / f"{slugify(title, fallback_slug)}.md"

    content = build_markdown_note(
        title=title,
        markdown=markdown,
        source_filename=source_filename,
        tags=tags,
        knowledge_type=knowledge_type,
        summary=summary,
    )
    note_path.write_text(content, encoding="utf-8")
    return note_path