update memory gateway
This commit is contained in:
@ -1,14 +1,19 @@
|
||||
"""Memory Gateway MCP Server.
|
||||
|
||||
基于 Model Context Protocol 的记忆网关服务,为局域网内的 AI Agent 提供统一的 OpenViking 访问入口。
|
||||
通用 Memory Gateway 服务,为 AI agent / harness 提供统一的 OpenViking 记忆检索、总结和知识沉淀入口。
|
||||
"""
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, FastAPI, Header, HTTPException, Request, status
|
||||
from fastapi import APIRouter, Depends, FastAPI, File, Form, Header, HTTPException, Request, UploadFile, status
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from mcp.server import Server
|
||||
@ -17,7 +22,9 @@ from sse_starlette import EventSourceResponse
|
||||
|
||||
from .config import get_config, set_config, Config
|
||||
from .openviking_client import get_openviking_client, close_openviking_client
|
||||
from .types import SearchRequest, AddMemoryRequest, AddResourceRequest
|
||||
from .document_ingest import convert_file_to_markdown, save_markdown_to_obsidian, slugify
|
||||
from .llm import LLMConfigurationError, LLMSummaryError, summarize_with_llm
|
||||
from .types import SearchRequest, AddMemoryRequest, AddResourceRequest, CommitSummaryRequest
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(
|
||||
@ -75,6 +82,27 @@ async def list_tools() -> list[Tool]:
|
||||
"required": ["uri", "content"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="commit_summary",
|
||||
description="总结一段通用内容并按需沉淀为 OpenViking memory/resource",
|
||||
inputSchema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {"type": "string", "description": "需要总结和沉淀的原文内容"},
|
||||
"title": {"type": "string", "description": "标题(可选)"},
|
||||
"summary": {"type": "string", "description": "人工提供的摘要(可选)"},
|
||||
"namespace": {"type": "string", "description": "OpenViking memory namespace(可选)"},
|
||||
"memory_type": {"type": "string", "description": "记忆类型,默认 summary"},
|
||||
"tags": {"type": "array", "items": {"type": "string"}, "description": "标签列表"},
|
||||
"source": {"type": "string", "description": "来源说明或外部链接"},
|
||||
"resource_uri": {"type": "string", "description": "写入 resource 的 URI(可选)"},
|
||||
"resource_type": {"type": "string", "description": "资源类型,默认 json"},
|
||||
"persist_as": {"type": "string", "enum": ["memory", "resource", "both", "none"], "description": "沉淀方式"},
|
||||
"max_summary_chars": {"type": "integer", "description": "摘要最大长度"},
|
||||
},
|
||||
"required": ["content"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="get_status",
|
||||
description="检查系统状态",
|
||||
@ -140,6 +168,11 @@ async def call_tool(name: str, arguments: Any) -> list[TextContent]:
|
||||
)
|
||||
return [TextContent(type="text", text=str(result))]
|
||||
|
||||
elif name == "commit_summary":
|
||||
request = CommitSummaryRequest(**arguments)
|
||||
result = await commit_summary_to_openviking(request)
|
||||
return [TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]
|
||||
|
||||
elif name == "get_status":
|
||||
ov_status = await ov_client.health_check()
|
||||
return [TextContent(type="text", text=f"Memory Gateway: OK\nOpenViking: {ov_status}")]
|
||||
@ -201,6 +234,155 @@ def verify_api_key(x_api_key: Optional[str] = Header(default=None)) -> None:
|
||||
)
|
||||
|
||||
|
||||
# Splits normalized text into sentences: matches the whitespace that follows a
# CJK or ASCII sentence-ending punctuation mark (。!?.!?).
_SENTENCE_RE = re.compile(r"(?<=[。!?.!?])\s+")
# Matches runs of characters that are NOT ASCII alphanumerics, CJK ideographs
# (U+4E00..U+9FFF), underscore, or hyphen; used to build URL-safe slugs.
_WORD_RE = re.compile(r"[^a-zA-Z0-9\u4e00-\u9fff_-]+")
|
||||
|
||||
|
||||
def _normalize_whitespace(value: str) -> str:
|
||||
return re.sub(r"\s+", " ", value).strip()
|
||||
|
||||
|
||||
def _slugify(value: str, fallback: str) -> str:
|
||||
slug = _WORD_RE.sub("-", value.lower()).strip("-")
|
||||
slug = re.sub(r"-+", "-", slug)[:80].strip("-")
|
||||
return slug or fallback
|
||||
|
||||
|
||||
def _derive_title(content: str, title: Optional[str]) -> str:
|
||||
if title and title.strip():
|
||||
return title.strip()
|
||||
for line in content.splitlines():
|
||||
line = line.strip("# -*\t")
|
||||
if line:
|
||||
return line[:120]
|
||||
return "Untitled summary"
|
||||
|
||||
|
||||
def _derive_summary(content: str, provided: Optional[str], max_chars: int) -> str:
|
||||
if provided and provided.strip():
|
||||
return provided.strip()[:max_chars]
|
||||
|
||||
normalized = _normalize_whitespace(content)
|
||||
if not normalized:
|
||||
return ""
|
||||
|
||||
sentences = [part.strip() for part in _SENTENCE_RE.split(normalized) if part.strip()]
|
||||
if not sentences:
|
||||
return normalized[:max_chars]
|
||||
|
||||
summary = " ".join(sentences[:3])
|
||||
return summary[:max_chars]
|
||||
|
||||
|
||||
def _extract_key_points(content: str, limit: int = 8) -> list[str]:
    """Extract up to *limit* key points from *content*.

    A line qualifies when it is a bullet or numbered list item, or when it
    contains one of the signal keywords (English or Chinese, e.g. "result",
    "risk", "结论"). Qualifying lines are whitespace-normalized, de-duplicated,
    and capped at 240 chars each. When no line qualifies, the derived summary
    (capped at 500 chars) is returned as the single key point.
    """
    points: list[str] = []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Strip a leading bullet ("-", "*", "•") or numbering ("1.", "2、", "3)").
        stripped = re.sub(r"^(?:[-*•]\s*|\d+[.、)]\s*)", "", line).strip()
        if not stripped:
            continue
        # Structured = original line looked like a list item before stripping.
        is_structured = line.startswith(("-", "*", "•")) or re.match(r"^\d+[.、)]\s+", line)
        # Keyword heuristic aimed at review/report style content (verdicts,
        # evidence, risks, IOCs) in both English and Chinese.
        has_signal = any(token in stripped.lower() for token in [
            "verdict", "result", "finding", "evidence", "action", "risk", "ioc",
            "结论", "结果", "证据", "建议", "动作", "风险", "命中", "关联",
        ])
        if is_structured or has_signal:
            point = _normalize_whitespace(stripped)
            # Skip duplicates; cap each point at 240 characters.
            if point and point not in points:
                points.append(point[:240])
            if len(points) >= limit:
                break

    if points:
        return points

    # Fallback: nothing matched the heuristics, use the summary itself.
    summary = _derive_summary(content, None, 500)
    return [summary] if summary else []
|
||||
|
||||
|
||||
def _render_memory_text(artifact: dict[str, Any]) -> str:
|
||||
lines = [
|
||||
f"Title: {artifact['title']}",
|
||||
f"Summary: {artifact['summary']}",
|
||||
]
|
||||
if artifact.get("tags"):
|
||||
lines.append("Tags: " + ", ".join(artifact["tags"]))
|
||||
if artifact.get("source"):
|
||||
lines.append("Source: " + artifact["source"])
|
||||
if artifact.get("key_points"):
|
||||
lines.append("Key points:")
|
||||
lines.extend(f"- {point}" for point in artifact["key_points"])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _default_summary_resource_uri(request: CommitSummaryRequest, title: str) -> str:
    """Build a deterministic ``viking://`` resource URI for a summary artifact.

    The path combines the namespace (request value, configured default, or
    "general"), the memory type (default "summary"), a slug of the title, and
    a short SHA-1 digest of the content so identical content maps to the same
    URI.
    """
    ns = (request.namespace or get_config().memory.default_namespace or "general").strip("/")
    kind = (request.memory_type or "summary").strip("/")
    digest = hashlib.sha1(request.content.encode("utf-8")).hexdigest()[:12]
    slug = _slugify(title, digest)
    return f"viking://resources/{ns}/{kind}/{slug}-{digest}.json"
|
||||
|
||||
|
||||
async def build_summary_artifact(request: CommitSummaryRequest) -> dict[str, Any]:
    """Summarize the request content with the LLM and build a summary artifact.

    The artifact dict bundles the LLM output (title, summary, key points,
    tags) with request metadata (source, namespace, memory type) and the
    original content. Persistence is handled by the caller (e.g.
    ``commit_summary_to_openviking``).
    """
    # Clamp the caller-supplied limit into a sane range (120..4000 chars).
    max_chars = max(120, min(request.max_summary_chars, 4000))
    llm_result = await summarize_with_llm(
        request.content,
        title=request.title,
        summary_hint=request.summary,
        tags=request.tags,
        max_summary_chars=max_chars,
        purpose=request.purpose or "generic knowledge memory",
    )
    # Fall back to a heuristic title when the LLM does not supply one.
    title = llm_result.get("title") or _derive_title(request.content, request.title)
    return {
        "schema_version": "memory-gateway.summary.v1",
        # Stable, content-derived id: first 16 hex chars of SHA-1(content).
        "id": hashlib.sha1(request.content.encode("utf-8")).hexdigest()[:16],
        "title": title,
        "summary": llm_result.get("summary", ""),
        "key_points": llm_result.get("key_points", []),
        # Prefer LLM-suggested tags; keep the request's tags otherwise.
        "tags": llm_result.get("tags", request.tags),
        "source": request.source,
        "namespace": request.namespace or get_config().memory.default_namespace,
        "memory_type": request.memory_type or "summary",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "content": request.content,
        # Provider/model metadata returned by the LLM layer, if any.
        "llm": llm_result.get("llm"),
    }
|
||||
|
||||
|
||||
async def commit_summary_to_openviking(request: CommitSummaryRequest) -> dict[str, Any]:
    """Build a summary artifact and persist it to OpenViking per ``request.persist_as``.

    ``persist_as`` selects "memory", "resource", "both", or "none"; the raw
    results of the corresponding OpenViking calls are returned alongside the
    artifact (``None`` for persistence paths that were not taken).
    """
    artifact = await build_summary_artifact(request)
    ov_client = await get_openviking_client()

    memory_result: Optional[dict[str, Any]] = None
    resource_result: Optional[dict[str, Any]] = None

    if request.persist_as in {"memory", "both"}:
        # Memories store a compact plain-text rendering, not the full JSON artifact.
        memory_result = await ov_client.add_memory(
            content=_render_memory_text(artifact),
            namespace=artifact["namespace"],
            memory_type=artifact["memory_type"],
        )

    if request.persist_as in {"resource", "both"}:
        # Resources store the full artifact as JSON at a deterministic URI;
        # the chosen URI is recorded back into the artifact.
        resource_uri = request.resource_uri or _default_summary_resource_uri(request, artifact["title"])
        artifact["resource_uri"] = resource_uri
        resource_result = await ov_client.add_resource(
            uri=resource_uri,
            content=json.dumps(artifact, ensure_ascii=False, indent=2),
            resource_type=request.resource_type or "json",
        )

    return {
        "status": "ok",
        "artifact": artifact,
        "memory_result": memory_result,
        "resource_result": resource_result,
    }
|
||||
|
||||
|
||||
# FastAPI application instance. `lifespan` is defined elsewhere in this module;
# presumably it manages startup/shutdown of shared clients — confirm there.
app = FastAPI(title="Memory Gateway", version="0.1.0", lifespan=lifespan)
|
||||
|
||||
@ -346,6 +528,136 @@ async def api_add_resource(request: AddResourceRequest):
|
||||
return result
|
||||
|
||||
|
||||
@app.post("/api/summary", dependencies=[Depends(verify_api_key)])
async def api_commit_summary(request: CommitSummaryRequest):
    """REST API: summarize generic content with the LLM and persist it as memory.

    Returns the result of ``commit_summary_to_openviking``. Error mapping:
    LLM configuration problems -> 503; HTTPExceptions raised downstream
    propagate unchanged; any other summarization/persistence failure -> 502.
    """
    try:
        return await commit_summary_to_openviking(request)
    except LLMConfigurationError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except HTTPException:
        # Already carries the correct status code; don't re-wrap it as a 502.
        raise
    except LLMSummaryError as exc:
        raise HTTPException(status_code=502, detail=f"LLM summary failed: {exc}") from exc
    except Exception as exc:  # noqa: BLE001 - surface unexpected failures as 502
        # Original code caught `(LLMSummaryError, Exception)`, which is redundant
        # (Exception subsumes it) and needed an isinstance check for HTTPException;
        # ordered except clauses express the same behavior explicitly.
        raise HTTPException(status_code=502, detail=f"LLM summary failed: {exc}") from exc
|
||||
|
||||
|
||||
def _parse_tags(tags: str | None) -> list[str]:
|
||||
if not tags:
|
||||
return []
|
||||
return [tag.strip() for tag in re.split(r"[,\n]", tags) if tag.strip()]
|
||||
|
||||
|
||||
def _default_knowledge_uri(namespace: str, knowledge_type: str, title: str, content: str) -> str:
    """Build a deterministic ``viking://`` URI for an uploaded knowledge resource.

    Combines namespace, a fixed "knowledge" segment, the knowledge type, a slug
    of the title, and a short SHA-1 digest of the content so identical uploads
    map to the same URI.
    """
    digest = hashlib.sha1(content.encode("utf-8")).hexdigest()[:12]
    ns = namespace.strip("/")
    kind = knowledge_type.strip("/")
    slug = slugify(title, digest)
    return f"viking://resources/{ns}/knowledge/{kind}/{slug}-{digest}.json"
|
||||
|
||||
|
||||
@app.post("/api/knowledge/upload", dependencies=[Depends(verify_api_key)])
async def api_upload_knowledge(
    file: UploadFile = File(...),
    title: Optional[str] = Form(default=None),
    namespace: str = Form(default="memory-gateway"),
    knowledge_type: str = Form(default="knowledge"),
    tags: str = Form(default=""),
    source: Optional[str] = Form(default=None),
    obsidian_dir: Optional[str] = Form(default=None),
    resource_uri: Optional[str] = Form(default=None),
    persist_as: str = Form(default="resource"),
    max_summary_chars: int = Form(default=1000),
):
    """Upload a document, convert it to Markdown, save to Obsidian, summarize with LLM, and commit to OpenViking.

    Flow:
      1. Validate ``persist_as`` and spool the upload to a temp file.
      2. Convert the file to Markdown off the event loop (500 on failure).
      3. Summarize via the LLM (503 on config errors, 502 on LLM failures).
      4. Save the Markdown into the Obsidian vault and enrich the artifact
         with upload/vault metadata.
      5. Optionally persist as an OpenViking memory and/or resource.
    """
    if persist_as not in {"memory", "resource", "both", "none"}:
        raise HTTPException(status_code=422, detail="persist_as must be one of memory/resource/both/none")

    original_name = file.filename or "uploaded-document"
    # Keep the original suffix so the converter can detect the format.
    suffix = Path(original_name).suffix or ".bin"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = Path(tmp.name)

    try:
        # Conversion may be CPU-bound; run it off the event loop.
        markdown = await asyncio.to_thread(convert_file_to_markdown, tmp_path)
    except RuntimeError as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=500, detail=f"Document conversion failed: {exc}") from exc
    finally:
        # Single cleanup point: the original also unlinked inside each except
        # branch, which was redundant — finally already covers every path.
        tmp_path.unlink(missing_ok=True)

    parsed_tags = _parse_tags(tags)
    effective_title = title or Path(original_name).stem or "Uploaded knowledge"
    # persist_as="none" here on purpose: persistence happens below, after the
    # artifact has been enriched with upload/Obsidian metadata.
    request = CommitSummaryRequest(
        content=markdown,
        title=effective_title,
        namespace=namespace,
        memory_type=knowledge_type,
        tags=parsed_tags,
        source=source or original_name,
        persist_as="none",
        max_summary_chars=max_summary_chars,
        purpose=f"knowledge upload: {knowledge_type}",
    )
    try:
        artifact = await build_summary_artifact(request)
    except LLMConfigurationError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=502, detail=f"LLM summary failed: {exc}") from exc

    config = get_config()
    # getattr fallback: older configs may not define obsidian.knowledge_dir.
    relative_dir = obsidian_dir or getattr(config.obsidian, "knowledge_dir", "01_Knowledge/Uploaded")
    obsidian_path = save_markdown_to_obsidian(
        vault_path=config.obsidian.vault_path,
        relative_dir=relative_dir,
        title=artifact["title"],
        markdown=markdown,
        source_filename=original_name,
        tags=artifact.get("tags", []),
        knowledge_type=knowledge_type,
        summary=artifact.get("summary"),
    )

    # Enrich the artifact with upload-specific metadata before persisting.
    artifact.update(
        {
            "schema_version": "memory-gateway.knowledge_upload.v1",
            "knowledge_type": knowledge_type,
            "source_filename": original_name,
            "obsidian_path": str(obsidian_path),
            "obsidian_relative_path": str(obsidian_path.relative_to(config.obsidian.vault_path)),
            "markdown_content": markdown,
        }
    )

    ov_client = await get_openviking_client()
    memory_result: Optional[dict[str, Any]] = None
    resource_result: Optional[dict[str, Any]] = None
    if persist_as in {"memory", "both"}:
        memory_result = await ov_client.add_memory(
            content=_render_memory_text(artifact),
            namespace=namespace,
            memory_type=knowledge_type,
        )
    if persist_as in {"resource", "both"}:
        final_uri = resource_uri or _default_knowledge_uri(namespace, knowledge_type, artifact["title"], markdown)
        artifact["resource_uri"] = final_uri
        resource_result = await ov_client.add_resource(
            uri=final_uri,
            content=json.dumps(artifact, ensure_ascii=False, indent=2),
            resource_type="json",
        )

    return {
        "status": "ok",
        "artifact": artifact,
        "markdown_chars": len(markdown),
        "obsidian_path": str(obsidian_path),
        "memory_result": memory_result,
        "resource_result": resource_result,
    }
|
||||
|
||||
|
||||
def create_app(config: Optional[Config] = None) -> FastAPI:
|
||||
"""创建 FastAPI 应用"""
|
||||
if config:
|
||||
|
||||
Reference in New Issue
Block a user