update memory gateway
memory_gateway/config.py
@@ -6,7 +6,7 @@ from typing import Optional
import yaml
from pydantic import ValidationError

-from .types import Config, ServerConfig, OpenVikingConfig, MemoryConfig, LoggingConfig
+from .types import Config, ServerConfig, OpenVikingConfig, MemoryConfig, LoggingConfig, LLMConfig, ObsidianConfig


def load_config(config_path: Optional[str] = None) -> Config:
@@ -32,6 +32,8 @@ def load_config(config_path: Optional[str] = None) -> Config:
            openviking=OpenVikingConfig(**data.get("openviking", {})),
            memory=MemoryConfig(**data.get("memory", {})),
            logging=LoggingConfig(**data.get("logging", {})),
+           llm=LLMConfig(**data.get("llm", {})),
+           obsidian=ObsidianConfig(**data.get("obsidian", {})),
        )
    except (ValidationError, yaml.YAMLError) as e:
        print(f"Config file parse error: {e}")
memory_gateway/document_ingest.py (new file, 87 lines)
@@ -0,0 +1,87 @@
"""Document ingestion helpers for Memory Gateway."""
from __future__ import annotations

import re
from datetime import datetime, timezone
from pathlib import Path


def slugify(value: str, fallback: str = "document") -> str:
    slug = re.sub(r"[^a-zA-Z0-9\u4e00-\u9fff_-]+", "-", value.lower()).strip("-")
    slug = re.sub(r"-+", "-", slug)[:100].strip("-")
    return slug or fallback


def convert_file_to_markdown(file_path: str | Path) -> str:
    """Convert a local document to Markdown using Microsoft MarkItDown."""
    try:
        from markitdown import MarkItDown
    except ModuleNotFoundError as exc:
        raise RuntimeError("markitdown is not installed. Install with: pip install 'markitdown[all]'") from exc

    file_path = Path(file_path)
    converter = MarkItDown(enable_plugins=False)
    if hasattr(converter, "convert_local"):
        result = converter.convert_local(str(file_path))
    else:
        result = converter.convert(str(file_path))
    markdown = getattr(result, "text_content", "") or ""
    if not markdown.strip():
        raise RuntimeError("Document conversion produced empty Markdown")
    return markdown


def build_markdown_note(
    *,
    title: str,
    markdown: str,
    source_filename: str,
    tags: list[str],
    knowledge_type: str,
    summary: str | None = None,
) -> str:
    tag_text = ", ".join(tags)
    frontmatter = [
        "---",
        f"title: {title}",
        f"knowledge_type: {knowledge_type}",
        f"source_filename: {source_filename}",
        f"created_at: {datetime.now(timezone.utc).isoformat()}",
        f"tags: [{tag_text}]" if tag_text else "tags: []",
    ]
    if summary:
        escaped = summary.replace('"', '\\"')
        frontmatter.append(f'summary: "{escaped}"')
    frontmatter.extend(["---", "", f"# {title}", "", markdown.strip(), ""])
    return "\n".join(frontmatter)


def save_markdown_to_obsidian(
    *,
    vault_path: str | Path,
    relative_dir: str,
    title: str,
    markdown: str,
    source_filename: str,
    tags: list[str],
    knowledge_type: str,
    summary: str | None = None,
) -> Path:
    vault = Path(vault_path)
    target_dir = vault / relative_dir.strip("/")
    target_dir.mkdir(parents=True, exist_ok=True)
    digest = slugify(source_filename.rsplit(".", 1)[0] or title)
    note_name = f"{slugify(title, digest)}.md"
    target = target_dir / note_name
    target.write_text(
        build_markdown_note(
            title=title,
            markdown=markdown,
            source_filename=source_filename,
            tags=tags,
            knowledge_type=knowledge_type,
            summary=summary,
        ),
        encoding="utf-8",
    )
    return target
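For orientation, a minimal usage sketch of these helpers (not part of the commit). It assumes markitdown is installed and the vault directory from ObsidianConfig exists; the input file name is hypothetical:

```python
from memory_gateway.document_ingest import convert_file_to_markdown, save_markdown_to_obsidian

# Hypothetical input file; any format MarkItDown supports (docx, pdf, html, ...).
markdown = convert_file_to_markdown("incident-review.docx")
note_path = save_markdown_to_obsidian(
    vault_path="/home/tom/memory-gateway/obsidian-vault",
    relative_dir="01_Knowledge/Uploaded",
    title="Incident review",
    markdown=markdown,
    source_filename="incident-review.docx",
    tags=["incident", "review"],
    knowledge_type="knowledge",
)
print(note_path)  # .../01_Knowledge/Uploaded/incident-review.md
```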
memory_gateway/llm.py (new file, 158 lines)
@@ -0,0 +1,158 @@
"""LLM helpers for Memory Gateway summaries."""
from __future__ import annotations

import json
import os
import re
from typing import Any

import httpx

from .config import get_config


class LLMConfigurationError(RuntimeError):
    """Raised when LLM summarization is requested but not configured."""


class LLMSummaryError(RuntimeError):
    """Raised when the LLM response cannot be used."""


def _llm_settings() -> dict[str, Any]:
    config = get_config()
    llm_config = getattr(config, "llm", None)

    base_url = (
        os.environ.get("MEMORY_GATEWAY_LLM_BASE_URL")
        or os.environ.get("OPENAI_BASE_URL")
        or getattr(llm_config, "base_url", "")
        or "https://api.openai.com/v1"
    ).rstrip("/")
    api_key = (
        os.environ.get("MEMORY_GATEWAY_LLM_API_KEY")
        or os.environ.get("OPENAI_API_KEY")
        or getattr(llm_config, "api_key", "")
    )
    model = (
        os.environ.get("MEMORY_GATEWAY_LLM_MODEL")
        or os.environ.get("OPENAI_MODEL")
        or getattr(llm_config, "model", "")
    )
    timeout = int(os.environ.get("MEMORY_GATEWAY_LLM_TIMEOUT") or getattr(llm_config, "timeout", 60))
    max_input_chars = int(os.environ.get("MEMORY_GATEWAY_LLM_MAX_INPUT_CHARS") or getattr(llm_config, "max_input_chars", 24000))
    return {
        "base_url": base_url,
        "api_key": api_key,
        "model": model,
        "timeout": timeout,
        "max_input_chars": max_input_chars,
    }


def _extract_json(text: str) -> dict[str, Any]:
    text = text.strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text)
        text = re.sub(r"\s*```$", "", text)
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        match = re.search(r"\{.*\}", text, flags=re.S)
        if not match:
            raise LLMSummaryError("LLM did not return JSON") from None
        return json.loads(match.group(0))


def _coerce_string_list(value: Any, limit: int = 12) -> list[str]:
    if not isinstance(value, list):
        return []
    items: list[str] = []
    for item in value:
        if item is None:
            continue
        text = str(item).strip()
        if text and text not in items:
            items.append(text[:300])
        if len(items) >= limit:
            break
    return items


async def summarize_with_llm(
    content: str,
    *,
    title: str | None = None,
    summary_hint: str | None = None,
    tags: list[str] | None = None,
    max_summary_chars: int = 800,
    purpose: str = "generic knowledge memory",
) -> dict[str, Any]:
    """Summarize content using an OpenAI-compatible chat completions API."""
    settings = _llm_settings()
    if not settings["model"]:
        raise LLMConfigurationError("LLM model is not configured. Set MEMORY_GATEWAY_LLM_MODEL or llm.model.")
    if not settings["api_key"] and not settings["base_url"].startswith(("http://127.0.0.1", "http://localhost")):
        raise LLMConfigurationError("LLM API key is not configured. Set MEMORY_GATEWAY_LLM_API_KEY or OPENAI_API_KEY.")

    trimmed = content[: settings["max_input_chars"]]
    tag_text = ", ".join(tags or [])
    system_prompt = (
        "You are a precise knowledge curator. Summarize input into reusable memory. "
        "Return only valid JSON with these keys: title, summary, key_points, tags. "
        "summary must be concise but specific; key_points must be reusable, evidence-based bullets. "
        "Do not invent facts not present in the input. Preserve important identifiers, paths, URLs, IPs, IDs, and verdicts."
    )
    user_prompt = f"""
Purpose: {purpose}
Provided title: {title or ''}
Provided summary hint: {summary_hint or ''}
Provided tags: {tag_text}
Max summary characters: {max_summary_chars}

Content:
{trimmed}
""".strip()

    headers = {"Content-Type": "application/json"}
    if settings["api_key"]:
        headers["Authorization"] = f"Bearer {settings['api_key']}"

    payload = {
        "model": settings["model"],
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.2,
        "response_format": {"type": "json_object"},
    }
    async with httpx.AsyncClient(timeout=settings["timeout"]) as client:
        response = await client.post(f"{settings['base_url']}/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()

    try:
        content_text = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise LLMSummaryError(f"Unexpected LLM response shape: {data}") from exc

    parsed = _extract_json(content_text)
    merged_tags = []
    for tag in [*(tags or []), *_coerce_string_list(parsed.get("tags"), limit=8)]:
        tag = str(tag).strip()
        if tag and tag not in merged_tags:
            merged_tags.append(tag)

    summary = str(parsed.get("summary") or "").strip()
    return {
        "title": str(parsed.get("title") or title or "Untitled summary").strip()[:160],
        "summary": summary[: max(120, max_summary_chars)],
        "key_points": _coerce_string_list(parsed.get("key_points"), limit=10),
        "tags": merged_tags,
        "llm": {
            "provider": "openai-compatible",
            "base_url": settings["base_url"],
            "model": settings["model"],
        },
    }
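A minimal invocation sketch (not part of the commit), assuming MEMORY_GATEWAY_LLM_MODEL and, for non-local endpoints, an API key are set in the environment; the sample content is hypothetical:

```python
import asyncio

from memory_gateway.llm import summarize_with_llm


async def main() -> None:
    result = await summarize_with_llm(
        "Rotated the expiring TLS certificate on gw-01; no downtime observed.",
        title="TLS rotation note",
        tags=["ops"],
        max_summary_chars=400,
    )
    print(result["title"])
    print(result["summary"])
    for point in result["key_points"]:
        print("-", point)


asyncio.run(main())
```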
memory_gateway/openviking_client.py
@@ -1,4 +1,4 @@
-"""OpenViking client wrapper used by the SOC Memory POC."""
+"""OpenViking client wrapper used by Memory Gateway."""
from __future__ import annotations

import json
memory_gateway/server.py
@@ -1,14 +1,19 @@
"""Memory Gateway MCP Server.

-A memory gateway service built on the Model Context Protocol, giving AI agents on the local network a unified OpenViking access point.
+A general-purpose Memory Gateway service that gives AI agents and harnesses a unified entry point for OpenViking memory retrieval, summarization, and knowledge persistence.
"""
import asyncio
import hashlib
import json
import logging
import re
import tempfile
from datetime import datetime, timezone
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Any, Optional

-from fastapi import APIRouter, Depends, FastAPI, Header, HTTPException, Request, status
+from fastapi import APIRouter, Depends, FastAPI, File, Form, Header, HTTPException, Request, UploadFile, status
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from mcp.server import Server
@@ -17,7 +22,9 @@ from sse_starlette import EventSourceResponse

from .config import get_config, set_config, Config
from .openviking_client import get_openviking_client, close_openviking_client
-from .types import SearchRequest, AddMemoryRequest, AddResourceRequest
+from .document_ingest import convert_file_to_markdown, save_markdown_to_obsidian, slugify
+from .llm import LLMConfigurationError, LLMSummaryError, summarize_with_llm
+from .types import SearchRequest, AddMemoryRequest, AddResourceRequest, CommitSummaryRequest

# Configure logging
logging.basicConfig(
@@ -75,6 +82,27 @@ async def list_tools() -> list[Tool]:
                "required": ["uri", "content"],
            },
        ),
        Tool(
            name="commit_summary",
            description="Summarize arbitrary content and optionally persist it as an OpenViking memory/resource",
            inputSchema={
                "type": "object",
                "properties": {
                    "content": {"type": "string", "description": "Source content to summarize and persist"},
                    "title": {"type": "string", "description": "Title (optional)"},
                    "summary": {"type": "string", "description": "Manually provided summary (optional)"},
                    "namespace": {"type": "string", "description": "OpenViking memory namespace (optional)"},
                    "memory_type": {"type": "string", "description": "Memory type, defaults to summary"},
                    "tags": {"type": "array", "items": {"type": "string"}, "description": "List of tags"},
                    "source": {"type": "string", "description": "Source description or external link"},
                    "resource_uri": {"type": "string", "description": "URI to write the resource to (optional)"},
                    "resource_type": {"type": "string", "description": "Resource type, defaults to json"},
                    "persist_as": {"type": "string", "enum": ["memory", "resource", "both", "none"], "description": "Persistence mode"},
                    "max_summary_chars": {"type": "integer", "description": "Maximum summary length"},
                },
                "required": ["content"],
            },
        ),
        Tool(
            name="get_status",
            description="Check system status",
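For illustration, an arguments payload an MCP client might send for the new commit_summary tool; the values are hypothetical, and only content is required:

```python
# Hypothetical call_tool arguments for the commit_summary tool.
arguments = {
    "content": "Postmortem: the outage was caused by an expired API token...",
    "title": "Outage postmortem",
    "namespace": "memory-gateway",
    "tags": ["postmortem", "ops"],
    "persist_as": "both",
    "max_summary_chars": 600,
}
```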
@@ -140,6 +168,11 @@ async def call_tool(name: str, arguments: Any) -> list[TextContent]:
            )
            return [TextContent(type="text", text=str(result))]

        elif name == "commit_summary":
            request = CommitSummaryRequest(**arguments)
            result = await commit_summary_to_openviking(request)
            return [TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]

        elif name == "get_status":
            ov_status = await ov_client.health_check()
            return [TextContent(type="text", text=f"Memory Gateway: OK\nOpenViking: {ov_status}")]
@@ -201,6 +234,155 @@ def verify_api_key(x_api_key: Optional[str] = Header(default=None)) -> None:
        )


_SENTENCE_RE = re.compile(r"(?<=[。!?.!?])\s+")
_WORD_RE = re.compile(r"[^a-zA-Z0-9\u4e00-\u9fff_-]+")


def _normalize_whitespace(value: str) -> str:
    return re.sub(r"\s+", " ", value).strip()


def _slugify(value: str, fallback: str) -> str:
    slug = _WORD_RE.sub("-", value.lower()).strip("-")
    slug = re.sub(r"-+", "-", slug)[:80].strip("-")
    return slug or fallback


def _derive_title(content: str, title: Optional[str]) -> str:
    if title and title.strip():
        return title.strip()
    for line in content.splitlines():
        line = line.strip("# -*\t")
        if line:
            return line[:120]
    return "Untitled summary"


def _derive_summary(content: str, provided: Optional[str], max_chars: int) -> str:
    if provided and provided.strip():
        return provided.strip()[:max_chars]

    normalized = _normalize_whitespace(content)
    if not normalized:
        return ""

    sentences = [part.strip() for part in _SENTENCE_RE.split(normalized) if part.strip()]
    if not sentences:
        return normalized[:max_chars]

    summary = " ".join(sentences[:3])
    return summary[:max_chars]


def _extract_key_points(content: str, limit: int = 8) -> list[str]:
    points: list[str] = []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        stripped = re.sub(r"^(?:[-*•]\s*|\d+[.、)]\s*)", "", line).strip()
        if not stripped:
            continue
        is_structured = line.startswith(("-", "*", "•")) or re.match(r"^\d+[.、)]\s+", line)
        has_signal = any(token in stripped.lower() for token in [
            "verdict", "result", "finding", "evidence", "action", "risk", "ioc",
            "结论", "结果", "证据", "建议", "动作", "风险", "命中", "关联",
        ])
        if is_structured or has_signal:
            point = _normalize_whitespace(stripped)
            if point and point not in points:
                points.append(point[:240])
            if len(points) >= limit:
                break

    if points:
        return points

    summary = _derive_summary(content, None, 500)
    return [summary] if summary else []
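An illustrative run of these heuristics on a small bullet list (the output comments follow the logic above, assuming this exact input):

```python
content = """Ticket triage notes
- verdict: false positive, scanner misread the banner
- action: suppress rule 4012 for host db-03
Some unrelated narrative without list markers.
"""

print(_derive_title(content, None))   # "Ticket triage notes"
print(_extract_key_points(content))
# ['verdict: false positive, scanner misread the banner',
#  'action: suppress rule 4012 for host db-03']
```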
def _render_memory_text(artifact: dict[str, Any]) -> str:
    lines = [
        f"Title: {artifact['title']}",
        f"Summary: {artifact['summary']}",
    ]
    if artifact.get("tags"):
        lines.append("Tags: " + ", ".join(artifact["tags"]))
    if artifact.get("source"):
        lines.append("Source: " + artifact["source"])
    if artifact.get("key_points"):
        lines.append("Key points:")
        lines.extend(f"- {point}" for point in artifact["key_points"])
    return "\n".join(lines)


def _default_summary_resource_uri(request: CommitSummaryRequest, title: str) -> str:
    namespace = (request.namespace or get_config().memory.default_namespace or "general").strip("/")
    memory_type = (request.memory_type or "summary").strip("/")
    digest = hashlib.sha1(request.content.encode("utf-8")).hexdigest()[:12]
    slug = _slugify(title, digest)
    return f"viking://resources/{namespace}/{memory_type}/{slug}-{digest}.json"


async def build_summary_artifact(request: CommitSummaryRequest) -> dict[str, Any]:
    max_chars = max(120, min(request.max_summary_chars, 4000))
    llm_result = await summarize_with_llm(
        request.content,
        title=request.title,
        summary_hint=request.summary,
        tags=request.tags,
        max_summary_chars=max_chars,
        purpose=request.purpose or "generic knowledge memory",
    )
    title = llm_result.get("title") or _derive_title(request.content, request.title)
    return {
        "schema_version": "memory-gateway.summary.v1",
        "id": hashlib.sha1(request.content.encode("utf-8")).hexdigest()[:16],
        "title": title,
        "summary": llm_result.get("summary", ""),
        "key_points": llm_result.get("key_points", []),
        "tags": llm_result.get("tags", request.tags),
        "source": request.source,
        "namespace": request.namespace or get_config().memory.default_namespace,
        "memory_type": request.memory_type or "summary",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "content": request.content,
        "llm": llm_result.get("llm"),
    }


async def commit_summary_to_openviking(request: CommitSummaryRequest) -> dict[str, Any]:
    artifact = await build_summary_artifact(request)
    ov_client = await get_openviking_client()

    memory_result: Optional[dict[str, Any]] = None
    resource_result: Optional[dict[str, Any]] = None

    if request.persist_as in {"memory", "both"}:
        memory_result = await ov_client.add_memory(
            content=_render_memory_text(artifact),
            namespace=artifact["namespace"],
            memory_type=artifact["memory_type"],
        )

    if request.persist_as in {"resource", "both"}:
        resource_uri = request.resource_uri or _default_summary_resource_uri(request, artifact["title"])
        artifact["resource_uri"] = resource_uri
        resource_result = await ov_client.add_resource(
            uri=resource_uri,
            content=json.dumps(artifact, ensure_ascii=False, indent=2),
            resource_type=request.resource_type or "json",
        )

    return {
        "status": "ok",
        "artifact": artifact,
        "memory_result": memory_result,
        "resource_result": resource_result,
    }


# FastAPI application
app = FastAPI(title="Memory Gateway", version="0.1.0", lifespan=lifespan)
@@ -346,6 +528,136 @@ async def api_add_resource(request: AddResourceRequest):
    return result


@app.post("/api/summary", dependencies=[Depends(verify_api_key)])
async def api_commit_summary(request: CommitSummaryRequest):
    """REST API: LLM summarization and memory persistence for arbitrary content."""
    try:
        return await commit_summary_to_openviking(request)
    except LLMConfigurationError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except (LLMSummaryError, Exception) as exc:
        if isinstance(exc, HTTPException):
            raise
        raise HTTPException(status_code=502, detail=f"LLM summary failed: {exc}") from exc
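A sketch of calling this endpoint over REST (not part of the commit); the host, port, and API key value are assumptions:

```python
import httpx

resp = httpx.post(
    "http://localhost:8000/api/summary",
    headers={"X-API-Key": "changeme"},  # hypothetical key
    json={
        "content": "Long source text to summarize and persist...",
        "tags": ["notes"],
        "persist_as": "memory",
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["artifact"]["summary"])
```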
def _parse_tags(tags: str | None) -> list[str]:
    if not tags:
        return []
    return [tag.strip() for tag in re.split(r"[,\n]", tags) if tag.strip()]


def _default_knowledge_uri(namespace: str, knowledge_type: str, title: str, content: str) -> str:
    digest = hashlib.sha1(content.encode("utf-8")).hexdigest()[:12]
    return f"viking://resources/{namespace.strip('/')}/knowledge/{knowledge_type.strip('/')}/{slugify(title, digest)}-{digest}.json"


@app.post("/api/knowledge/upload", dependencies=[Depends(verify_api_key)])
async def api_upload_knowledge(
    file: UploadFile = File(...),
    title: Optional[str] = Form(default=None),
    namespace: str = Form(default="memory-gateway"),
    knowledge_type: str = Form(default="knowledge"),
    tags: str = Form(default=""),
    source: Optional[str] = Form(default=None),
    obsidian_dir: Optional[str] = Form(default=None),
    resource_uri: Optional[str] = Form(default=None),
    persist_as: str = Form(default="resource"),
    max_summary_chars: int = Form(default=1000),
):
    """Upload a document, convert it to Markdown, save to Obsidian, summarize with LLM, and commit to OpenViking."""
    if persist_as not in {"memory", "resource", "both", "none"}:
        raise HTTPException(status_code=422, detail="persist_as must be one of memory/resource/both/none")

    original_name = file.filename or "uploaded-document"
    suffix = Path(original_name).suffix or ".bin"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = Path(tmp.name)

    try:
        markdown = await asyncio.to_thread(convert_file_to_markdown, tmp_path)
    except RuntimeError as exc:
        tmp_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        tmp_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Document conversion failed: {exc}") from exc
    finally:
        tmp_path.unlink(missing_ok=True)

    parsed_tags = _parse_tags(tags)
    effective_title = title or Path(original_name).stem or "Uploaded knowledge"
    request = CommitSummaryRequest(
        content=markdown,
        title=effective_title,
        namespace=namespace,
        memory_type=knowledge_type,
        tags=parsed_tags,
        source=source or original_name,
        persist_as="none",
        max_summary_chars=max_summary_chars,
        purpose=f"knowledge upload: {knowledge_type}",
    )
    try:
        artifact = await build_summary_artifact(request)
    except LLMConfigurationError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=502, detail=f"LLM summary failed: {exc}") from exc

    config = get_config()
    relative_dir = obsidian_dir or getattr(config.obsidian, "knowledge_dir", "01_Knowledge/Uploaded")
    obsidian_path = save_markdown_to_obsidian(
        vault_path=config.obsidian.vault_path,
        relative_dir=relative_dir,
        title=artifact["title"],
        markdown=markdown,
        source_filename=original_name,
        tags=artifact.get("tags", []),
        knowledge_type=knowledge_type,
        summary=artifact.get("summary"),
    )

    artifact.update(
        {
            "schema_version": "memory-gateway.knowledge_upload.v1",
            "knowledge_type": knowledge_type,
            "source_filename": original_name,
            "obsidian_path": str(obsidian_path),
            "obsidian_relative_path": str(obsidian_path.relative_to(config.obsidian.vault_path)),
            "markdown_content": markdown,
        }
    )

    ov_client = await get_openviking_client()
    memory_result: Optional[dict[str, Any]] = None
    resource_result: Optional[dict[str, Any]] = None
    if persist_as in {"memory", "both"}:
        memory_result = await ov_client.add_memory(
            content=_render_memory_text(artifact),
            namespace=namespace,
            memory_type=knowledge_type,
        )
    if persist_as in {"resource", "both"}:
        final_uri = resource_uri or _default_knowledge_uri(namespace, knowledge_type, artifact["title"], markdown)
        artifact["resource_uri"] = final_uri
        resource_result = await ov_client.add_resource(
            uri=final_uri,
            content=json.dumps(artifact, ensure_ascii=False, indent=2),
            resource_type="json",
        )

    return {
        "status": "ok",
        "artifact": artifact,
        "markdown_chars": len(markdown),
        "obsidian_path": str(obsidian_path),
        "memory_result": memory_result,
        "resource_result": resource_result,
    }
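And a matching sketch for the upload endpoint, again assuming localhost and a configured API key; the file name is hypothetical:

```python
import httpx

with open("runbook.pdf", "rb") as fh:  # hypothetical document
    resp = httpx.post(
        "http://localhost:8000/api/knowledge/upload",
        headers={"X-API-Key": "changeme"},
        files={"file": ("runbook.pdf", fh, "application/pdf")},
        data={"title": "DB failover runbook", "tags": "runbook, database", "persist_as": "both"},
        timeout=300,
    )
resp.raise_for_status()
print(resp.json()["obsidian_path"])
```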
def create_app(config: Optional[Config] = None) -> FastAPI:
    """Create the FastAPI application."""
    if config:
memory_gateway/types.py
@@ -1,5 +1,5 @@
"""Type definitions."""
-from typing import Optional, Any
+from typing import Optional, Any, Literal
from pydantic import BaseModel, Field
@@ -19,10 +19,25 @@ class OpenVikingConfig(BaseModel):

class MemoryConfig(BaseModel):
    """Memory configuration."""
-    default_namespace: str = "soc"
+    default_namespace: str = "memory-gateway"
    search_limit: int = 10


class LLMConfig(BaseModel):
    """LLM configuration for generic summarization and knowledge persistence."""
    base_url: str = "https://api.openai.com/v1"
    api_key: str = ""
    model: str = ""
    timeout: int = 60
    max_input_chars: int = 24000


class ObsidianConfig(BaseModel):
    """Obsidian vault configuration."""
    vault_path: str = "/home/tom/memory-gateway/obsidian-vault"
    knowledge_dir: str = "01_Knowledge/Uploaded"


class LoggingConfig(BaseModel):
    """Logging configuration."""
    level: str = "INFO"
@@ -35,6 +50,8 @@ class Config(BaseModel):
    openviking: OpenVikingConfig = Field(default_factory=OpenVikingConfig)
    memory: MemoryConfig = Field(default_factory=MemoryConfig)
    logging: LoggingConfig = Field(default_factory=LoggingConfig)
    llm: LLMConfig = Field(default_factory=LLMConfig)
    obsidian: ObsidianConfig = Field(default_factory=ObsidianConfig)


class SearchRequest(BaseModel):
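To show how the new sections slot into a config file, a loading sketch through the same pydantic models (field values are placeholders, including the model name):

```python
import yaml

from memory_gateway.types import Config

raw = yaml.safe_load("""
llm:
  base_url: https://api.openai.com/v1
  model: gpt-4o-mini        # placeholder model name
  timeout: 60
obsidian:
  vault_path: /home/tom/memory-gateway/obsidian-vault
  knowledge_dir: 01_Knowledge/Uploaded
""")

config = Config(**raw)  # unset sections fall back to their default_factory values
print(config.llm.model, config.obsidian.knowledge_dir)
```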
@@ -59,6 +76,36 @@ class AddResourceRequest(BaseModel):
    resource_type: Optional[str] = "text"


class CommitSummaryRequest(BaseModel):
    """Generic summarize-and-persist request.

    This model is used, in any scenario, to summarize a piece of
    high-value content and write it to an OpenViking memory, a
    resource, or both.
    """
    content: str
    title: Optional[str] = None
    summary: Optional[str] = None
    purpose: Optional[str] = "generic knowledge memory"
    namespace: Optional[str] = None
    memory_type: Optional[str] = "summary"
    tags: list[str] = Field(default_factory=list)
    source: Optional[str] = None
    resource_uri: Optional[str] = None
    resource_type: Optional[str] = "json"
    persist_as: Literal["memory", "resource", "both", "none"] = "both"
    max_summary_chars: int = 600


class CommitSummaryResponse(BaseModel):
    """Generic summarize-and-persist response."""
    status: str
    artifact: dict[str, Any]
    memory_result: Optional[dict[str, Any]] = None
    resource_result: Optional[dict[str, Any]] = None


class SearchResult(BaseModel):
    """Search results."""
    results: list[dict[str, Any]]
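A quick validation sketch for the new request model; the Literal field rejects anything outside the four persistence modes:

```python
from pydantic import ValidationError

from memory_gateway.types import CommitSummaryRequest

req = CommitSummaryRequest(content="High-value note to keep.")
print(req.persist_as, req.memory_type)  # both summary

try:
    CommitSummaryRequest(content="x", persist_as="archive")  # invalid mode
except ValidationError as exc:
    print("rejected:", len(exc.errors()), "error(s)")
```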