update memory gateway

This commit is contained in:
2026-04-30 16:09:28 +08:00
parent e6b1520bce
commit ba84b1ddb3
98 changed files with 1341 additions and 6783 deletions

158
memory_gateway/llm.py Normal file
View File

@ -0,0 +1,158 @@
"""LLM helpers for Memory Gateway summaries."""
from __future__ import annotations
import json
import os
import re
from typing import Any
import httpx
from .config import get_config
class LLMConfigurationError(RuntimeError):
    """Raised when LLM summarization is requested but not configured.

    Signals a missing model name or API key (see ``summarize_with_llm``),
    so callers can distinguish "not set up" from a bad LLM response.
    """
class LLMSummaryError(RuntimeError):
    """Raised when the LLM response cannot be used.

    Covers malformed replies: no JSON payload found, or an unexpected
    response shape from the chat-completions endpoint.
    """
def _llm_settings() -> dict[str, Any]:
    """Resolve LLM connection settings, preferring env vars over config.

    Precedence per field: ``MEMORY_GATEWAY_LLM_*`` env var, then the
    generic ``OPENAI_*`` env var, then the ``llm`` section of the loaded
    config, then a hard-coded default where one exists.

    Returns:
        Dict with keys ``base_url`` (no trailing slash), ``api_key``,
        ``model``, ``timeout`` (int seconds) and ``max_input_chars`` (int).
    """
    env = os.environ.get
    # Config may lack an `llm` section entirely; getattr on None still
    # yields the per-field defaults below.
    llm_cfg = getattr(get_config(), "llm", None)

    base_url = (
        env("MEMORY_GATEWAY_LLM_BASE_URL")
        or env("OPENAI_BASE_URL")
        or getattr(llm_cfg, "base_url", "")
        or "https://api.openai.com/v1"
    )
    api_key = (
        env("MEMORY_GATEWAY_LLM_API_KEY")
        or env("OPENAI_API_KEY")
        or getattr(llm_cfg, "api_key", "")
    )
    model = (
        env("MEMORY_GATEWAY_LLM_MODEL")
        or env("OPENAI_MODEL")
        or getattr(llm_cfg, "model", "")
    )
    return {
        "base_url": base_url.rstrip("/"),
        "api_key": api_key,
        "model": model,
        "timeout": int(env("MEMORY_GATEWAY_LLM_TIMEOUT") or getattr(llm_cfg, "timeout", 60)),
        "max_input_chars": int(
            env("MEMORY_GATEWAY_LLM_MAX_INPUT_CHARS") or getattr(llm_cfg, "max_input_chars", 24000)
        ),
    }
def _extract_json(text: str) -> dict[str, Any]:
text = text.strip()
if text.startswith("```"):
text = re.sub(r"^```(?:json)?\s*", "", text)
text = re.sub(r"\s*```$", "", text)
try:
return json.loads(text)
except json.JSONDecodeError:
match = re.search(r"\{.*\}", text, flags=re.S)
if not match:
raise LLMSummaryError("LLM did not return JSON") from None
return json.loads(match.group(0))
def _coerce_string_list(value: Any, limit: int = 12) -> list[str]:
if not isinstance(value, list):
return []
items: list[str] = []
for item in value:
if item is None:
continue
text = str(item).strip()
if text and text not in items:
items.append(text[:300])
if len(items) >= limit:
break
return items
async def summarize_with_llm(
    content: str,
    *,
    title: str | None = None,
    summary_hint: str | None = None,
    tags: list[str] | None = None,
    max_summary_chars: int = 800,
    purpose: str = "generic knowledge memory",
) -> dict[str, Any]:
    """Summarize content using an OpenAI-compatible chat completions API.

    Args:
        content: Raw text to summarize; truncated to the configured
            ``max_input_chars`` before being sent.
        title: Optional caller-provided title, passed as a hint and used
            as fallback when the model returns none.
        summary_hint: Optional hint text included in the prompt.
        tags: Optional caller tags; these take precedence over (and are
            merged with) tags proposed by the model.
        max_summary_chars: Target cap for the summary (a floor of 120
            chars is enforced when truncating the model output).
        purpose: Free-text purpose string injected into the prompt.

    Returns:
        Dict with ``title``, ``summary``, ``key_points``, ``tags`` and an
        ``llm`` sub-dict recording provider/base_url/model provenance.

    Raises:
        LLMConfigurationError: If no model, or no API key for a
            non-local endpoint, is configured.
        LLMSummaryError: If the response shape is unexpected or contains
            no usable JSON.
        httpx.HTTPStatusError: On a non-2xx response from the API.
    """
    settings = _llm_settings()
    if not settings["model"]:
        raise LLMConfigurationError("LLM model is not configured. Set MEMORY_GATEWAY_LLM_MODEL or llm.model.")
    # An API key is only optional for local endpoints (loopback hosts).
    if not settings["api_key"] and not settings["base_url"].startswith(("http://127.0.0.1", "http://localhost")):
        raise LLMConfigurationError("LLM API key is not configured. Set MEMORY_GATEWAY_LLM_API_KEY or OPENAI_API_KEY.")
    # Cap prompt size so oversized documents don't blow the context window.
    trimmed = content[: settings["max_input_chars"]]
    tag_text = ", ".join(tags or [])
    system_prompt = (
        "You are a precise knowledge curator. Summarize input into reusable memory. "
        "Return only valid JSON with these keys: title, summary, key_points, tags. "
        "summary must be concise but specific; key_points must be reusable, evidence-based bullets. "
        "Do not invent facts not present in the input. Preserve important identifiers, paths, URLs, IPs, IDs, and verdicts."
    )
    user_prompt = f"""
Purpose: {purpose}
Provided title: {title or ''}
Provided summary hint: {summary_hint or ''}
Provided tags: {tag_text}
Max summary characters: {max_summary_chars}
Content:
{trimmed}
""".strip()
    headers = {"Content-Type": "application/json"}
    if settings["api_key"]:
        headers["Authorization"] = f"Bearer {settings['api_key']}"
    payload = {
        "model": settings["model"],
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # Low temperature for deterministic, extractive summaries;
        # json_object response_format asks the server to emit valid JSON.
        # NOTE(review): some OpenAI-compatible servers reject
        # response_format — confirm against the deployed backends.
        "temperature": 0.2,
        "response_format": {"type": "json_object"},
    }
    async with httpx.AsyncClient(timeout=settings["timeout"]) as client:
        response = await client.post(f"{settings['base_url']}/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()
    try:
        content_text = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise LLMSummaryError(f"Unexpected LLM response shape: {data}") from exc
    parsed = _extract_json(content_text)
    # Merge caller tags first, then up to 8 model-proposed tags, keeping
    # first-seen order and dropping duplicates/empties.
    merged_tags = []
    for tag in [*(tags or []), *_coerce_string_list(parsed.get("tags"), limit=8)]:
        tag = str(tag).strip()
        if tag and tag not in merged_tags:
            merged_tags.append(tag)
    summary = str(parsed.get("summary") or "").strip()
    return {
        # Fall back to the caller title, then a placeholder; cap at 160 chars.
        "title": str(parsed.get("title") or title or "Untitled summary").strip()[:160],
        # Never truncate below 120 chars even if the caller asked for less.
        "summary": summary[:max(120, max_summary_chars)],
        "key_points": _coerce_string_list(parsed.get("key_points"), limit=10),
        "tags": merged_tags,
        # Provenance so consumers can audit which backend produced this.
        "llm": {
            "provider": "openai-compatible",
            "base_url": settings["base_url"],
            "model": settings["model"],
        },
    }