update memory gateway

This commit is contained in:
2026-04-30 16:09:28 +08:00
parent e6b1520bce
commit ba84b1ddb3
98 changed files with 1341 additions and 6783 deletions

158
memory_gateway/llm.py Normal file
View File

@ -0,0 +1,158 @@
"""LLM helpers for Memory Gateway summaries."""
from __future__ import annotations
import json
import os
import re
from typing import Any
import httpx
from .config import get_config
class LLMConfigurationError(RuntimeError):
    """Raised when LLM summarization is requested but not configured.

    Signals a missing model name or API key (see ``summarize_with_llm``),
    so callers can distinguish "not set up" from a bad LLM response.
    """
class LLMSummaryError(RuntimeError):
    """Raised when the LLM response cannot be used.

    Covers malformed replies: no JSON payload found, or an unexpected
    response shape from the chat-completions endpoint.
    """
def _llm_settings() -> dict[str, Any]:
    """Resolve LLM connection settings, preferring env vars over config.

    Precedence per field: ``MEMORY_GATEWAY_LLM_*`` env var, then the
    generic ``OPENAI_*`` env var, then the ``llm`` section of the loaded
    config, then a hard-coded default where one exists.

    Returns:
        Dict with keys ``base_url`` (no trailing slash), ``api_key``,
        ``model``, ``timeout`` (int seconds) and ``max_input_chars`` (int).
    """
    env = os.environ.get
    # Config may lack an `llm` section entirely; getattr on None still
    # yields the per-field defaults below.
    llm_cfg = getattr(get_config(), "llm", None)

    base_url = (
        env("MEMORY_GATEWAY_LLM_BASE_URL")
        or env("OPENAI_BASE_URL")
        or getattr(llm_cfg, "base_url", "")
        or "https://api.openai.com/v1"
    )
    api_key = (
        env("MEMORY_GATEWAY_LLM_API_KEY")
        or env("OPENAI_API_KEY")
        or getattr(llm_cfg, "api_key", "")
    )
    model = (
        env("MEMORY_GATEWAY_LLM_MODEL")
        or env("OPENAI_MODEL")
        or getattr(llm_cfg, "model", "")
    )
    return {
        "base_url": base_url.rstrip("/"),
        "api_key": api_key,
        "model": model,
        "timeout": int(env("MEMORY_GATEWAY_LLM_TIMEOUT") or getattr(llm_cfg, "timeout", 60)),
        "max_input_chars": int(
            env("MEMORY_GATEWAY_LLM_MAX_INPUT_CHARS") or getattr(llm_cfg, "max_input_chars", 24000)
        ),
    }
def _extract_json(text: str) -> dict[str, Any]:
text = text.strip()
if text.startswith("```"):
text = re.sub(r"^```(?:json)?\s*", "", text)
text = re.sub(r"\s*```$", "", text)
try:
return json.loads(text)
except json.JSONDecodeError:
match = re.search(r"\{.*\}", text, flags=re.S)
if not match:
raise LLMSummaryError("LLM did not return JSON") from None
return json.loads(match.group(0))
def _coerce_string_list(value: Any, limit: int = 12) -> list[str]:
if not isinstance(value, list):
return []
items: list[str] = []
for item in value:
if item is None:
continue
text = str(item).strip()
if text and text not in items:
items.append(text[:300])
if len(items) >= limit:
break
return items
async def summarize_with_llm(
    content: str,
    *,
    title: str | None = None,
    summary_hint: str | None = None,
    tags: list[str] | None = None,
    max_summary_chars: int = 800,
    purpose: str = "generic knowledge memory",
) -> dict[str, Any]:
    """Summarize content using an OpenAI-compatible chat completions API.

    Args:
        content: Raw text to summarize; truncated to the configured
            ``max_input_chars`` before being sent.
        title: Optional caller-provided title, passed as a hint and used
            as fallback when the model returns none.
        summary_hint: Optional hint text included in the prompt.
        tags: Optional caller tags; these take precedence over (and are
            merged with) tags proposed by the model.
        max_summary_chars: Target cap for the summary (a floor of 120
            chars is enforced when truncating the model output).
        purpose: Free-text purpose string injected into the prompt.

    Returns:
        Dict with ``title``, ``summary``, ``key_points``, ``tags`` and an
        ``llm`` sub-dict recording provider/base_url/model provenance.

    Raises:
        LLMConfigurationError: If no model, or no API key for a
            non-local endpoint, is configured.
        LLMSummaryError: If the response shape is unexpected or contains
            no usable JSON.
        httpx.HTTPStatusError: On a non-2xx response from the API.
    """
    settings = _llm_settings()
    if not settings["model"]:
        raise LLMConfigurationError("LLM model is not configured. Set MEMORY_GATEWAY_LLM_MODEL or llm.model.")
    # An API key is only optional for local endpoints (loopback hosts).
    if not settings["api_key"] and not settings["base_url"].startswith(("http://127.0.0.1", "http://localhost")):
        raise LLMConfigurationError("LLM API key is not configured. Set MEMORY_GATEWAY_LLM_API_KEY or OPENAI_API_KEY.")
    # Cap prompt size so oversized documents don't blow the context window.
    trimmed = content[: settings["max_input_chars"]]
    tag_text = ", ".join(tags or [])
    system_prompt = (
        "You are a precise knowledge curator. Summarize input into reusable memory. "
        "Return only valid JSON with these keys: title, summary, key_points, tags. "
        "summary must be concise but specific; key_points must be reusable, evidence-based bullets. "
        "Do not invent facts not present in the input. Preserve important identifiers, paths, URLs, IPs, IDs, and verdicts."
    )
    user_prompt = f"""
Purpose: {purpose}
Provided title: {title or ''}
Provided summary hint: {summary_hint or ''}
Provided tags: {tag_text}
Max summary characters: {max_summary_chars}
Content:
{trimmed}
""".strip()
    headers = {"Content-Type": "application/json"}
    if settings["api_key"]:
        headers["Authorization"] = f"Bearer {settings['api_key']}"
    payload = {
        "model": settings["model"],
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # Low temperature for deterministic, extractive summaries;
        # json_object response_format asks the server to emit valid JSON.
        # NOTE(review): some OpenAI-compatible servers reject
        # response_format — confirm against the deployed backends.
        "temperature": 0.2,
        "response_format": {"type": "json_object"},
    }
    async with httpx.AsyncClient(timeout=settings["timeout"]) as client:
        response = await client.post(f"{settings['base_url']}/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()
    try:
        content_text = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise LLMSummaryError(f"Unexpected LLM response shape: {data}") from exc
    parsed = _extract_json(content_text)
    # Merge caller tags first, then up to 8 model-proposed tags, keeping
    # first-seen order and dropping duplicates/empties.
    merged_tags = []
    for tag in [*(tags or []), *_coerce_string_list(parsed.get("tags"), limit=8)]:
        tag = str(tag).strip()
        if tag and tag not in merged_tags:
            merged_tags.append(tag)
    summary = str(parsed.get("summary") or "").strip()
    return {
        # Fall back to the caller title, then a placeholder; cap at 160 chars.
        "title": str(parsed.get("title") or title or "Untitled summary").strip()[:160],
        # Never truncate below 120 chars even if the caller asked for less.
        "summary": summary[:max(120, max_summary_chars)],
        "key_points": _coerce_string_list(parsed.get("key_points"), limit=10),
        "tags": merged_tags,
        # Provenance so consumers can audit which backend produced this.
        "llm": {
            "provider": "openai-compatible",
            "base_url": settings["base_url"],
            "model": settings["model"],
        },
    }