update memory gateway
This commit is contained in:
158
memory_gateway/llm.py
Normal file
158
memory_gateway/llm.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""LLM helpers for Memory Gateway summaries."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from .config import get_config
|
||||
|
||||
|
||||
class LLMConfigurationError(RuntimeError):
    """Signals that a summary was requested but the LLM settings (model/API key) are missing."""
|
||||
|
||||
|
||||
class LLMSummaryError(RuntimeError):
    """Signals that the LLM replied, but its response could not be parsed or used."""
|
||||
|
||||
|
||||
def _llm_settings() -> dict[str, Any]:
    """Resolve LLM connection settings.

    Precedence for each field: gateway-specific env var, then the generic
    OPENAI_* env var, then the ``llm`` section of the loaded config, then a
    hard-coded default. Returns a dict with keys: base_url, api_key, model,
    timeout, max_input_chars.
    """
    cfg = getattr(get_config(), "llm", None)

    def pick(*candidates: Any) -> Any:
        # First truthy candidate wins; if all are falsy, the last one is
        # returned — mirrors the semantics of a chained `or` expression.
        for candidate in candidates:
            if candidate:
                return candidate
        return candidates[-1]

    base_url = pick(
        os.environ.get("MEMORY_GATEWAY_LLM_BASE_URL"),
        os.environ.get("OPENAI_BASE_URL"),
        getattr(cfg, "base_url", ""),
        "https://api.openai.com/v1",
    ).rstrip("/")

    api_key = pick(
        os.environ.get("MEMORY_GATEWAY_LLM_API_KEY"),
        os.environ.get("OPENAI_API_KEY"),
        getattr(cfg, "api_key", ""),
    )

    model = pick(
        os.environ.get("MEMORY_GATEWAY_LLM_MODEL"),
        os.environ.get("OPENAI_MODEL"),
        getattr(cfg, "model", ""),
    )

    timeout = int(
        os.environ.get("MEMORY_GATEWAY_LLM_TIMEOUT") or getattr(cfg, "timeout", 60)
    )
    max_input_chars = int(
        os.environ.get("MEMORY_GATEWAY_LLM_MAX_INPUT_CHARS")
        or getattr(cfg, "max_input_chars", 24000)
    )

    return {
        "base_url": base_url,
        "api_key": api_key,
        "model": model,
        "timeout": timeout,
        "max_input_chars": max_input_chars,
    }
|
||||
|
||||
|
||||
def _extract_json(text: str) -> dict[str, Any]:
|
||||
text = text.strip()
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text)
|
||||
text = re.sub(r"\s*```$", "", text)
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
match = re.search(r"\{.*\}", text, flags=re.S)
|
||||
if not match:
|
||||
raise LLMSummaryError("LLM did not return JSON") from None
|
||||
return json.loads(match.group(0))
|
||||
|
||||
|
||||
def _coerce_string_list(value: Any, limit: int = 12) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
items: list[str] = []
|
||||
for item in value:
|
||||
if item is None:
|
||||
continue
|
||||
text = str(item).strip()
|
||||
if text and text not in items:
|
||||
items.append(text[:300])
|
||||
if len(items) >= limit:
|
||||
break
|
||||
return items
|
||||
|
||||
|
||||
async def summarize_with_llm(
    content: str,
    *,
    title: str | None = None,
    summary_hint: str | None = None,
    tags: list[str] | None = None,
    max_summary_chars: int = 800,
    purpose: str = "generic knowledge memory",
) -> dict[str, Any]:
    """Summarize content using an OpenAI-compatible chat completions API.

    Returns a dict with ``title``, ``summary``, ``key_points``, ``tags``,
    and an ``llm`` provenance sub-dict.

    Raises:
        LLMConfigurationError: if no model is configured, or no API key is
            set for a non-local endpoint.
        LLMSummaryError: if the response shape is unexpected or not JSON.
    """
    cfg = _llm_settings()

    # Fail fast on missing configuration before doing any work.
    if not cfg["model"]:
        raise LLMConfigurationError("LLM model is not configured. Set MEMORY_GATEWAY_LLM_MODEL or llm.model.")
    is_local = cfg["base_url"].startswith(("http://127.0.0.1", "http://localhost"))
    if not cfg["api_key"] and not is_local:
        raise LLMConfigurationError("LLM API key is not configured. Set MEMORY_GATEWAY_LLM_API_KEY or OPENAI_API_KEY.")

    # Cap the prompt size so oversized documents don't blow the context window.
    clipped = content[: cfg["max_input_chars"]]
    joined_tags = ", ".join(tags or [])

    system_prompt = (
        "You are a precise knowledge curator. Summarize input into reusable memory. "
        "Return only valid JSON with these keys: title, summary, key_points, tags. "
        "summary must be concise but specific; key_points must be reusable, evidence-based bullets. "
        "Do not invent facts not present in the input. Preserve important identifiers, paths, URLs, IPs, IDs, and verdicts."
    )
    user_prompt = f"""
Purpose: {purpose}
Provided title: {title or ''}
Provided summary hint: {summary_hint or ''}
Provided tags: {joined_tags}
Max summary characters: {max_summary_chars}

Content:
{clipped}
""".strip()

    headers = {"Content-Type": "application/json"}
    if cfg["api_key"]:
        headers["Authorization"] = f"Bearer {cfg['api_key']}"

    payload = {
        "model": cfg["model"],
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.2,
        "response_format": {"type": "json_object"},
    }

    async with httpx.AsyncClient(timeout=cfg["timeout"]) as client:
        response = await client.post(f"{cfg['base_url']}/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()

    try:
        reply_text = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise LLMSummaryError(f"Unexpected LLM response shape: {data}") from exc

    parsed = _extract_json(reply_text)

    # Caller-supplied tags come first, then model-suggested tags; dedup while
    # preserving order and dropping blanks.
    merged_tags: list[str] = []
    for candidate in [*(tags or []), *_coerce_string_list(parsed.get("tags"), limit=8)]:
        cleaned = str(candidate).strip()
        if cleaned and cleaned not in merged_tags:
            merged_tags.append(cleaned)

    summary = str(parsed.get("summary") or "").strip()
    return {
        "title": str(parsed.get("title") or title or "Untitled summary").strip()[:160],
        # Always allow at least 120 chars of summary, even for tiny budgets.
        "summary": summary[:max(120, max_summary_chars)],
        "key_points": _coerce_string_list(parsed.get("key_points"), limit=10),
        "tags": merged_tags,
        "llm": {
            "provider": "openai-compatible",
            "base_url": cfg["base_url"],
            "model": cfg["model"],
        },
    }
|
||||
Reference in New Issue
Block a user