"""LLM helpers for Memory Gateway summaries."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from .config import get_config
|
|
|
|
|
|
class LLMConfigurationError(RuntimeError):
    """Signal that a summarization call was made without a usable LLM setup."""
|
|
|
|
|
|
class LLMSummaryError(RuntimeError):
    """Signal that the LLM reply was unusable (wrong shape or not JSON)."""
|
|
|
|
|
|
def _llm_settings() -> dict[str, Any]:
    """Resolve LLM connection settings.

    Precedence for each value: gateway-specific environment variable, then
    the generic OpenAI environment variable, then the ``llm`` section of the
    application config, then a built-in default.
    """
    cfg = getattr(get_config(), "llm", None)

    def first_env(*names: str) -> str:
        # Return the first non-empty environment variable among *names*.
        for name in names:
            value = os.environ.get(name)
            if value:
                return value
        return ""

    base_url = (
        first_env("MEMORY_GATEWAY_LLM_BASE_URL", "OPENAI_BASE_URL")
        or getattr(cfg, "base_url", "")
        or "https://api.openai.com/v1"
    )
    return {
        # Normalized without a trailing slash so path joins stay clean.
        "base_url": base_url.rstrip("/"),
        "api_key": first_env("MEMORY_GATEWAY_LLM_API_KEY", "OPENAI_API_KEY") or getattr(cfg, "api_key", ""),
        "model": first_env("MEMORY_GATEWAY_LLM_MODEL", "OPENAI_MODEL") or getattr(cfg, "model", ""),
        "timeout": int(first_env("MEMORY_GATEWAY_LLM_TIMEOUT") or getattr(cfg, "timeout", 60)),
        "max_input_chars": int(first_env("MEMORY_GATEWAY_LLM_MAX_INPUT_CHARS") or getattr(cfg, "max_input_chars", 24000)),
    }
|
|
|
|
|
|
def _extract_json(text: str) -> dict[str, Any]:
|
|
text = text.strip()
|
|
if text.startswith("```"):
|
|
text = re.sub(r"^```(?:json)?\s*", "", text)
|
|
text = re.sub(r"\s*```$", "", text)
|
|
try:
|
|
return json.loads(text)
|
|
except json.JSONDecodeError:
|
|
match = re.search(r"\{.*\}", text, flags=re.S)
|
|
if not match:
|
|
raise LLMSummaryError("LLM did not return JSON") from None
|
|
return json.loads(match.group(0))
|
|
|
|
|
|
def _coerce_string_list(value: Any, limit: int = 12) -> list[str]:
|
|
if not isinstance(value, list):
|
|
return []
|
|
items: list[str] = []
|
|
for item in value:
|
|
if item is None:
|
|
continue
|
|
text = str(item).strip()
|
|
if text and text not in items:
|
|
items.append(text[:300])
|
|
if len(items) >= limit:
|
|
break
|
|
return items
|
|
|
|
|
|
async def summarize_with_llm(
    content: str,
    *,
    title: str | None = None,
    summary_hint: str | None = None,
    tags: list[str] | None = None,
    max_summary_chars: int = 800,
    purpose: str = "generic knowledge memory",
) -> dict[str, Any]:
    """Summarize content using an OpenAI-compatible chat completions API.

    Args:
        content: Raw text to summarize; truncated to the configured
            ``max_input_chars`` before being sent.
        title: Optional caller-provided title, used as a fallback when the
            model does not return one.
        summary_hint: Optional hint passed verbatim into the user prompt.
        tags: Optional caller tags; kept first and merged (deduplicated)
            with up to 8 tags returned by the model.
        max_summary_chars: Upper bound for the returned summary length
            (a floor of 120 characters is always allowed).
        purpose: Free-text description of what the memory is for, included
            in the prompt.

    Returns:
        Dict with keys ``title`` (<=160 chars), ``summary``, ``key_points``
        (<=10 items), ``tags``, and ``llm`` (provider/base_url/model
        metadata describing which backend produced the summary).

    Raises:
        LLMConfigurationError: if no model is configured, or no API key is
            set for a non-localhost endpoint.
        LLMSummaryError: if the response shape is unexpected or its content
            is not JSON.
        httpx.HTTPStatusError: if the API returns a non-2xx status.
    """
    settings = _llm_settings()
    if not settings["model"]:
        raise LLMConfigurationError("LLM model is not configured. Set MEMORY_GATEWAY_LLM_MODEL or llm.model.")
    # Local endpoints (127.0.0.1/localhost) may legitimately run without
    # auth, so the API key is only required for remote base URLs.
    if not settings["api_key"] and not settings["base_url"].startswith(("http://127.0.0.1", "http://localhost")):
        raise LLMConfigurationError("LLM API key is not configured. Set MEMORY_GATEWAY_LLM_API_KEY or OPENAI_API_KEY.")

    # Keep the request within the configured input budget (hard truncation).
    trimmed = content[: settings["max_input_chars"]]
    tag_text = ", ".join(tags or [])
    system_prompt = (
        "You are a precise knowledge curator. Summarize input into reusable memory. "
        "Return only valid JSON with these keys: title, summary, key_points, tags. "
        "summary must be concise but specific; key_points must be reusable, evidence-based bullets. "
        "Do not invent facts not present in the input. Preserve important identifiers, paths, URLs, IPs, IDs, and verdicts."
    )
    user_prompt = f"""
Purpose: {purpose}
Provided title: {title or ''}
Provided summary hint: {summary_hint or ''}
Provided tags: {tag_text}
Max summary characters: {max_summary_chars}

Content:
{trimmed}
""".strip()

    headers = {"Content-Type": "application/json"}
    if settings["api_key"]:
        headers["Authorization"] = f"Bearer {settings['api_key']}"

    payload = {
        "model": settings["model"],
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # Low temperature for conservative, mostly-deterministic summaries.
        "temperature": 0.2,
        # NOTE(review): assumes the endpoint supports OpenAI-style JSON mode;
        # confirm for non-OpenAI backends (some reject response_format).
        "response_format": {"type": "json_object"},
    }
    async with httpx.AsyncClient(timeout=settings["timeout"]) as client:
        response = await client.post(f"{settings['base_url']}/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()

    try:
        content_text = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise LLMSummaryError(f"Unexpected LLM response shape: {data}") from exc

    parsed = _extract_json(content_text)
    # Caller-provided tags take precedence in ordering; model tags are
    # appended afterward with order-preserving deduplication.
    merged_tags: list[str] = []
    for tag in [*(tags or []), *_coerce_string_list(parsed.get("tags"), limit=8)]:
        tag = str(tag).strip()
        if tag and tag not in merged_tags:
            merged_tags.append(tag)

    summary = str(parsed.get("summary") or "").strip()
    return {
        "title": str(parsed.get("title") or title or "Untitled summary").strip()[:160],
        # Enforce the caller's cap, but never cut below 120 characters.
        "summary": summary[:max(120, max_summary_chars)],
        "key_points": _coerce_string_list(parsed.get("key_points"), limit=10),
        "tags": merged_tags,
        "llm": {
            "provider": "openai-compatible",
            "base_url": settings["base_url"],
            "model": settings["model"],
        },
    }
|