"""LLM helpers for Memory Gateway summaries."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from .config import get_config
|
|
|
|
|
|
class LLMConfigurationError(RuntimeError):
    """Signal that a summarization call was made without a usable LLM setup."""
|
|
|
|
|
|
class LLMSummaryError(RuntimeError):
    """Signal that the LLM reply was unusable (wrong shape or not JSON)."""
|
|
|
|
|
|
def _llm_settings() -> dict[str, Any]:
    """Resolve LLM connection settings.

    Precedence for each value: gateway-specific environment variable, then
    the generic OpenAI environment variable, then the ``llm`` section of the
    application config, then a built-in default.
    """
    cfg = getattr(get_config(), "llm", None)

    def first_env(*names: str) -> str:
        # Return the first non-empty environment variable among *names*.
        for name in names:
            value = os.environ.get(name)
            if value:
                return value
        return ""

    base_url = (
        first_env("MEMORY_GATEWAY_LLM_BASE_URL", "OPENAI_BASE_URL")
        or getattr(cfg, "base_url", "")
        or "https://api.openai.com/v1"
    )
    return {
        # Normalized without a trailing slash so path joins stay clean.
        "base_url": base_url.rstrip("/"),
        "api_key": first_env("MEMORY_GATEWAY_LLM_API_KEY", "OPENAI_API_KEY") or getattr(cfg, "api_key", ""),
        "model": first_env("MEMORY_GATEWAY_LLM_MODEL", "OPENAI_MODEL") or getattr(cfg, "model", ""),
        "timeout": int(first_env("MEMORY_GATEWAY_LLM_TIMEOUT") or getattr(cfg, "timeout", 60)),
        "max_input_chars": int(first_env("MEMORY_GATEWAY_LLM_MAX_INPUT_CHARS") or getattr(cfg, "max_input_chars", 24000)),
    }
|
|
|
|
|
|
def _extract_json(text: str) -> dict[str, Any]:
|
|
text = text.strip()
|
|
if text.startswith("```"):
|
|
text = re.sub(r"^```(?:json)?\s*", "", text)
|
|
text = re.sub(r"\s*```$", "", text)
|
|
try:
|
|
return json.loads(text)
|
|
except json.JSONDecodeError:
|
|
match = re.search(r"\{.*\}", text, flags=re.S)
|
|
if not match:
|
|
raise LLMSummaryError("LLM did not return JSON") from None
|
|
return json.loads(match.group(0))
|
|
|
|
|
|
def _coerce_string_list(value: Any, limit: int = 12) -> list[str]:
|
|
if not isinstance(value, list):
|
|
return []
|
|
items: list[str] = []
|
|
for item in value:
|
|
if item is None:
|
|
continue
|
|
text = str(item).strip()
|
|
if text and text not in items:
|
|
items.append(text[:300])
|
|
if len(items) >= limit:
|
|
break
|
|
return items
|
|
|
|
|
|
async def summarize_with_llm(
    content: str,
    *,
    title: str | None = None,
    summary_hint: str | None = None,
    tags: list[str] | None = None,
    max_summary_chars: int = 800,
    purpose: str = "generic knowledge memory",
) -> dict[str, Any]:
    """Summarize content using an OpenAI-compatible chat completions API.

    Args:
        content: Raw text to summarize; truncated to the configured
            ``max_input_chars`` before being sent.
        title: Optional caller-provided title, used as a fallback when the
            model does not return one.
        summary_hint: Optional hint passed verbatim into the user prompt.
        tags: Optional caller tags; kept first and merged (deduplicated)
            with up to 8 tags returned by the model.
        max_summary_chars: Upper bound for the returned summary length
            (a floor of 120 characters is always allowed).
        purpose: Free-text description of what the memory is for, included
            in the prompt.

    Returns:
        Dict with keys ``title`` (<=160 chars), ``summary``, ``key_points``
        (<=10 items), ``tags``, and ``llm`` (provider/base_url/model
        metadata describing which backend produced the summary).

    Raises:
        LLMConfigurationError: if no model is configured, or no API key is
            set for a non-localhost endpoint.
        LLMSummaryError: if the response shape is unexpected or its content
            is not JSON.
        httpx.HTTPStatusError: if the API returns a non-2xx status.
    """
    settings = _llm_settings()
    if not settings["model"]:
        raise LLMConfigurationError("LLM model is not configured. Set MEMORY_GATEWAY_LLM_MODEL or llm.model.")
    # Local endpoints (127.0.0.1/localhost) may legitimately run without
    # auth, so the API key is only required for remote base URLs.
    if not settings["api_key"] and not settings["base_url"].startswith(("http://127.0.0.1", "http://localhost")):
        raise LLMConfigurationError("LLM API key is not configured. Set MEMORY_GATEWAY_LLM_API_KEY or OPENAI_API_KEY.")

    # Keep the request within the configured input budget (hard truncation).
    trimmed = content[: settings["max_input_chars"]]
    tag_text = ", ".join(tags or [])
    system_prompt = (
        "You are a precise knowledge curator. Summarize input into reusable memory. "
        "Return only valid JSON with these keys: title, summary, key_points, tags. "
        "summary must be concise but specific; key_points must be reusable, evidence-based bullets. "
        "Do not invent facts not present in the input. Preserve important identifiers, paths, URLs, IPs, IDs, and verdicts."
    )
    user_prompt = f"""
Purpose: {purpose}
Provided title: {title or ''}
Provided summary hint: {summary_hint or ''}
Provided tags: {tag_text}
Max summary characters: {max_summary_chars}

Content:
{trimmed}
""".strip()

    headers = {"Content-Type": "application/json"}
    if settings["api_key"]:
        headers["Authorization"] = f"Bearer {settings['api_key']}"

    payload = {
        "model": settings["model"],
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # Low temperature for conservative, mostly-deterministic summaries.
        "temperature": 0.2,
        # NOTE(review): assumes the endpoint supports OpenAI-style JSON mode;
        # confirm for non-OpenAI backends (some reject response_format).
        "response_format": {"type": "json_object"},
    }
    async with httpx.AsyncClient(timeout=settings["timeout"]) as client:
        response = await client.post(f"{settings['base_url']}/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()

    try:
        content_text = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise LLMSummaryError(f"Unexpected LLM response shape: {data}") from exc

    parsed = _extract_json(content_text)
    # Caller-provided tags take precedence in ordering; model tags are
    # appended afterward with order-preserving deduplication.
    merged_tags: list[str] = []
    for tag in [*(tags or []), *_coerce_string_list(parsed.get("tags"), limit=8)]:
        tag = str(tag).strip()
        if tag and tag not in merged_tags:
            merged_tags.append(tag)

    summary = str(parsed.get("summary") or "").strip()
    return {
        "title": str(parsed.get("title") or title or "Untitled summary").strip()[:160],
        # Enforce the caller's cap, but never cut below 120 characters.
        "summary": summary[:max(120, max_summary_chars)],
        "key_points": _coerce_string_list(parsed.get("key_points"), limit=10),
        "tags": merged_tags,
        "llm": {
            "provider": "openai-compatible",
            "base_url": settings["base_url"],
            "model": settings["model"],
        },
    }
|