Files
memory-gateway/eval/hermes_memory_eval/hermes_client.py
tomtan c173fa45a7 Add Hermes memory evaluation framework with LoCoMo dataset support
- Implement HermesClient for interacting with the Hermes CLI.
- Create judge module for grading QA outputs from Hermes memory.
- Develop LoCoMo dataset parsing and formatting utilities.
- Introduce run_eval script to facilitate memory evaluation using LoCoMo-style datasets.
2026-05-27 17:06:26 +08:00

52 lines
1.6 KiB
Python

"""Hermes CLI client used by the memory evaluation runner."""
from __future__ import annotations
import os
import subprocess
from dataclasses import dataclass, field
from typing import Mapping
@dataclass(frozen=True)
class HermesClientConfig:
    """Immutable settings describing how the Hermes CLI is invoked.

    Attributes:
        command: Executable name or path used as the first element of the
            spawned command line.
        timeout_seconds: Timeout, in seconds, handed to the subprocess call.
        quiet: When true, the ``-Q`` flag is added to the command line.
        source: Value passed with ``--source``; an empty string omits the flag.
        extra_args: Additional CLI arguments placed before the ``-q`` message.
    """

    command: str = "hermes"
    timeout_seconds: int = 600
    quiet: bool = True
    source: str = "memory-eval"
    extra_args: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Take a private copy of ``extra_args``.

        Without the copy, the frozen config would alias the caller's list, so
        a later mutation of that list would silently change this config.
        """
        # A frozen dataclass blocks normal attribute assignment, so go through
        # object.__setattr__ to install the copied list.
        object.__setattr__(self, "extra_args", list(self.extra_args))
class HermesClient:
    """Client that sends single messages to the Hermes CLI.

    Every :meth:`chat` call runs one ``<command> chat`` subprocess and returns
    what that process wrote to standard output.
    """

    def __init__(self, config: HermesClientConfig):
        """Remember the configuration used to build each CLI invocation."""
        self._config = config

    def chat(self, message: str, *, user_id: str, env: Mapping[str, str] | None = None) -> str:
        """Send ``message`` to Hermes and return its reply.

        Args:
            message: Text passed to the CLI through the ``-q`` option.
            user_id: Exported to the child process as ``MEMORY_SYSTEM_USER_ID``.
            env: Optional extra environment variables for the child process.
                Entries whose value is ``None`` are skipped and the remaining
                values are converted with ``str()``. These are applied after
                ``user_id``, so an entry named ``MEMORY_SYSTEM_USER_ID`` wins.

        Returns:
            The child's standard output with surrounding whitespace stripped.

        Raises:
            RuntimeError: The command exited with a non-zero status. The
                message carries stderr, else stdout, else the exit code.
            subprocess.TimeoutExpired: The command ran longer than the
                configured ``timeout_seconds``.
        """
        process_env = os.environ.copy()
        process_env["MEMORY_SYSTEM_USER_ID"] = user_id
        if env:
            process_env.update(
                {key: str(value) for key, value in env.items() if value is not None}
            )

        result = subprocess.run(
            self._build_command(message),
            capture_output=True,
            check=False,
            env=process_env,
            text=True,
            # Strict decoding would raise UnicodeDecodeError on a single bad
            # byte and throw away the whole reply (or the real error text).
            errors="replace",
            timeout=self._config.timeout_seconds,
        )

        if result.returncode != 0:
            stderr = result.stderr.strip()
            stdout = result.stdout.strip()
            detail = stderr or stdout or f"exit code {result.returncode}"
            raise RuntimeError(f"Hermes command failed: {detail}")
        return result.stdout.strip()

    def _build_command(self, message: str) -> list[str]:
        """Assemble the argument vector for one ``chat`` invocation."""
        config = self._config
        command = [config.command, "chat"]
        if config.quiet:
            command.append("-Q")
        if config.source:
            command.extend(["--source", config.source])
        command.extend(config.extra_args)
        # The message goes last, as the value of the -q option.
        command.extend(["-q", message])
        return command