- Implement HermesClient for interacting with the Hermes CLI.
- Create judge module for grading QA outputs from Hermes memory.
- Develop LoCoMo dataset parsing and formatting utilities.
- Introduce run_eval script to facilitate memory evaluation using LoCoMo-style datasets.
52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
"""Hermes CLI client used by the memory evaluation runner."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import subprocess
|
|
from dataclasses import dataclass, field
|
|
from typing import Mapping
|
|
|
|
|
|
@dataclass(frozen=True)
class HermesClientConfig:
    """Immutable settings describing how the Hermes CLI is invoked.

    Instances compare by value and are hashable, so they can be used as
    dictionary keys or set members.
    """

    # Executable name or path placed first on the command line.
    command: str = "hermes"
    # Passed to ``subprocess.run`` as its ``timeout`` (seconds).
    timeout_seconds: int = 600
    # When true, the ``-Q`` flag is added to the command line.
    quiet: bool = True
    # Value sent with ``--source``; an empty string omits the option.
    source: str = "memory-eval"
    # Additional CLI arguments inserted before the ``-q <message>`` pair.
    # ``hash=False`` excludes the (unhashable) list from the generated
    # ``__hash__`` so hashing a config no longer raises ``TypeError``; the
    # field still participates in equality comparisons.
    extra_args: list[str] = field(default_factory=list, hash=False)
|
|
|
|
|
|
class HermesClient:
    """Send single chat messages to Hermes by spawning its CLI."""

    def __init__(self, config: HermesClientConfig):
        """Keep the invocation settings used by every :meth:`chat` call."""
        self._config = config

    def chat(self, message: str, *, user_id: str, env: Mapping[str, str] | None = None) -> str:
        """Run one ``<command> chat ... -q <message>`` invocation.

        Args:
            message: Text passed as the value of the ``-q`` option.
            user_id: Exported to the child process as
                ``MEMORY_SYSTEM_USER_ID``.
            env: Optional extra environment variables. Values are converted
                with ``str``; entries whose value is ``None`` are skipped.
                They are applied after ``user_id``, so an explicit
                ``MEMORY_SYSTEM_USER_ID`` entry takes precedence.

        Returns:
            The child's standard output with surrounding whitespace removed.

        Raises:
            RuntimeError: The process exited with a non-zero status. The
                message carries stderr (or stdout when stderr is empty)
                together with the exit code.
            subprocess.TimeoutExpired: The process ran longer than the
                configured ``timeout_seconds``; propagated unchanged from
                ``subprocess.run``.
            OSError: The executable could not be started; propagated
                unchanged from ``subprocess.run``.
        """
        result = subprocess.run(
            self._build_command(message),
            capture_output=True,
            check=False,  # the exit status is inspected below instead
            env=self._build_env(user_id, env),
            text=True,
            timeout=self._config.timeout_seconds,
        )
        if result.returncode != 0:
            output = result.stderr.strip() or result.stdout.strip()
            exit_note = f"exit code {result.returncode}"
            # Always report the exit code; previously it was dropped
            # whenever the process had printed anything.
            detail = f"{output} ({exit_note})" if output else exit_note
            raise RuntimeError(f"Hermes command failed: {detail}")
        return result.stdout.strip()

    def _build_command(self, message: str) -> list[str]:
        """Assemble the argument vector for a single chat invocation."""
        config = self._config
        command = [config.command, "chat"]
        if config.quiet:
            command.append("-Q")
        if config.source:
            command += ["--source", config.source]
        command += config.extra_args
        # The message goes last, as the value of ``-q``.
        command += ["-q", message]
        return command

    @staticmethod
    def _build_env(user_id: str, env: Mapping[str, str] | None) -> dict[str, str]:
        """Return the child environment: inherited variables plus overrides."""
        process_env = os.environ.copy()
        process_env["MEMORY_SYSTEM_USER_ID"] = user_id
        if env:
            for key, value in env.items():
                if value is not None:
                    process_env[key] = str(value)
        return process_env
|