Add Hermes memory evaluation framework with LoCoMo dataset support

- Implement HermesClient for interacting with the Hermes CLI.
- Create judge module for grading QA outputs from Hermes memory.
- Develop LoCoMo dataset parsing and formatting utilities.
- Introduce run_eval script to facilitate memory evaluation using LoCoMo-style datasets.
This commit is contained in:
2026-05-27 17:06:26 +08:00
parent ba59133d80
commit c173fa45a7
11 changed files with 68338 additions and 0 deletions

View File

@ -0,0 +1,25 @@
# Configuration for the Hermes memory evaluation run.
#
# The file is organised as four top-level sections (hermes, memory, qa,
# judge). Each section's settings must stay indented beneath its header:
# written flat, the section keys parse as null and `timeout_seconds` is
# defined twice in one mapping, where most parsers silently keep the last.

# Settings for the `hermes` command.
# NOTE(review): presumably consumed by the HermesClient CLI wrapper — confirm.
hermes:
  command: "hermes"
  # NOTE(review): presumably the limit for a single CLI invocation — confirm.
  timeout_seconds: 600
  quiet: true
  source: "memory-eval"
  # Empty list: no extra arguments are configured.
  extra_args: []

# Settings for the memory system being evaluated.
memory:
  # NOTE(review): absolute path inside one user's home directory; it has to
  # be adjusted on any other machine or account.
  env_file: "/home/tom/.hermes/memory_system.env"
  endpoint: "http://127.0.0.1:1934"
  # Intentionally empty in this file; do not commit a real key here.
  # NOTE(review): presumably the real value comes from env_file — confirm.
  api_key: ""
  user_prefix: "locomo-"
  search_use_llm: false
  commit_every_turns: 1
  commit_interval_seconds: 0

# Question-answering prompt.
qa:
  # The prompt text (Chinese) tells the model to query long-term memory with
  # memory_system_search first, answer from what was retrieved, and say it
  # does not know instead of making something up. A blank line separates the
  # instruction from the question line; `|-` keeps that blank line and drops
  # the trailing newline, so the value equals the former one-line
  # "...\n\n问题{question}" string.
  # NOTE(review): `{question}` is presumably substituted per QA item — confirm.
  prompt_template: |-
    请先使用 memory_system_search 查询长期记忆,再根据检索到的记忆回答问题。如果记忆中没有答案,请直接说不知道,不要编造。

    问题{question}

# Settings for the judge that grades QA outputs.
judge:
  base_url: "https://api.openai.com/v1"
  # NOTE(review): looks like the NAME of an environment variable holding the
  # key, not the key itself — confirm against the judge module.
  api_key_env: "OPENAI_API_KEY"
  model: "gpt-4o-mini"
  # NOTE(review): presumably the number of concurrent grading requests — confirm.
  parallel: 4
  timeout_seconds: 120