Add Hermes memory evaluation framework with LoCoMo dataset support

- Implement HermesClient for interacting with the Hermes CLI.
- Create judge module for grading QA outputs from Hermes memory.
- Develop LoCoMo dataset parsing and formatting utilities.
- Introduce run_eval script to facilitate memory evaluation using LoCoMo-style datasets.
2026-05-27 17:06:26 +08:00
parent ba59133d80
commit c173fa45a7
11 changed files with 68338 additions and 0 deletions
--- a/eval/hermes_memory_eval/config.example.yaml
+++ b/eval/hermes_memory_eval/config.example.yaml
@ -0,0 +1,25 @@
+hermes:  # presumably read by the HermesClient named in the commit message — confirm
+  command: 'hermes'
+  timeout_seconds: 600
+  quiet: true
+  source: 'memory-eval'
+  extra_args: []  # empty list in this example
+
+memory:
+  env_file: '/home/tom/.hermes/memory_system.env'  # NOTE(review): user-specific absolute path; adjust per machine
+  endpoint: 'http://127.0.0.1:1934'
+  api_key: ''  # left empty in this example
+  user_prefix: 'locomo-'
+  search_use_llm: false
+  commit_every_turns: 1
+  commit_interval_seconds: 0
+
+qa:  # prompt (in Chinese) says: search memory via memory_system_search first, answer from it, say "don't know" if absent; has a {question} placeholder
+  prompt_template: "请先使用 memory_system_search 查询长期记忆，再根据检索到的记忆回答问题。如果记忆中没有答案，请直接说不知道，不要编造。\n\n问题：{question}"
+
+judge:
+  base_url: 'https://api.openai.com/v1'
+  api_key_env: 'OPENAI_API_KEY'  # presumably the name of the env var holding the key — confirm
+  model: 'gpt-4o-mini'
+  parallel: 4
+  timeout_seconds: 120