Add Hermes memory evaluation framework with LoCoMo dataset support
- Implement HermesClient for interacting with the Hermes CLI.
- Create judge module for grading QA outputs from Hermes memory.
- Develop LoCoMo dataset parsing and formatting utilities.
- Introduce run_eval script to facilitate memory evaluation using LoCoMo-style datasets.
This commit is contained in:
25
eval/hermes_memory_eval/config.example.yaml
Normal file
25
eval/hermes_memory_eval/config.example.yaml
Normal file
@@ -0,0 +1,25 @@
---
# Example configuration for the Hermes memory evaluation (LoCoMo-style datasets).

# Settings for invoking the Hermes CLI.
hermes:
  command: "hermes"
  timeout_seconds: 600
  quiet: true
  source: "memory-eval"
  extra_args: []

# Settings for the memory system under evaluation.
memory:
  # NOTE(review): machine-specific absolute path (a particular user's home
  # directory); point this at your own env file before running.
  env_file: "/home/tom/.hermes/memory_system.env"
  endpoint: "http://127.0.0.1:1934"
  # Empty in this example; presumably supplied through env_file instead -
  # confirm against the loader.
  api_key: ""
  user_prefix: "locomo-"
  search_use_llm: false
  commit_every_turns: 1
  commit_interval_seconds: 0

qa:
  # The prompt text is Chinese. In English: "First use memory_system_search to
  # query long-term memory, then answer the question based on the retrieved
  # memories. If the memory holds no answer, say you do not know; do not make
  # one up." followed by a blank line and "Question: {question}".
  # {question} is presumably substituted by the eval script - confirm.
  # Written as a literal block (|-): newlines are kept, no trailing newline.
  prompt_template: |-
    请先使用 memory_system_search 查询长期记忆,再根据检索到的记忆回答问题。如果记忆中没有答案,请直接说不知道,不要编造。

    问题:{question}

# Settings for the judge that grades QA outputs.
judge:
  base_url: "https://api.openai.com/v1"
  # Holds an environment variable name, not the key itself (presumably the
  # judge reads the API key from that variable - confirm).
  api_key_env: "OPENAI_API_KEY"
  model: "gpt-4o-mini"
  parallel: 4
  timeout_seconds: 120
Reference in New Issue
Block a user