chore: initialize EverOS 1.0.0
An md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Simple substring search for mock memories
|
||||
* To be replaced with semantic retrieval in production
|
||||
*/
|
||||
|
||||
// Upper bound on how many matching memories searchMemories returns.
const MAX_CANDIDATES = 15;
|
||||
|
||||
/**
|
||||
* @typedef {Object} Memory
|
||||
* @property {string} text - The memory content
|
||||
* @property {string} timestamp - ISO timestamp when memory was created
|
||||
*/
|
||||
|
||||
/**
 * Find memories whose text contains at least one term of the query.
 *
 * Matching is a case-insensitive substring test. The query is split on
 * whitespace and terms of two characters or fewer are ignored. Results keep
 * the order of `memories`, and scanning stops as soon as `limit` matches
 * have been collected.
 *
 * Entries that are null/undefined or whose `text` is not a string are
 * skipped rather than causing a TypeError.
 *
 * @param {string} query - User's prompt
 * @param {Memory[]} memories - Array of memory objects
 * @param {number} [limit=MAX_CANDIDATES] - Maximum number of matches to return
 * @returns {Memory[]} Matching memories (at most `limit`); empty when the
 *   query is not a non-empty string, `memories` is not an array, no usable
 *   query term remains, or `limit` is not a positive number
 */
export function searchMemories(query, memories, limit = MAX_CANDIDATES) {
  if (typeof query !== 'string' || !Array.isArray(memories)) {
    return [];
  }

  // Fractional limits are rounded down; NaN, zero and negatives yield no results.
  const cap = Math.floor(limit);
  if (!(cap > 0)) {
    return [];
  }

  // Split query into terms, drop very short ones, and de-duplicate so a
  // repeated word is not tested against every memory more than once.
  const queryTerms = [
    ...new Set(
      query
        .toLowerCase()
        .split(/\s+/)
        .filter((term) => term.length > 2),
    ),
  ];

  if (queryTerms.length === 0) {
    return [];
  }

  const matches = [];
  for (const memory of memories) {
    // Tolerate malformed entries instead of throwing on `.toLowerCase()`.
    if (!memory || typeof memory.text !== 'string') {
      continue;
    }

    const memoryLower = memory.text.toLowerCase();
    if (queryTerms.some((term) => memoryLower.includes(term))) {
      matches.push(memory);
      // Stop early: there is no need to scan the rest once the cap is hit.
      if (matches.length >= cap) {
        break;
      }
    }
  }

  return matches;
}
|
||||
|
||||
/**
 * Count words/tokens in a string (multilingual support).
 * - CJK text (Han ideographs including the supplementary planes, Hiragana,
 *   Katakana, Hangul syllables): every character counts as one token.
 * - Everything else: every whitespace-separated run counts as one word.
 * - Mixed text: the sum of both counts.
 *
 * @param {string} text - Input text
 * @returns {number} Word/token count; 0 for empty, whitespace-only,
 *   missing or non-string input
 */
export function countWords(text) {
  if (typeof text !== 'string') return 0;

  const trimmed = text.trim();
  if (!trimmed) return 0;

  // The `u` flag makes the class match whole code points, so ideographs
  // outside the BMP (CJK Extension B and later) are matched as single
  // characters instead of being seen as unmatched surrogate pairs.
  const cjkRegex =
    /[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF\u{20000}-\u{2FA1F}\u{30000}-\u{323AF}]/gu;

  // Count CJK characters
  const cjkCount = (trimmed.match(cjkRegex) || []).length;

  // Swap each CJK character for a space (not the empty string) so that
  // non-CJK words on either side of it stay separate words.
  const nonCjkText = trimmed.replace(cjkRegex, ' ').trim();

  // After trim(), splitting on whitespace runs cannot yield empty strings.
  const wordCount = nonCjkText ? nonCjkText.split(/\s+/).length : 0;

  return cjkCount + wordCount;
}
|
||||
Reference in New Issue
Block a user