chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,368 @@
"""Markdown entries — id format, marker spans, and audit-form parsing.
Three closely-related entry concepts live together here so a reader
sees the whole entry surface in one file:
1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
stamped into each daily-log entry's open / close markers. Carries
the prefix declared by the frontmatter schema, the date bucket, and
the in-file sequence number (zero-padded to at least 8 digits when
formatted; shorter legacy sequences still parse).
2. :class:`Entry` — a marker-delimited span inside a markdown body::
<!-- entry:abc123 -->
...content...
<!-- /entry:abc123 -->
:func:`split_entries` and :func:`find_entry` locate these spans
without interpreting the inner content. Higher layers (writers,
cascade) parse it per record type.
3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
audit-form body fields (header / inline / sections). Built either
from a raw body string via :func:`parse_structured_entry` or from
an existing :class:`Entry` via :meth:`Entry.as_structured`.
Audit-form layout::
## <header> ← optional H2 (usually entry id, for grep)
**key**: value ← inline fields, one per line
**key2**: value2
### Section Title ← section fields: H3 + free-form text
body content...
### Another Section
more content...
The audit chassis is intentionally **type-agnostic** — every field
round-trips as a string. Inline values are stringified on render
(lists become ``[a, b, c]``, scalars use ``str()``); on parse
everything is the raw text after the colon. Section titles are kept
verbatim. This keeps parsing tolerant of stray fields, wrapped
strings, and manually-typed timestamps; the strong-typed model lives
in business writers + the SQLite/LanceDB indexes.
Cross-user uniqueness is handled at the database layer via a composite
``<user_id>_<entry_id>`` field; it is *not* encoded into the
:class:`EntryId` string itself.
"""
from __future__ import annotations
import datetime as _dt
import re
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Self
# ── EntryId — structured id for marker stamping ─────────────────────────
_DATE_FMT = "%Y%m%d"
_SEQ_DIGITS = 8
"""Minimum zero-padding for the in-file seq.
8 digits keeps lexicographic order == numeric order up to 10**8
entries per file (per user, per day). ``format()`` is "at least 8"
larger seqs emit more digits without truncation. ``parse`` is
permissive: shorter (legacy 4-digit) and longer seq strings both
parse cleanly; format normalises to >= 8 digits on round-trip.
"""
@dataclass(frozen=True, slots=True)
class EntryId:
"""Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
prefix: str
date: _dt.date
seq: int
def format(self) -> str:
"""Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
return (
f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
)
def __str__(self) -> str: # noqa: D401
return self.format()
@classmethod
def parse(cls, s: str) -> Self:
"""Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
possible) is preserved as-is.
"""
parts = s.rsplit("_", 2)
if len(parts) != 3:
raise ValueError(f"invalid entry id format: {s!r}")
prefix, date_str, seq_str = parts
if not prefix:
raise ValueError(f"empty prefix in entry id: {s!r}")
try:
d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
except ValueError as exc:
raise ValueError(f"invalid date in entry id: {s!r}") from exc
try:
seq = int(seq_str)
except ValueError as exc:
raise ValueError(f"invalid seq in entry id: {s!r}") from exc
if seq < 0:
raise ValueError(f"negative seq in entry id: {s!r}")
return cls(prefix=prefix, date=d, seq=seq)
@classmethod
def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
"""Build the id for the next entry given the file's current count.
``current_count`` is the value of ``frontmatter.entry_count``
*before* this append. The new id gets ``seq = current_count + 1``.
"""
if current_count < 0:
raise ValueError(f"current_count must be >= 0, got {current_count}")
return cls(prefix=prefix, date=date, seq=current_count + 1)
# ── Entry — marker-delimited span inside a body ─────────────────────────
# Marker ids are limited to a filename- and URL-safe alphabet.
_ID_PATTERN = r"[A-Za-z0-9_-]+"
# Open marker ``<!-- entry:<id> -->``; group 1 captures the id.
_OPEN_RE = re.compile("<!-- entry:(" + _ID_PATTERN + ") -->")
@dataclass(frozen=True)
class Entry:
    """A single ``<!-- entry:id --> ... <!-- /entry:id -->`` span of a body.

    Attributes:
        id: The id text taken from the open marker.
        body: Text between the two markers, minus one leading and one
            trailing newline.
        start: Index in the source body where the open marker begins.
        end: Index in the source body just past the close marker.
    """

    id: str
    body: str
    start: int
    end: int

    def as_structured(self) -> StructuredEntry:
        """Return this entry with its body parsed as audit-form.

        Delegates to :func:`parse_structured_entry`, passing ``self`` as
        the origin so ``id`` / ``body`` / ``start`` / ``end`` carry over
        to the result alongside the parsed ``header`` / ``inline`` /
        ``sections``.
        """
        structured = parse_structured_entry(self.body, _origin=self)
        return structured
def split_entries(body: str) -> list[Entry]:
    """Collect every marker-delimited entry in ``body``, in document order.

    The search for the next open marker resumes right after the close
    marker of the entry just collected. If an open marker has no
    matching close marker after it, scanning stops there and only the
    entries collected so far are returned — no partial entry is
    produced. Callers that need strict validation must check separately.
    """
    found: list[Entry] = []
    cursor = 0
    while (opener := _OPEN_RE.search(body, cursor)) is not None:
        marker_id = opener.group(1)
        closer = _close_re_for(marker_id).search(body, opener.end())
        if closer is None:
            # Unterminated entry: give up on the rest of the body.
            return found
        inner = body[opener.end() : closer.start()]
        found.append(
            Entry(
                id=marker_id,
                body=_strip_one_newline(inner),
                start=opener.start(),
                end=closer.end(),
            )
        )
        cursor = closer.end()
    return found
def find_entry(body: str, entry_id: str) -> Entry | None:
    """Return the first entry in ``body`` whose marker id is ``entry_id``.

    Returns ``None`` when no open marker with that id exists, or when
    the first such open marker has no close marker after it.
    """
    opener = re.search(rf"<!-- entry:{re.escape(entry_id)} -->", body)
    if opener is None:
        return None
    closer = _close_re_for(entry_id).search(body, opener.end())
    if closer is None:
        return None
    inner = body[opener.end() : closer.start()]
    return Entry(
        id=entry_id,
        body=_strip_one_newline(inner),
        start=opener.start(),
        end=closer.end(),
    )
def _close_re_for(entry_id: str) -> re.Pattern[str]:
    """Compile the pattern matching ``<!-- /entry:<entry_id> -->`` literally."""
    literal_id = re.escape(entry_id)
    return re.compile(rf"<!-- /entry:{literal_id} -->")
def _strip_one_newline(text: str) -> str:
    """Drop at most one newline from each end of ``text``.

    A newline is ``"\\r\\n"`` or ``"\\n"``. The leading one is removed
    first; the trailing check then runs on what is left.
    """
    for lead in ("\r\n", "\n"):
        if text.startswith(lead):
            text = text[len(lead) :]
            break
    for tail in ("\r\n", "\n"):
        if text.endswith(tail):
            text = text[: -len(tail)]
            break
    return text
# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
# In all three patterns the separator after the marker is limited to
# same-line whitespace (``[^\S\n]``). A plain ``\s`` would also match the
# line break, so an empty value / title would capture the *next* line.

# H2 line: ``## <header>``.
_H2_RE = re.compile(r"^##[^\S\n]+(.+?)\s*$", re.MULTILINE)
# Inline field: ``**key**: value``. Anchored to line start so a stray
# ``**emphasis**`` mid-paragraph isn't mistaken for a field. The value
# may be empty (``**key**:``).
_INLINE_RE = re.compile(
    r"^\*\*(?P<key>[^*\n]+?)\*\*:[^\S\n]*(?P<value>.*?)\s*$",
    re.MULTILINE,
)
# H3 line: ``### Title``.
_H3_RE = re.compile(r"^###[^\S\n]+(.+?)\s*$", re.MULTILINE)
@dataclass(frozen=True)
class StructuredEntry(Entry):
    """:class:`Entry` whose body has been parsed as audit-form data.

    Inherits ``id`` / ``body`` / ``start`` / ``end`` from :class:`Entry`
    and adds three parsed views of the body: the optional H2 header,
    the inline ``**key**: value`` map, and the ``### Title`` sections.

    When built from a raw body string with no marker context,
    :func:`parse_structured_entry` fills the inherited fields with
    ``id=""``, ``start=0`` and ``end=len(body)``.

    Audit-form values are strings only; type coercion is the caller's
    job (a strong-typed model lives in the writer / index).
    """

    # Text of the H2 line, or None when the body has no H2.
    header: str | None = None
    # ``**key**: value`` fields, keyed by field name.
    inline: dict[str, str] = field(default_factory=dict)
    # ``### Title`` sections, keyed by title, holding the section text.
    sections: dict[str, str] = field(default_factory=dict)
def render_structured_entry(
    *,
    header: str | None = None,
    inline: Mapping[str, object] | None = None,
    sections: Mapping[str, str] | None = None,
) -> str:
    """Render an audit-form entry body.

    The output is a sequence of blocks — the H2 header, the inline
    field lines, then one block per section — separated by exactly one
    blank line. Absent or empty parts contribute nothing, so the result
    never starts with a blank line and never ends with a newline.

    Args:
        header: Optional H2 line at the top (typically the entry id —
            redundant with the marker but useful for plain-text grep).
            ``None`` or ``""`` omits it.
        inline: ``{key: value}`` rendered as ``**key**: value``, one per
            line. Values are stringified: ``list``/``tuple`` become
            ``[a, b, c]``; ``None`` becomes the empty string; everything
            else uses ``str()``.
        sections: ``{title: body}`` rendered as ``### Title`` plus the
            body text. Title is verbatim; body's trailing whitespace is
            stripped. A body that is empty after stripping renders the
            heading line alone.

    Returns:
        The rendered string, no trailing newline (the caller — typically
        :meth:`MarkdownWriter.append_entry` — handles markers + newlines).
    """
    blocks: list[str] = []
    if header:
        blocks.append(f"## {header}")
    if inline:
        blocks.append(
            "\n".join(
                f"**{key}**: {_render_value(value)}" for key, value in inline.items()
            )
        )
    for title, body in (sections or {}).items():
        text = body.rstrip()
        # Skip the body line when empty so the last section cannot leave
        # a dangling newline at the end of the output.
        blocks.append(f"### {title}\n{text}" if text else f"### {title}")
    return "\n\n".join(blocks)
def parse_structured_entry(
    body: str, *, _origin: Entry | None = None
) -> StructuredEntry:
    """Parse an audit-form entry body into string-valued fields.

    The body (with leading / trailing newlines removed) is split on
    ``### Title`` lines. Everything before the first H3 is the preamble:
    the first H2 line found there becomes ``header`` (``None`` when
    there is none) and every ``**key**: value`` line found there goes
    into ``inline``. Text after an H3 — including any field-looking
    lines — stays in that section's body. When a key or a title repeats,
    the last occurrence wins.

    Args:
        body: The raw audit-form text.
        _origin: Entry the body came from. When given, its ``id`` /
            ``body`` / ``start`` / ``end`` are copied to the result;
            otherwise they are ``""`` / ``body`` / ``0`` / ``len(body)``.

    Returns:
        :class:`StructuredEntry` with every parsed value as a string.
    """
    pieces = _H3_RE.split(body.strip("\n"))
    # With one capture group, split yields [preamble, title, text, ...].
    preamble, titles, contents = pieces[0], pieces[1::2], pieces[2::2]

    parsed_sections: dict[str, str] = {
        title.strip(): content.strip("\n").rstrip()
        for title, content in zip(titles, contents)
    }

    h2_match = _H2_RE.search(preamble)
    parsed_header: str | None = h2_match.group(1).strip() if h2_match else None

    parsed_inline: dict[str, str] = {}
    for field_match in _INLINE_RE.finditer(preamble):
        parsed_inline[field_match.group("key").strip()] = field_match.group(
            "value"
        ).strip()

    if _origin is None:
        entry_id, raw, begin, finish = "", body, 0, len(body)
    else:
        entry_id, raw, begin, finish = (
            _origin.id,
            _origin.body,
            _origin.start,
            _origin.end,
        )
    return StructuredEntry(
        id=entry_id,
        body=raw,
        start=begin,
        end=finish,
        header=parsed_header,
        inline=parsed_inline,
        sections=parsed_sections,
    )
def _render_value(value: object) -> str:
    """Turn an inline field value into its audit-form text.

    ``None`` gives ``""``; a ``list`` or ``tuple`` gives ``[a, b, c]``
    with each item passed through ``str()``; anything else is ``str(value)``.
    """
    if value is None:
        return ""
    if not isinstance(value, (list, tuple)):
        return str(value)
    rendered_items = [str(item) for item in value]
    return f"[{', '.join(rendered_items)}]"