chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/core/persistence/markdown/entries.py
+++ b/src/everos/core/persistence/markdown/entries.py
@ -0,0 +1,368 @@
+"""Markdown entries — id format, marker spans, and audit-form parsing.
+
+Three closely-related entry concepts live together here so a reader
+sees the whole entry surface in one file:
+
+1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
+   stamped into each daily-log entry's open / close markers. Carries
+   the prefix declared by the frontmatter schema, the date bucket, and
+   the in-file zero-padded sequence.
+
+2. :class:`Entry` — a marker-delimited span inside a markdown body::
+
+       <!-- entry:abc123 -->
+       ...content...
+       <!-- /entry:abc123 -->
+
+   :func:`split_entries` and :func:`find_entry` locate these spans
+   without interpreting the inner content. Higher layers (writers,
+   cascade) parse it per record type.
+
+3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
+   audit-form body fields (header / inline / sections). Built either
+   from a raw body string via :func:`parse_structured_entry` or from
+   an existing :class:`Entry` via :meth:`Entry.as_structured`.
+
+Audit-form layout::
+
+    ## <header>                ← optional H2 (usually entry id, for grep)
+
+    **key**: value             ← inline fields, one per line
+    **key2**: value2
+
+    ### Section Title          ← section fields: H3 + free-form text
+    body content...
+
+    ### Another Section
+    more content...
+
+The audit chassis is intentionally **type-agnostic** — every field
+round-trips as a string. Inline values are stringified on render
+(lists become ``[a, b, c]``, scalars use ``str()``); on parse
+everything is the raw text after the colon. Section titles are kept
+verbatim. This keeps parsing tolerant of stray fields, wrapped
+strings, and manually-typed timestamps; the strong-typed model lives
+in business writers + the SQLite/LanceDB indexes.
+
+Cross-user uniqueness is handled at the database layer via a composite
+``<user_id>_<entry_id>`` field; it is *not* encoded into the
+:class:`EntryId` string itself.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+import re
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import Self
+
+# ── EntryId — structured id for marker stamping ─────────────────────────
+
+_DATE_FMT = "%Y%m%d"
+_SEQ_DIGITS = 8
+"""Minimum zero-padding for the in-file seq.
+
+8 digits keeps lexicographic order == numeric order up to 10**8
+entries per file (per user, per day). ``format()`` is "at least 8" —
+larger seqs emit more digits without truncation. ``parse`` is
+permissive: shorter (legacy 4-digit) and longer seq strings both
+parse cleanly; format normalises to >= 8 digits on round-trip.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class EntryId:
+    """Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
+
+    prefix: str
+    date: _dt.date
+    seq: int
+
+    def format(self) -> str:
+        """Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
+        return (
+            f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
+        )
+
+    def __str__(self) -> str:  # noqa: D401
+        return self.format()
+
+    @classmethod
+    def parse(cls, s: str) -> Self:
+        """Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
+
+        Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
+        possible) is preserved as-is.
+        """
+        parts = s.rsplit("_", 2)
+        if len(parts) != 3:
+            raise ValueError(f"invalid entry id format: {s!r}")
+        prefix, date_str, seq_str = parts
+        if not prefix:
+            raise ValueError(f"empty prefix in entry id: {s!r}")
+        try:
+            d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
+        except ValueError as exc:
+            raise ValueError(f"invalid date in entry id: {s!r}") from exc
+        try:
+            seq = int(seq_str)
+        except ValueError as exc:
+            raise ValueError(f"invalid seq in entry id: {s!r}") from exc
+        if seq < 0:
+            raise ValueError(f"negative seq in entry id: {s!r}")
+        return cls(prefix=prefix, date=d, seq=seq)
+
+    @classmethod
+    def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
+        """Build the id for the next entry given the file's current count.
+
+        ``current_count`` is the value of ``frontmatter.entry_count``
+        *before* this append. The new id gets ``seq = current_count + 1``.
+        """
+        if current_count < 0:
+            raise ValueError(f"current_count must be >= 0, got {current_count}")
+        return cls(prefix=prefix, date=date, seq=current_count + 1)
+
+
+# ── Entry — marker-delimited span inside a body ─────────────────────────
+
+# Filename / URL-safe id alphabet for the marker.
+_ID_PATTERN = r"[A-Za-z0-9_-]+"
+_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
+
+
+@dataclass(frozen=True)
+class Entry:
+    """One marker-delimited entry within a markdown body.
+
+    Attributes:
+        id: Value between ``entry:`` and ``-->`` in the open marker.
+        body: Content between the open and close markers, with one leading
+            and one trailing newline removed (typical formatter output).
+        start: Offset of the opening ``<!-- entry:id -->`` in the source body.
+        end: Offset just past the closing ``<!-- /entry:id -->`` in the source.
+    """
+
+    id: str
+    body: str
+    start: int
+    end: int
+
+    def as_structured(self) -> StructuredEntry:
+        """Parse my body as audit-form and return a :class:`StructuredEntry`.
+
+        The id / body / start / end fields are preserved; the parsed
+        ``header`` / ``inline`` / ``sections`` are added on top.
+        """
+        return parse_structured_entry(self.body, _origin=self)
+
+
+def split_entries(body: str) -> list[Entry]:
+    """Scan ``body`` and return every entry in order.
+
+    Unmatched / unterminated open markers stop the scan at the first
+    such marker — partial entries are not returned. Callers needing
+    strict validation should layer a dedicated check on top.
+    """
+    entries: list[Entry] = []
+    pos = 0
+    while True:
+        open_match = _OPEN_RE.search(body, pos)
+        if open_match is None:
+            break
+        entry_id = open_match.group(1)
+        close_match = _close_re_for(entry_id).search(body, open_match.end())
+        if close_match is None:
+            # Unterminated entry — abort further scanning.
+            break
+        entries.append(
+            Entry(
+                id=entry_id,
+                body=_strip_one_newline(body[open_match.end() : close_match.start()]),
+                start=open_match.start(),
+                end=close_match.end(),
+            )
+        )
+        pos = close_match.end()
+    return entries
+
+
+def find_entry(body: str, entry_id: str) -> Entry | None:
+    """Find the first entry with ``entry_id``, or ``None``."""
+    open_re = re.compile(rf"<!-- entry:{re.escape(entry_id)} -->")
+    open_match = open_re.search(body)
+    if open_match is None:
+        return None
+    close_match = _close_re_for(entry_id).search(body, open_match.end())
+    if close_match is None:
+        return None
+    return Entry(
+        id=entry_id,
+        body=_strip_one_newline(body[open_match.end() : close_match.start()]),
+        start=open_match.start(),
+        end=close_match.end(),
+    )
+
+
+def _close_re_for(entry_id: str) -> re.Pattern[str]:
+    """Build the close-marker regex for a specific id."""
+    return re.compile(rf"<!-- /entry:{re.escape(entry_id)} -->")
+
+
+def _strip_one_newline(text: str) -> str:
+    """Strip one leading and one trailing newline (typical formatter padding)."""
+    if text.startswith("\r\n"):
+        text = text[2:]
+    elif text.startswith("\n"):
+        text = text[1:]
+    if text.endswith("\r\n"):
+        text = text[:-2]
+    elif text.endswith("\n"):
+        text = text[:-1]
+    return text
+
+
+# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
+
+# H2 line: ``## <header>``.
+_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
+# Inline field: ``**key**: value``. Anchored to line start so a stray
+# ``**emphasis**`` mid-paragraph isn't mistaken for a field.
+_INLINE_RE = re.compile(
+    r"^\*\*(?P<key>[^*\n]+?)\*\*:\s*(?P<value>.*?)\s*$",
+    re.MULTILINE,
+)
+# H3 line: ``### Title``.
+_H3_RE = re.compile(r"^###\s+(.+?)\s*$", re.MULTILINE)
+
+
+@dataclass(frozen=True)
+class StructuredEntry(Entry):
+    """:class:`Entry` whose body has been parsed as audit-form data.
+
+    Inherits ``id`` / ``body`` / ``start`` / ``end`` from :class:`Entry`
+    (zeroed when built from a raw body string with no marker context)
+    and adds three parsed views of the body: the optional H2 header,
+    the inline ``**key**: value`` map, and the ``### Title`` sections.
+
+    Audit-form values are strings only; type coercion is the caller's
+    job (a strong-typed model lives in the writer / index).
+    """
+
+    header: str | None = None
+    inline: dict[str, str] = field(default_factory=dict)
+    sections: dict[str, str] = field(default_factory=dict)
+
+
+def render_structured_entry(
+    *,
+    header: str | None = None,
+    inline: Mapping[str, object] | None = None,
+    sections: Mapping[str, str] | None = None,
+) -> str:
+    """Render an audit-form entry body.
+
+    Args:
+        header: Optional H2 line at the top (typically the entry id —
+            redundant with the marker but useful for plain-text grep).
+        inline: ``{key: value}`` rendered as ``**key**: value``. Values
+            are stringified: ``list``/``tuple`` become ``[a, b, c]``;
+            ``None`` becomes the empty string; everything else uses
+            ``str()``.
+        sections: ``{title: body}`` rendered as ``### Title`` plus the
+            body text. Title is verbatim; body's trailing whitespace is
+            stripped.
+
+    Returns:
+        The rendered string, no trailing newline (the caller — typically
+        :meth:`MarkdownWriter.append_entry` — handles markers + newlines).
+    """
+    inline = inline or {}
+    sections = sections or {}
+    lines: list[str] = []
+
+    if header:
+        lines.append(f"## {header}")
+        lines.append("")
+
+    for key, value in inline.items():
+        lines.append(f"**{key}**: {_render_value(value)}")
+
+    for title, body in sections.items():
+        lines.append("")
+        lines.append(f"### {title}")
+        lines.append(body.rstrip())
+
+    return "\n".join(lines)
+
+
+def parse_structured_entry(
+    body: str, *, _origin: Entry | None = None
+) -> StructuredEntry:
+    """Parse an audit-form entry body. Strings only — no type coercion.
+
+    Tolerant of:
+
+    - missing H2 (``header`` will be ``None``)
+    - inline fields appearing before, between or after sections
+      (only matches before the first H3 are taken as the inline block)
+    - extra whitespace and stray lines (silently kept inside the
+      enclosing section's body)
+
+    When called via :meth:`Entry.as_structured`, the ``_origin`` Entry
+    contributes its ``id`` / ``start`` / ``end``; otherwise those fall
+    back to ``""`` / ``0`` / ``len(body)``.
+
+    Returns:
+        :class:`StructuredEntry` with everything as strings.
+    """
+    text = body.strip("\n")
+
+    # Split on H3 lines.
+    parts = _H3_RE.split(text)
+    head = parts[0]
+    sections_dict: dict[str, str] = {}
+    for i in range(1, len(parts), 2):
+        title = parts[i].strip()
+        content = parts[i + 1] if i + 1 < len(parts) else ""
+        sections_dict[title] = content.strip("\n").rstrip()
+
+    header: str | None = None
+    h2 = _H2_RE.search(head)
+    if h2:
+        header = h2.group(1).strip()
+
+    inline_dict: dict[str, str] = {
+        m.group("key").strip(): m.group("value").strip()
+        for m in _INLINE_RE.finditer(head)
+    }
+
+    if _origin is not None:
+        return StructuredEntry(
+            id=_origin.id,
+            body=_origin.body,
+            start=_origin.start,
+            end=_origin.end,
+            header=header,
+            inline=inline_dict,
+            sections=sections_dict,
+        )
+    return StructuredEntry(
+        id="",
+        body=body,
+        start=0,
+        end=len(body),
+        header=header,
+        inline=inline_dict,
+        sections=sections_dict,
+    )
+
+
+def _render_value(value: object) -> str:
+    """Stringify an inline value the audit-friendly way."""
+    if value is None:
+        return ""
+    if isinstance(value, list | tuple):
+        return "[" + ", ".join(str(item) for item in value) + "]"
+    return str(value)