chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/core/persistence/markdown/init.py
+++ b/src/everos/core/persistence/markdown/init.py
@ -0,0 +1,62 @@
+"""Markdown file IO toolkit.
+
+Atomic write + YAML frontmatter parse/dump + entry marker parse +
+audit-form structured-entry parsing. Knows nothing about business
+models (no MemCell / Episode); the :class:`Entry` here is a
+*marker-delimited* span within a markdown body, not a business record.
+
+External usage (IO + parse):
+    from everos.core.persistence.markdown import (
+        Entry, EntryId, StructuredEntry,
+        MarkdownReader, MarkdownWriter, ParsedMarkdown,
+        parse_frontmatter, dump_frontmatter,
+        split_entries, find_entry,
+        parse_structured_entry, render_structured_entry,
+    )
+
+External usage (frontmatter schema chassis):
+    from everos.core.persistence.markdown import (
+        BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
+        DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
+    )
+"""
+
+from .entries import Entry as Entry
+from .entries import EntryId as EntryId
+from .entries import StructuredEntry as StructuredEntry
+from .entries import find_entry as find_entry
+from .entries import parse_structured_entry as parse_structured_entry
+from .entries import render_structured_entry as render_structured_entry
+from .entries import split_entries as split_entries
+from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
+from .frontmatter import BaseFrontmatter as BaseFrontmatter
+from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
+from .frontmatter import ProfilePathMixin as ProfilePathMixin
+from .frontmatter import SkillPathMixin as SkillPathMixin
+from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
+from .frontmatter import dump_frontmatter as dump_frontmatter
+from .frontmatter import parse_frontmatter as parse_frontmatter
+from .parsed import ParsedMarkdown as ParsedMarkdown
+from .reader import MarkdownReader as MarkdownReader
+from .writer import MarkdownWriter as MarkdownWriter
+
+# Explicit public API of the package: one name per re-export above,
+# kept in alphabetical (ASCII) order.
+__all__ = [
+    "AgentScopedFrontmatter",
+    "BaseFrontmatter",
+    "DailyLogPathMixin",
+    "Entry",
+    "EntryId",
+    "MarkdownReader",
+    "MarkdownWriter",
+    "ParsedMarkdown",
+    "ProfilePathMixin",
+    "SkillPathMixin",
+    "StructuredEntry",
+    "UserScopedFrontmatter",
+    "dump_frontmatter",
+    "find_entry",
+    "parse_frontmatter",
+    "parse_structured_entry",
+    "render_structured_entry",
+    "split_entries",
+]
--- a/src/everos/core/persistence/markdown/entries.py
+++ b/src/everos/core/persistence/markdown/entries.py
@ -0,0 +1,368 @@
+"""Markdown entries — id format, marker spans, and audit-form parsing.
+
+Three closely-related entry concepts live together here so a reader
+sees the whole entry surface in one file:
+
+1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNNNNNN>`` structured id
+   stamped into each daily-log entry's open / close markers. Carries
+   the prefix declared by the frontmatter schema, the date bucket, and
+   the in-file sequence, zero-padded to at least eight digits.
+
+2. :class:`Entry` — a marker-delimited span inside a markdown body::
+
+       <!-- entry:abc123 -->
+       ...content...
+       <!-- /entry:abc123 -->
+
+   :func:`split_entries` and :func:`find_entry` locate these spans
+   without interpreting the inner content. Higher layers (writers,
+   cascade) parse it per record type.
+
+3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
+   audit-form body fields (header / inline / sections). Built either
+   from a raw body string via :func:`parse_structured_entry` or from
+   an existing :class:`Entry` via :meth:`Entry.as_structured`.
+
+Audit-form layout::
+
+    ## <header>                ← optional H2 (usually entry id, for grep)
+
+    **key**: value             ← inline fields, one per line
+    **key2**: value2
+
+    ### Section Title          ← section fields: H3 + free-form text
+    body content...
+
+    ### Another Section
+    more content...
+
+The audit chassis is intentionally **type-agnostic** — every field
+round-trips as a string. Inline values are stringified on render
+(lists become ``[a, b, c]``, scalars use ``str()``); on parse
+everything is the raw text after the colon. Section titles are kept
+verbatim. This keeps parsing tolerant of stray fields, wrapped
+strings, and manually-typed timestamps; the strong-typed model lives
+in business writers + the SQLite/LanceDB indexes.
+
+Cross-user uniqueness is handled at the database layer via a composite
+``<user_id>_<entry_id>`` field; it is *not* encoded into the
+:class:`EntryId` string itself.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+import re
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import Self
+
+# ── EntryId — structured id for marker stamping ─────────────────────────
+
+_DATE_FMT = "%Y%m%d"
+# Exact width of the ``YYYYMMDD`` segment; ``strptime`` alone does not enforce it.
+_DATE_LEN = 8
+_SEQ_DIGITS = 8
+"""Minimum zero-padding for the in-file seq.
+
+8 digits keeps lexicographic order == numeric order up to 10**8
+entries per file (per user, per day). ``format()`` is "at least 8" —
+larger seqs emit more digits without truncation. ``parse`` is
+permissive about width: shorter (legacy 4-digit) and longer seq strings
+both parse cleanly; format normalises to >= 8 digits on round-trip.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class EntryId:
+    """Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<seq>``).
+
+    ``seq`` renders zero-padded to at least ``_SEQ_DIGITS`` digits.
+
+    Attributes:
+        prefix: Leading id segment; may itself contain underscores.
+        date: Date bucket, encoded as ``YYYYMMDD`` in the string form.
+        seq: In-file sequence number.
+    """
+
+    prefix: str
+    date: _dt.date
+    seq: int
+
+    def format(self) -> str:
+        """Render as ``<prefix>_<YYYYMMDD>_<seq>`` with ``seq`` zero-padded.
+
+        Padding is a minimum width (``_SEQ_DIGITS``); larger sequence
+        numbers are emitted in full.
+        """
+        return (
+            f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
+        )
+
+    def __str__(self) -> str:  # noqa: D401
+        return self.format()
+
+    @classmethod
+    def parse(cls, s: str) -> Self:
+        """Parse ``<prefix>_<YYYYMMDD>_<seq>``.
+
+        Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
+        possible) is preserved as-is. The date must be exactly eight
+        ASCII digits and the seq must be ASCII digits only (any width).
+
+        Args:
+            s: The id string to parse.
+
+        Returns:
+            The parsed :class:`EntryId`.
+
+        Raises:
+            ValueError: If the string does not have three ``_``-separated
+                parts, the prefix is empty, the date is not a valid
+                ``YYYYMMDD`` value, or the seq is not a non-negative
+                plain-digit integer.
+        """
+        parts = s.rsplit("_", 2)
+        if len(parts) != 3:
+            raise ValueError(f"invalid entry id format: {s!r}")
+        prefix, date_str, seq_str = parts
+        if not prefix:
+            raise ValueError(f"empty prefix in entry id: {s!r}")
+        # strptime is lenient about width ("2024015" parses as 2024-01-05),
+        # which would let a non-canonical id round-trip to a different string.
+        if len(date_str) != _DATE_LEN or not (
+            date_str.isascii() and date_str.isdigit()
+        ):
+            raise ValueError(f"invalid date in entry id: {s!r}")
+        try:
+            d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
+        except ValueError as exc:
+            raise ValueError(f"invalid date in entry id: {s!r}") from exc
+        try:
+            seq = int(seq_str)
+        except ValueError as exc:
+            raise ValueError(f"invalid seq in entry id: {s!r}") from exc
+        if seq < 0:
+            raise ValueError(f"negative seq in entry id: {s!r}")
+        # int() also accepts "+5", " 5" and non-ASCII digits; reject those.
+        if not (seq_str.isascii() and seq_str.isdigit()):
+            raise ValueError(f"invalid seq in entry id: {s!r}")
+        return cls(prefix=prefix, date=d, seq=seq)
+
+    @classmethod
+    def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
+        """Build the id for the next entry given the file's current count.
+
+        ``current_count`` is the value of ``frontmatter.entry_count``
+        *before* this append. The new id gets ``seq = current_count + 1``.
+
+        Raises:
+            ValueError: If ``current_count`` is negative.
+        """
+        if current_count < 0:
+            raise ValueError(f"current_count must be >= 0, got {current_count}")
+        return cls(prefix=prefix, date=date, seq=current_count + 1)
+
+
+# ── Entry — marker-delimited span inside a body ─────────────────────────
+
+# Filename / URL-safe id alphabet for the marker.
+_ID_PATTERN = r"[A-Za-z0-9_-]+"
+# Open marker ``<!-- entry:<id> -->``; group 1 captures the id.
+_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
+
+
+@dataclass(frozen=True)
+class Entry:
+    """One marker-delimited entry within a markdown body.
+
+    A plain value record: the only behaviour is :meth:`as_structured`,
+    which delegates to :func:`parse_structured_entry`.
+
+    Attributes:
+        id: Value between ``entry:`` and ``-->`` in the open marker.
+        body: Content between the open and close markers, with one leading
+            and one trailing newline removed (typical formatter output).
+        start: Offset of the opening ``<!-- entry:id -->`` in the source body.
+        end: Offset just past the closing ``<!-- /entry:id -->`` in the source.
+    """
+
+    id: str
+    body: str
+    start: int
+    end: int
+
+    def as_structured(self) -> StructuredEntry:
+        """Parse my body as audit-form and return a :class:`StructuredEntry`.
+
+        The id / body / start / end fields are preserved; the parsed
+        ``header`` / ``inline`` / ``sections`` are added on top.
+
+        Returns:
+            A new :class:`StructuredEntry`; ``self`` is not modified.
+        """
+        return parse_structured_entry(self.body, _origin=self)
+
+
+def split_entries(body: str) -> list[Entry]:
+    """Collect every marker-delimited entry in ``body``, in document order.
+
+    Scanning resumes after each close marker. The first open marker that
+    has no matching close marker ends the scan: entries found before it
+    are returned, the partial one and anything after it are not. Callers
+    needing strict validation should layer a dedicated check on top.
+
+    Args:
+        body: Markdown body text to scan.
+
+    Returns:
+        The entries found, each with its inner content stripped of one
+        leading and one trailing newline.
+    """
+    found: list[Entry] = []
+    cursor = 0
+    while (opener := _OPEN_RE.search(body, cursor)) is not None:
+        marker_id = opener.group(1)
+        closer = _close_re_for(marker_id).search(body, opener.end())
+        if closer is None:
+            # Unterminated entry — give up on the rest of the body.
+            break
+        inner = body[opener.end() : closer.start()]
+        found.append(
+            Entry(
+                id=marker_id,
+                body=_strip_one_newline(inner),
+                start=opener.start(),
+                end=closer.end(),
+            )
+        )
+        cursor = closer.end()
+    return found
+
+
+def find_entry(body: str, entry_id: str) -> Entry | None:
+    """Locate the first entry whose open marker carries ``entry_id``.
+
+    Args:
+        body: Markdown body text to search.
+        entry_id: Id to look for; it is regex-escaped before matching.
+
+    Returns:
+        The matching :class:`Entry`, or ``None`` when either the open
+        marker or a close marker after it is missing.
+    """
+    opener = re.search(rf"<!-- entry:{re.escape(entry_id)} -->", body)
+    if opener is None:
+        return None
+    closer = _close_re_for(entry_id).search(body, opener.end())
+    if closer is None:
+        return None
+    inner = body[opener.end() : closer.start()]
+    return Entry(
+        id=entry_id,
+        body=_strip_one_newline(inner),
+        start=opener.start(),
+        end=closer.end(),
+    )
+
+
+def _close_re_for(entry_id: str) -> re.Pattern[str]:
+    """Compile the ``<!-- /entry:<id> -->`` close-marker pattern for one id.
+
+    The id is regex-escaped, so it only ever matches literally.
+    """
+    escaped_id = re.escape(entry_id)
+    return re.compile(rf"<!-- /entry:{escaped_id} -->")
+
+
+def _strip_one_newline(text: str) -> str:
+    """Drop at most one newline from each end of ``text``.
+
+    Both ``\\r\\n`` and ``\\n`` count as a newline; ``\\r\\n`` is tried
+    first so a CRLF is removed as a unit. The leading end is handled
+    before the trailing end.
+    """
+    newlines = ("\r\n", "\n")
+    for lead in newlines:
+        if text.startswith(lead):
+            text = text[len(lead) :]
+            break
+    for trail in newlines:
+        if text.endswith(trail):
+            text = text[: -len(trail)]
+            break
+    return text
+
+
+# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
+
+# All three patterns use ``[^\S\n]`` (whitespace except newline) rather than
+# ``\s``: a bare ``\s`` can cross a line break and pull the *next* line into
+# the match.
+#
+# H2 line: ``## <header>``.
+_H2_RE = re.compile(r"^##[^\S\n]+(.+?)[^\S\n]*$", re.MULTILINE)
+# Inline field: ``**key**: value``. Anchored to line start so a stray
+# ``**emphasis**`` mid-paragraph isn't mistaken for a field. The value may
+# be empty, so whitespace after the colon must stay on the same line —
+# otherwise the following field would be captured as this field's value.
+_INLINE_RE = re.compile(
+    r"^\*\*(?P<key>[^*\n]+?)\*\*:[^\S\n]*(?P<value>.*?)[^\S\n]*$",
+    re.MULTILINE,
+)
+# H3 line: ``### Title``.
+_H3_RE = re.compile(r"^###[^\S\n]+(.+?)[^\S\n]*$", re.MULTILINE)
+
+
+@dataclass(frozen=True)
+class StructuredEntry(Entry):
+    """:class:`Entry` whose body has been parsed as audit-form data.
+
+    Inherits ``id`` / ``body`` / ``start`` / ``end`` from :class:`Entry`
+    (zeroed when built from a raw body string with no marker context)
+    and adds three parsed views of the body: the optional H2 header,
+    the inline ``**key**: value`` map, and the ``### Title`` sections.
+
+    Audit-form values are strings only; type coercion is the caller's
+    job (a strong-typed model lives in the writer / index).
+    """
+
+    # Text of the H2 line, or ``None`` when the body has none.
+    header: str | None = None
+    # ``**key**: value`` pairs, key -> raw value text.
+    inline: dict[str, str] = field(default_factory=dict)
+    # ``### Title`` blocks, title -> section text.
+    sections: dict[str, str] = field(default_factory=dict)
+
+
+def render_structured_entry(
+    *,
+    header: str | None = None,
+    inline: Mapping[str, object] | None = None,
+    sections: Mapping[str, str] | None = None,
+) -> str:
+    """Render an audit-form entry body.
+
+    The output is built from up to three kinds of block — the H2 header,
+    the run of inline fields, and one block per section — joined by
+    exactly one blank line. Absent blocks contribute nothing, so the
+    result never starts or ends with a blank line and never contains a
+    doubled blank separator.
+
+    Args:
+        header: Optional H2 line at the top (typically the entry id —
+            redundant with the marker but useful for plain-text grep).
+        inline: ``{key: value}`` rendered as ``**key**: value``. Values
+            are stringified: ``list``/``tuple`` become ``[a, b, c]``;
+            ``None`` becomes the empty string; everything else uses
+            ``str()``.
+        sections: ``{title: body}`` rendered as ``### Title`` plus the
+            body text. Title is verbatim; body's trailing whitespace is
+            stripped, and an empty body renders as the title line alone.
+
+    Returns:
+        The rendered string, no trailing newline (the caller — typically
+        :meth:`MarkdownWriter.append_entry` — handles markers + newlines).
+        Empty string when nothing was supplied.
+    """
+    blocks: list[str] = []
+
+    if header:
+        blocks.append(f"## {header}")
+
+    inline_lines = [
+        f"**{key}**: {_render_value(value)}" for key, value in (inline or {}).items()
+    ]
+    if inline_lines:
+        blocks.append("\n".join(inline_lines))
+
+    for title, body in (sections or {}).items():
+        text = body.rstrip()
+        # Skip the body line entirely when empty so no trailing newline leaks.
+        blocks.append(f"### {title}\n{text}" if text else f"### {title}")
+
+    return "\n\n".join(blocks)
+
+
+def parse_structured_entry(
+    body: str, *, _origin: Entry | None = None
+) -> StructuredEntry:
+    """Parse an audit-form entry body into string-only fields.
+
+    The body is cut at every H3 line. The text before the first H3 (the
+    "head") supplies the optional H2 header and the inline
+    ``**key**: value`` fields; each H3 title maps to the text that
+    follows it. No type coercion is performed.
+
+    Tolerant of:
+
+    - a missing H2 (``header`` is ``None``)
+    - inline-looking lines inside sections (only matches in the head are
+      taken as inline fields; later ones stay in their section's text)
+    - extra whitespace and stray lines (kept inside the enclosing
+      section's text)
+
+    A repeated section title or inline key keeps the last occurrence.
+
+    Args:
+        body: Raw audit-form text.
+        _origin: Entry the body came from, when called via
+            :meth:`Entry.as_structured`. Supplies ``id`` / ``body`` /
+            ``start`` / ``end``; without it they fall back to ``""`` /
+            ``body`` / ``0`` / ``len(body)``.
+
+    Returns:
+        :class:`StructuredEntry` with everything as strings.
+    """
+    trimmed = body.strip("\n")
+
+    # With one capture group, split() yields head, title, text, title, text…
+    head, *rest = _H3_RE.split(trimmed)
+    sections_found: dict[str, str] = {}
+    for title, text in zip(rest[::2], rest[1::2]):
+        sections_found[title.strip()] = text.strip("\n").rstrip()
+
+    h2_match = _H2_RE.search(head)
+    header_text = h2_match.group(1).strip() if h2_match else None
+
+    inline_found: dict[str, str] = {}
+    for field_match in _INLINE_RE.finditer(head):
+        inline_found[field_match.group("key").strip()] = field_match.group(
+            "value"
+        ).strip()
+
+    if _origin is None:
+        ident, raw, begin, finish = "", body, 0, len(body)
+    else:
+        ident, raw = _origin.id, _origin.body
+        begin, finish = _origin.start, _origin.end
+    return StructuredEntry(
+        id=ident,
+        body=raw,
+        start=begin,
+        end=finish,
+        header=header_text,
+        inline=inline_found,
+        sections=sections_found,
+    )
+
+
+def _render_value(value: object) -> str:
+    """Turn one inline value into its audit-form text.
+
+    ``list`` / ``tuple`` render as ``[a, b, c]`` with each item passed
+    through ``str()``; ``None`` renders as the empty string; anything
+    else is ``str(value)``.
+    """
+    if isinstance(value, (list, tuple)):
+        return f"[{', '.join(map(str, value))}]"
+    return "" if value is None else str(value)
--- a/src/everos/core/persistence/markdown/frontmatter.py
+++ b/src/everos/core/persistence/markdown/frontmatter.py
@ -0,0 +1,300 @@
+"""Frontmatter — YAML block parse / dump + L1 schema chassis.
+
+Frontmatter is the leading ``---``-delimited YAML block at the top of
+a markdown document::
+
+    ---
+    title: Hello
+    tags: [a, b]
+    ---
+    # Body starts here
+
+Two complementary surfaces live here:
+
+1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
+   YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
+   ``sort_keys=False`` so caller-controlled key order is preserved).
+
+2. The L1 chassis classes — :class:`BaseFrontmatter`,
+   :class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
+   which fix the *absolute-readonly* fields (``id`` / ``type`` /
+   ``schema_version``) plus scope (``user_id`` / ``agent_id`` +
+   ``track``). Every business frontmatter schema in
+   ``infra/persistence/markdown/mds/`` subclasses one of these.
+
+Concrete business schemas (``UserMemcellDailyFrontmatter``,
+``SkillFrontmatter``, …) live in ``infra``; they add per-record
+business fields plus the path-resolution metadata daily-log writers
+need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, ClassVar, Literal
+
+import yaml
+from pydantic import BaseModel, ConfigDict
+
+# ── YAML helpers ────────────────────────────────────────────────────────
+
+_DELIM = "---"
+
+
+def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
+    """Split a leading ``---\\n...\\n---\\n`` YAML block off ``text``.
+
+    Returns:
+        (meta, remainder): ``meta`` is the parsed YAML mapping; ``remainder``
+        is everything after the closing delimiter line (minus the single
+        newline that ends that line), otherwise untouched.
+
+    Notes:
+        ``({}, text)`` — the input unchanged — is returned when:
+
+        - the document does not start with ``---``;
+        - the opening ``---`` is not immediately followed by a newline;
+        - no closing ``---`` line is found;
+        - the YAML parses to something other than a mapping (e.g. a bare
+          scalar or a list).
+
+        An empty or whitespace-only block (``---\\n---\\n``), or one that
+        parses to ``None``, yields ``({}, remainder)``.
+
+        YAML syntax errors are not caught here: whatever
+        ``yaml.safe_load`` raises propagates to the caller.
+    """
+    untouched: tuple[dict[str, Any], str] = ({}, text)
+    if not text.startswith(_DELIM):
+        return untouched
+
+    # The opening delimiter must sit alone on the first line.
+    after_open = text[len(_DELIM) :]
+    for eol in ("\r\n", "\n"):
+        if after_open.startswith(eol):
+            after_open = after_open[len(eol) :]
+            break
+    else:
+        return untouched
+
+    close_at = _find_closing_delim(after_open)
+    if close_at is None:
+        return untouched
+
+    yaml_text = after_open[:close_at]
+    tail = after_open[close_at + len(_DELIM) :]
+    # Consume the line ending of the closing delimiter, when present.
+    for eol in ("\r\n", "\n"):
+        if tail.startswith(eol):
+            tail = tail[len(eol) :]
+            break
+
+    loaded: Any = yaml.safe_load(yaml_text) if yaml_text.strip() else {}
+    if loaded is None:
+        return {}, tail
+    if isinstance(loaded, dict):
+        return loaded, tail
+    return untouched
+
+
+def dump_frontmatter(meta: Mapping[str, Any]) -> str:
+    """Serialise ``meta`` as a ``---\\n<yaml>\\n---\\n`` block.
+
+    Args:
+        meta: Mapping to dump. Key order is kept as supplied
+            (``sort_keys=False``).
+
+    Returns:
+        The delimited block in block style, or the empty string (no
+        delimiters at all) when ``meta`` is empty.
+    """
+    if not meta:
+        return ""
+    payload = yaml.safe_dump(
+        dict(meta),
+        sort_keys=False,
+        allow_unicode=True,
+        default_flow_style=False,
+    )
+    return "".join((_DELIM, "\n", payload, _DELIM, "\n"))
+
+
+def _find_closing_delim(text: str) -> int | None:
+    """Return the offset of the first line that is exactly ``---``.
+
+    Lines are separated by ``\\n``; trailing ``\\r`` characters are
+    ignored when comparing, so CRLF documents match too.
+
+    Returns:
+        Offset of the first character of the matching line, or ``None``
+        when no line matches.
+    """
+    offset = 0
+    for line in text.split("\n"):
+        if line.rstrip("\r") == _DELIM:
+            return offset
+        # Step past this line and the "\n" that ended it.
+        offset += len(line) + 1
+    return None
+
+
+# ── L1 schema chassis ───────────────────────────────────────────────────
+
+
+class BaseFrontmatter(BaseModel):
+    """L1 fields every markdown frontmatter must carry.
+
+    These match the *absolute-readonly* tier in the EverOS Markdown First
+    spec — they identify the record across markdown ↔ LanceDB and must
+    never be rewritten by a human edit.
+
+    Subclasses add scope (``UserScopedFrontmatter`` /
+    ``AgentScopedFrontmatter``) plus per-record business fields.
+    """
+
+    SCOPE_DIR: ClassVar[str] = ""
+    """Top-level directory under the memory-root that holds this kind.
+
+    Scope mixins set this to ``"users"`` / ``"agents"``. Scope-agnostic
+    schemas (rare) leave it empty; consumers that need to resolve a path
+    (writers, layout reverse-lookup) must reject schemas with empty
+    ``SCOPE_DIR``.
+    """
+
+    # Absolute-readonly identity fields; ``schema_version`` defaults to 1.
+    id: str
+    type: str
+    schema_version: int = 1
+
+    # Permit additional fields so L2 system-managed metadata
+    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can ride along on
+    # the same model without forcing every subclass to redeclare them.
+    model_config = ConfigDict(extra="allow")
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return an ``fnmatch``-style glob (relative to memory-root)
+        covering every markdown file this schema describes.
+
+        Used by the cascade kind registry — the scanner walks every kind's
+        ``path_glob()`` to enumerate eligible files without hard-coding
+        path patterns in cascade. The schema is the single source of truth
+        for both the writer's path resolution and the scanner's enumeration.
+
+        Subclasses must override — typically by mixing in
+        :class:`DailyLogPathMixin` or :class:`SkillPathMixin` *before* the
+        scope mixin in the MRO so this abstract version is shadowed.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError(
+            f"{cls.__name__} must declare path_glob() "
+            f"(mix in DailyLogPathMixin / SkillPathMixin, or override directly)"
+        )
+
+
+class DailyLogPathMixin:
+    """Path strategy for daily-log files.
+
+    Files live at ``<SCOPE_DIR>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``,
+    below the two-segment ``<app>/<project>`` prefix that ``path_glob()``
+    matches with its leading ``*/*/``.
+    Subclasses must inherit a scope mixin (``UserScopedFrontmatter`` /
+    ``AgentScopedFrontmatter``) supplying ``SCOPE_DIR``, and must declare
+    their own ``DIR_NAME`` / ``FILE_PREFIX`` ClassVars.
+
+    Place **this mixin first** so Python's MRO resolves ``path_glob()`` to
+    the mixin's concrete implementation rather than
+    :meth:`BaseFrontmatter.path_glob`'s ``NotImplementedError`` stub::
+
+        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
+            DIR_NAME: ClassVar[str] = "episodes"
+            FILE_PREFIX: ClassVar[str] = "episode"
+            ...
+    """
+
+    # Declared without values: concrete subclasses (or the scope mixin, for
+    # ``SCOPE_DIR``) must provide them before ``path_glob()`` is called.
+    DIR_NAME: ClassVar[str]
+    FILE_PREFIX: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return the memory-root-relative glob for this kind's daily-log files."""
+        # Leading ``*/*/`` matches the <app>/<project> scope prefix that
+        # precedes every user-visible dir; the scanner's ``root.glob`` is
+        # anchored at root, so the prefix is mandatory (without it nothing
+        # matches), and the watcher's right-anchored ``PurePosixPath.match``
+        # agrees on the same shape.
+        return f"*/*/{cls.SCOPE_DIR}/*/{cls.DIR_NAME}/{cls.FILE_PREFIX}-*.md"
+
+
+class SkillPathMixin:
+    """Path strategy for skill-directory files.
+
+    Each skill lives at ``<SCOPE_DIR>/<scope_id>/<SKILLS_CONTAINER_NAME>/
+    <SKILL_DIR_PREFIX><skill_name>/<SKILL_MAIN_FILENAME>``, below the
+    two-segment ``<app>/<project>`` prefix that ``path_glob()`` matches
+    with its leading ``*/*/``. The glob covers
+    every skill's main file; sibling ``references/*.md`` and ``scripts/*``
+    are excluded (they ride alongside the main file and the cascade
+    daemon rebuilds the index column by concatenation, see
+    :class:`AgentSkillFrontmatter`'s docstring).
+
+    Place **this mixin first** so MRO resolves ``path_glob()`` here::
+
+        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
+            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
+            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
+            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
+            ...
+    """
+
+    # Declared without values: concrete subclasses (or the scope mixin, for
+    # ``SCOPE_DIR``) must provide them before ``path_glob()`` is called.
+    SKILLS_CONTAINER_NAME: ClassVar[str]
+    SKILL_DIR_PREFIX: ClassVar[str]
+    SKILL_MAIN_FILENAME: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return the memory-root-relative glob for every skill's main file."""
+        # Leading ``*/*/`` matches the <app>/<project> scope prefix.
+        return (
+            f"*/*/{cls.SCOPE_DIR}/*/{cls.SKILLS_CONTAINER_NAME}/"
+            f"{cls.SKILL_DIR_PREFIX}*/{cls.SKILL_MAIN_FILENAME}"
+        )
+
+
+class ProfilePathMixin:
+    """Path strategy for single-file profile markdown.
+
+    Profiles live at ``<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>`` —
+    one fixed-name file directly under the scope's owner directory, no
+    intermediate ``<dir>/`` segment (unlike daily-logs) and no per-name
+    subdir (unlike skills). As with the other path mixins, the glob
+    prepends ``*/*/`` for the ``<app>/<project>`` prefix. Subclasses must
+    inherit a scope mixin
+    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``) supplying
+    ``SCOPE_DIR`` and declare their own ``PROFILE_FILENAME``.
+
+    Place **this mixin first** so MRO resolves ``path_glob()`` here::
+
+        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
+            PROFILE_FILENAME: ClassVar[str] = "user.md"
+            ...
+    """
+
+    # Declared without values: concrete subclasses (or the scope mixin, for
+    # ``SCOPE_DIR``) must provide them before ``path_glob()`` is called.
+    PROFILE_FILENAME: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return the memory-root-relative glob for this kind's profile file."""
+        # Leading ``*/*/`` matches the <app>/<project> scope prefix.
+        return f"*/*/{cls.SCOPE_DIR}/*/{cls.PROFILE_FILENAME}"
+
+
+class UserScopedFrontmatter(BaseFrontmatter):
+    """Records that belong to a single user (track = ``user``).
+
+    The frontmatter only carries the *file-level* scope (``user_id``,
+    which the path itself already expresses); business attributes like
+    ``group_id`` live inside each entry's structured body — see
+    :class:`StructuredEntry` in :mod:`.entries`.
+    """
+
+    # Directory segment used by the path mixins' ``path_glob()``.
+    SCOPE_DIR: ClassVar[str] = "users"
+
+    user_id: str
+    # Fixed discriminator: the only accepted value is ``"user"``.
+    track: Literal["user"] = "user"
+
+
+class AgentScopedFrontmatter(BaseFrontmatter):
+    """Records that belong to a single agent (track = ``agent``).
+
+    Same scope-vs-business split as :class:`UserScopedFrontmatter`:
+    ``agent_id`` is the file-level scope; ``group_id`` etc. ride on
+    each entry, not on the file frontmatter.
+    """
+
+    # Directory segment used by the path mixins' ``path_glob()``.
+    SCOPE_DIR: ClassVar[str] = "agents"
+
+    agent_id: str
+    # Fixed discriminator: the only accepted value is ``"agent"``.
+    track: Literal["agent"] = "agent"
--- a/src/everos/core/persistence/markdown/parsed.py
+++ b/src/everos/core/persistence/markdown/parsed.py
@ -0,0 +1,31 @@
+"""Parsed-markdown data type.
+
+The output shape of :class:`MarkdownReader` is held here, separate
+from the reader implementation: callers that only consume parse
+results don't need to import the reader machinery, and downstream
+modules (writer, business readers) can produce :class:`ParsedMarkdown`
+without going through ``MarkdownReader.read`` if they already hold
+the pieces.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from .entries import Entry
+
+
+@dataclass(frozen=True)
+class ParsedMarkdown:
+    """A markdown document after parsing.
+
+    ``frozen=True`` only prevents rebinding the attributes; the
+    ``frontmatter`` dict and ``entries`` list themselves stay mutable.
+
+    Attributes:
+        frontmatter: Parsed YAML mapping (empty dict if no frontmatter block).
+        body: Document text after the frontmatter block; not entry-stripped.
+        entries: Marker-delimited entries discovered inside ``body``.
+    """
+
+    frontmatter: dict[str, Any]
+    body: str
+    # Defaults to a fresh empty list per instance.
+    entries: list[Entry] = field(default_factory=list)
--- a/src/everos/core/persistence/markdown/reader.py
+++ b/src/everos/core/persistence/markdown/reader.py
@ -0,0 +1,42 @@
+"""Markdown file reader.
+
+Loads a markdown document and splits it into:
+
+    1. ``frontmatter`` — parsed YAML (empty dict if absent)
+    2. ``body`` — raw text after the closing ``---`` delimiter
+    3. ``entries`` — marker-delimited spans inside ``body``
+
+The reader is purely parsing; it does not validate frontmatter shape,
+entry content, or cross-references. Higher layers add business-aware
+checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
+
+``parse`` is sync (pure in-memory string processing). ``read`` is async
+and uses :class:`anyio.Path` so file I/O does not block the event loop.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import anyio
+
+from .entries import split_entries
+from .frontmatter import parse_frontmatter
+from .parsed import ParsedMarkdown
+
+
+class MarkdownReader:
+    """Turn markdown text or files into :class:`ParsedMarkdown` values.
+
+    Stateless: both operations are static methods.
+    """
+
+    @staticmethod
+    def parse(text: str) -> ParsedMarkdown:
+        """Parse in-memory ``text``; performs no IO.
+
+        The frontmatter block is split off first, then entry markers are
+        located in what remains.
+        """
+        frontmatter, remainder = parse_frontmatter(text)
+        return ParsedMarkdown(
+            frontmatter=frontmatter,
+            body=remainder,
+            entries=split_entries(remainder),
+        )
+
+    @staticmethod
+    async def read(path: Path) -> ParsedMarkdown:
+        """Load ``path`` as UTF-8 through :class:`anyio.Path` and parse it."""
+        raw = await anyio.Path(path).read_text(encoding="utf-8")
+        return MarkdownReader.parse(raw)
--- a/src/everos/core/persistence/markdown/writer.py
+++ b/src/everos/core/persistence/markdown/writer.py
@ -0,0 +1,269 @@
+"""Markdown file writer with atomic write semantics.
+
+Atomicity is provided by writing to a same-directory temp file
+(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
+the target. Keeping the temp file in the same directory guarantees the
+rename is on the same filesystem (POSIX rename is atomic only within a
+single fs).
+
+All public methods are async. File I/O (``read_text`` / ``write_text``
+/ ``mkdir``) goes through :class:`anyio.Path`; the few syscalls without
+a native async equivalent (``os.fsync`` / ``os.replace`` / ``unlink``
+in the cleanup path) are offloaded via :func:`anyio.to_thread.run_sync`.
+
+In-process per-path locking
+---------------------------
+:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
+the whole file (load frontmatter+body, merge an entry block, atomic
+write the result). The atomic write itself is safe, but the read→write
+window crosses ``await`` points. Concurrent asyncio tasks targeting the
+same path would otherwise lose-update each other (both read N entries,
+both produce N+1, second write overwrites the first → 1 entry lost).
+
+To prevent this, an in-process per-path :class:`asyncio.Lock` is held
+across the entire read-modify-write sequence. Lock objects live on the
+writer instance (not class-level) and are created lazily the first time
+a path is locked, so a newly constructed writer always starts with an
+empty lock registry — this avoids the "Lock bound to different loop"
+failure mode that surfaces when pytest-asyncio rebuilds the loop between
+tests but module-level writer singletons leak Lock objects across boundaries.
+
+Process-level coordination (multi-process writers against the same
+memory-root) remains the job of
+:func:`everos.core.persistence.locking.memory_root_lock`, which uses
+``fcntl.flock``. The two locks compose: per-path async lock serialises
+tasks within one process, ``memory_root_lock`` serialises processes
+against each other.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import os
+import uuid
+from collections.abc import Mapping, Sequence
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from ..memory_root import MemoryRoot
+from .entries import EntryId
+from .frontmatter import dump_frontmatter
+from .reader import MarkdownReader
+
+
+class MarkdownWriter:
+    """Atomic writer for markdown files inside a memory-root.
+
+    The ``memory_root`` reference is held to enable future enforcement that
+    targets stay within the configured root; current writes do not depend on
+    it for the rename itself (same-dir temp file).
+
+    Read-modify-write appends are serialised per target path with
+    :class:`asyncio.Lock` objects owned by this instance (see
+    :meth:`lock_for`).
+    """
+
+    def __init__(self, memory_root: MemoryRoot) -> None:
+        self._memory_root = memory_root
+        # Per-path async lock registry, filled lazily by ``lock_for``. The
+        # insert goes through ``dict.setdefault``, so callers racing on the
+        # same key all end up with the first lock that was registered.
+        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
+        # must outlive any task awaiting it; ref-counted GC would race with
+        # those waiters. See Python bpo-28427 for the WeakValueDictionary
+        # multithreading hazard that bites the weak-ref approach.
+        self._path_locks: dict[Path, asyncio.Lock] = {}
+
+    @property
+    def memory_root(self) -> MemoryRoot:
+        """The memory-root this writer was constructed with."""
+        return self._memory_root
+
+    def lock_for(self, path: Path) -> asyncio.Lock:
+        """Return the per-path lock; create on first use.
+
+        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
+        can serialise their own multi-step ``read → compute → write``
+        sequences against this writer's single-step ``append`` paths.
+        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
+        re-acquisition of the same lock from within an already-locked
+        critical section (``asyncio.Lock`` is *not* reentrant).
+
+        Args:
+            path: Target file; it does not have to exist yet.
+
+        Returns:
+            The lock registered for the resolved form of ``path``. Repeated
+            calls with any alias of the same file return the same object.
+        """
+        # Resolve to an absolute canonical path so aliases (relative vs.
+        # absolute, symlinks) share the same lock object.
+        key = Path(path).resolve()
+        lock = self._path_locks.get(key)
+        if lock is None:
+            # ``setdefault`` inserts-or-returns in a single dict operation:
+            # if another caller registered a lock for ``key`` after the
+            # lookup above, that earlier lock is returned and ours is dropped.
+            lock = self._path_locks.setdefault(key, asyncio.Lock())
+        return lock
+
+    async def write(self, path: Path, content: str) -> Path:
+        """Atomically write ``content`` to ``path``.
+
+        Steps:
+            1. ``mkdir -p`` the parent directory.
+            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
+            3. ``flush`` + ``fsync`` the temp file.
+            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).
+
+        If step 2-4 fails *or the calling task is cancelled*, the staging
+        file is removed on a best-effort basis before the error propagates.
+
+        Args:
+            path: Destination file. Missing parent directories are created.
+            content: Full text to store (written as UTF-8).
+
+        Returns:
+            The destination as ``Path(path)``; it is not resolved.
+
+        Raises:
+            OSError: Propagated from directory creation, the staging write
+                or the rename.
+        """
+        target = Path(path)
+        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
+        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
+        try:
+            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
+            await anyio.to_thread.run_sync(os.replace, tmp, target)
+        except BaseException:
+            # ``BaseException`` rather than ``Exception``: task cancellation
+            # is not an ``Exception`` subclass, and a narrower handler would
+            # leave the staging file behind whenever a write is cancelled.
+            # The shielded scope lets the unlink run even though the
+            # enclosing scope may already be cancelled.
+            with anyio.CancelScope(shield=True):
+                await _unlink_quiet(tmp)
+            raise
+        return target
+
+    async def write_markdown(
+        self,
+        path: Path,
+        *,
+        frontmatter: Mapping[str, Any] | None = None,
+        body: str = "",
+    ) -> Path:
+        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``.
+
+        Args:
+            path: Destination file, forwarded to :meth:`write`.
+            frontmatter: Mapping rendered by ``dump_frontmatter``; ``None``
+                is treated like an empty mapping.
+            body: Text placed directly after the rendered frontmatter.
+
+        Returns:
+            The value returned by :meth:`write`.
+        """
+        head = dump_frontmatter(frontmatter or {})
+        return await self.write(path, head + body)
+
+    async def append_entry(
+        self,
+        path: Path,
+        *,
+        entry_body: str,
+        entry_id: EntryId,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append a single entry block to a markdown file, merging frontmatter.
+
+        Convenience wrapper around :meth:`append_entries` for single-entry
+        callers. See that method for full semantics.
+
+        Args:
+            path: Target markdown file. Created if missing.
+            entry_body: Content between the open and close markers.
+                A newline is inserted after the open marker and after the
+                body automatically.
+            entry_id: The id to stamp on this entry. The caller normally
+                builds it with :meth:`EntryId.next_for`.
+            frontmatter_updates: Mapping shallow-merged into existing
+                frontmatter (later wins). ``None`` skips the merge.
+
+        Returns:
+            The target as ``Path(path)``; it is not resolved.
+        """
+        return await self.append_entries(
+            path,
+            [(entry_body, entry_id)],
+            frontmatter_updates=frontmatter_updates,
+        )
+
+    async def append_entries(
+        self,
+        path: Path,
+        entries: Sequence[tuple[str, EntryId]],
+        *,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append ``N`` entry blocks in a single locked read-modify-write cycle.
+
+        Compared with calling :meth:`append_entry` ``N`` times, this:
+
+        * Performs one file read + one file write instead of ``N`` of each
+          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
+          already holds many entries).
+        * Holds the per-path lock for one short critical section instead of
+          ``N`` separate acquisitions.
+        * Updates ``frontmatter`` once (no intermediate ``entry_count``
+          flapping).
+
+        The caller assigns and supplies all :class:`EntryId` values; this
+        writer only renders them through ``EntryId.format``. The order in
+        ``entries`` is the order the blocks land in the file.
+
+        Args:
+            path: Target markdown file. Created if missing.
+            entries: ``(entry_body, entry_id)`` pairs to append, in order.
+                Empty sequence is allowed; the file is still rewritten, with
+                any supplied frontmatter updates applied.
+            frontmatter_updates: Mapping shallow-merged into existing
+                frontmatter (later wins). ``None`` or empty skips the merge.
+
+        Returns:
+            The target as ``Path(path)``; it is not resolved.
+        """
+        target = Path(path)
+        async with self.lock_for(target):
+            return await self._append_entries_unlocked(
+                target,
+                entries,
+                frontmatter_updates=frontmatter_updates,
+            )
+
+    async def _append_entries_unlocked(
+        self,
+        path: Path,
+        entries: Sequence[tuple[str, EntryId]],
+        *,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append entries assuming the caller already holds the path lock.
+
+        Same as :meth:`append_entries`, minus the lock acquisition. For use
+        by higher-level writers that perform a multi-step
+        ``read → compute eid → write`` sequence and need to keep
+        :meth:`lock_for` ``(path)`` held across the read and the write.
+        Public ``append_entries`` / ``append_entry`` always wrap this with
+        the lock.
+
+        ``asyncio.Lock`` is not reentrant, which is why a lock-holding
+        caller must use this method instead of :meth:`append_entries`.
+        Calling it *without* holding the lock breaks the safety contract.
+
+        Returns:
+            The target as ``Path(path)``; it is not resolved.
+        """
+        target = Path(path)
+
+        # 1. Load existing markdown (or initialise empty).
+        if await anyio.Path(target).is_file():
+            parsed = await MarkdownReader.read(target)
+            # Copy so the merge below never mutates the parsed result.
+            meta: dict[str, Any] = dict(parsed.frontmatter)
+            body = parsed.body
+        else:
+            meta = {}
+            body = ""
+
+        # 2. Shallow-merge frontmatter updates (later wins).
+        if frontmatter_updates:
+            meta.update(frontmatter_updates)
+
+        # 3. Append all entry blocks in order.
+        if entries:
+            # Make sure the first open marker starts on its own line.
+            if body and not body.endswith("\n"):
+                body += "\n"
+            appended_blocks: list[str] = []
+            for entry_body, entry_id in entries:
+                eid_str = entry_id.format()
+                appended_blocks.append(
+                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
+                    f"<!-- /entry:{eid_str} -->\n"
+                )
+            body += "".join(appended_blocks)
+
+        # 4. Atomic write.
+        return await self.write_markdown(target, frontmatter=meta, body=body)
+
+
+def _write_and_fsync(tmp: Path, content: str) -> None:
+    """Write ``content`` to the staging file ``tmp`` and fsync it.
+
+    Blocking helper; the async writer runs it in a worker thread. The file
+    is opened in text mode with UTF-8 encoding and truncated if it exists.
+    """
+    with open(tmp, mode="w", encoding="utf-8") as staging:
+        descriptor = staging.fileno()
+        staging.write(content)
+        # Push Python's buffer to the OS before asking the OS to sync.
+        staging.flush()
+        os.fsync(descriptor)
+
+
+async def _unlink_quiet(tmp: Path) -> None:
+    """Remove ``tmp`` if present, ignoring any :class:`OSError`.
+
+    Used on the failure path of an atomic write, where the exception that
+    triggered the cleanup must not be masked by a secondary unlink error.
+    """
+    staging = anyio.Path(tmp)
+    with contextlib.suppress(OSError):
+        await staging.unlink(missing_ok=True)