chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,62 @@
"""Markdown file IO toolkit.
Atomic write + YAML frontmatter parse/dump + entry marker parse +
audit-form structured-entry parsing. Knows nothing about business
models (no MemCell / Episode); the :class:`Entry` here is a
*marker-delimited* span within a markdown body, not a business record.
External usage (IO + parse):
from everos.core.persistence.markdown import (
Entry, EntryId, StructuredEntry,
MarkdownReader, MarkdownWriter, ParsedMarkdown,
parse_frontmatter, dump_frontmatter,
split_entries, find_entry,
parse_structured_entry, render_structured_entry,
)
External usage (frontmatter schema chassis):
from everos.core.persistence.markdown import (
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
)
"""
from .entries import Entry as Entry
from .entries import EntryId as EntryId
from .entries import StructuredEntry as StructuredEntry
from .entries import find_entry as find_entry
from .entries import parse_structured_entry as parse_structured_entry
from .entries import render_structured_entry as render_structured_entry
from .entries import split_entries as split_entries
from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
from .frontmatter import BaseFrontmatter as BaseFrontmatter
from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
from .frontmatter import ProfilePathMixin as ProfilePathMixin
from .frontmatter import SkillPathMixin as SkillPathMixin
from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
from .frontmatter import dump_frontmatter as dump_frontmatter
from .frontmatter import parse_frontmatter as parse_frontmatter
from .parsed import ParsedMarkdown as ParsedMarkdown
from .reader import MarkdownReader as MarkdownReader
from .writer import MarkdownWriter as MarkdownWriter
__all__ = [
"AgentScopedFrontmatter",
"BaseFrontmatter",
"DailyLogPathMixin",
"Entry",
"EntryId",
"MarkdownReader",
"MarkdownWriter",
"ParsedMarkdown",
"ProfilePathMixin",
"SkillPathMixin",
"StructuredEntry",
"UserScopedFrontmatter",
"dump_frontmatter",
"find_entry",
"parse_frontmatter",
"parse_structured_entry",
"render_structured_entry",
"split_entries",
]

View File

@ -0,0 +1,368 @@
"""Markdown entries — id format, marker spans, and audit-form parsing.
Three closely-related entry concepts live together here so a reader
sees the whole entry surface in one file:
1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
stamped into each daily-log entry's open / close markers. Carries
the prefix declared by the frontmatter schema, the date bucket, and
the in-file zero-padded sequence.
2. :class:`Entry` — a marker-delimited span inside a markdown body::
<!-- entry:abc123 -->
...content...
<!-- /entry:abc123 -->
:func:`split_entries` and :func:`find_entry` locate these spans
without interpreting the inner content. Higher layers (writers,
cascade) parse it per record type.
3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
audit-form body fields (header / inline / sections). Built either
from a raw body string via :func:`parse_structured_entry` or from
an existing :class:`Entry` via :meth:`Entry.as_structured`.
Audit-form layout::
## <header> ← optional H2 (usually entry id, for grep)
**key**: value ← inline fields, one per line
**key2**: value2
### Section Title ← section fields: H3 + free-form text
body content...
### Another Section
more content...
The audit chassis is intentionally **type-agnostic** — every field
round-trips as a string. Inline values are stringified on render
(lists become ``[a, b, c]``, scalars use ``str()``); on parse
everything is the raw text after the colon. Section titles are kept
verbatim. This keeps parsing tolerant of stray fields, wrapped
strings, and manually-typed timestamps; the strong-typed model lives
in business writers + the SQLite/LanceDB indexes.
Cross-user uniqueness is handled at the database layer via a composite
``<user_id>_<entry_id>`` field; it is *not* encoded into the
:class:`EntryId` string itself.
"""
from __future__ import annotations
import datetime as _dt
import re
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Self
# ── EntryId — structured id for marker stamping ─────────────────────────
_DATE_FMT = "%Y%m%d"
_SEQ_DIGITS = 8
"""Minimum zero-padding for the in-file seq.
8 digits keeps lexicographic order == numeric order up to 10**8
entries per file (per user, per day). ``format()`` is "at least 8"
larger seqs emit more digits without truncation. ``parse`` is
permissive: shorter (legacy 4-digit) and longer seq strings both
parse cleanly; format normalises to >= 8 digits on round-trip.
"""
@dataclass(frozen=True, slots=True)
class EntryId:
"""Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
prefix: str
date: _dt.date
seq: int
def format(self) -> str:
"""Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
return (
f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
)
def __str__(self) -> str: # noqa: D401
return self.format()
@classmethod
def parse(cls, s: str) -> Self:
"""Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
possible) is preserved as-is.
"""
parts = s.rsplit("_", 2)
if len(parts) != 3:
raise ValueError(f"invalid entry id format: {s!r}")
prefix, date_str, seq_str = parts
if not prefix:
raise ValueError(f"empty prefix in entry id: {s!r}")
try:
d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
except ValueError as exc:
raise ValueError(f"invalid date in entry id: {s!r}") from exc
try:
seq = int(seq_str)
except ValueError as exc:
raise ValueError(f"invalid seq in entry id: {s!r}") from exc
if seq < 0:
raise ValueError(f"negative seq in entry id: {s!r}")
return cls(prefix=prefix, date=d, seq=seq)
@classmethod
def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
"""Build the id for the next entry given the file's current count.
``current_count`` is the value of ``frontmatter.entry_count``
*before* this append. The new id gets ``seq = current_count + 1``.
"""
if current_count < 0:
raise ValueError(f"current_count must be >= 0, got {current_count}")
return cls(prefix=prefix, date=date, seq=current_count + 1)
# ── Entry — marker-delimited span inside a body ─────────────────────────
# Filename / URL-safe id alphabet for the marker.
_ID_PATTERN = r"[A-Za-z0-9_-]+"
_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
@dataclass(frozen=True)
class Entry:
    """A single ``<!-- entry:ID -->`` ... ``<!-- /entry:ID -->`` span.

    Attributes:
        id: The identifier captured from the open marker.
        body: Text found between the two markers, minus one leading and
            one trailing newline.
        start: Index in the source body where the open marker begins.
        end: Index in the source body immediately after the close marker.
    """

    id: str
    body: str
    start: int
    end: int

    def as_structured(self) -> StructuredEntry:
        """Return this entry with its body parsed as audit-form fields.

        The entry itself is handed over as the origin, so the result
        carries the same ``id`` / ``body`` / ``start`` / ``end`` alongside
        the parsed ``header`` / ``inline`` / ``sections``.
        """
        structured = parse_structured_entry(self.body, _origin=self)
        return structured
def split_entries(body: str) -> list[Entry]:
    """Collect every complete marker-delimited entry of ``body``, in order.

    Scanning proceeds left to right: each open marker is paired with the
    first close marker carrying the same id that follows it, and the
    search for the next open marker resumes right after that close
    marker. The first open marker without a matching close marker ends
    the scan; entries collected up to that point are returned and the
    partial one is dropped. No stricter validation is performed here.
    """
    found: list[Entry] = []
    cursor = 0
    while (opening := _OPEN_RE.search(body, cursor)) is not None:
        marker_id = opening.group(1)
        closing = _close_re_for(marker_id).search(body, opening.end())
        if closing is None:
            # Unterminated entry: give up on the rest of the body.
            break
        inner = body[opening.end() : closing.start()]
        found.append(
            Entry(
                id=marker_id,
                body=_strip_one_newline(inner),
                start=opening.start(),
                end=closing.end(),
            )
        )
        cursor = closing.end()
    return found
def find_entry(body: str, entry_id: str) -> Entry | None:
    """Locate the first entry whose markers carry ``entry_id``.

    Returns ``None`` when the open marker is absent, or when no close
    marker with the same id follows the first open marker.
    """
    open_marker = f"<!-- entry:{entry_id} -->"
    open_at = body.find(open_marker)
    if open_at < 0:
        return None
    content_from = open_at + len(open_marker)

    close_marker = f"<!-- /entry:{entry_id} -->"
    close_at = body.find(close_marker, content_from)
    if close_at < 0:
        return None

    return Entry(
        id=entry_id,
        body=_strip_one_newline(body[content_from:close_at]),
        start=open_at,
        end=close_at + len(close_marker),
    )
def _close_re_for(entry_id: str) -> re.Pattern[str]:
    """Compile the regex matching the close marker of ``entry_id``.

    The id is escaped, so it is matched literally.
    """
    literal_id = re.escape(entry_id)
    return re.compile(rf"<!-- /entry:{literal_id} -->")
def _strip_one_newline(text: str) -> str:
    """Drop at most one newline from each end of ``text``.

    Both ``\\r\\n`` and ``\\n`` count as a newline; ``\\r\\n`` is checked
    first so it is removed as a unit. The trailing check runs on the
    text that remains after the leading newline was removed.
    """
    for newline in ("\r\n", "\n"):
        if text.startswith(newline):
            text = text[len(newline) :]
            break
    for newline in ("\r\n", "\n"):
        if text.endswith(newline):
            text = text[: len(text) - len(newline)]
            break
    return text
# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
# NOTE: the separator after ``##`` / ``###`` / ``:`` is ``[^\S\n]`` (any
# whitespace except a newline), not ``\s``. ``\s`` also matches ``\n``,
# which let an empty heading or an empty inline value (``**key**: `` —
# what rendering ``None`` produces) swallow the *next* line as its
# content and silently drop that line's own field.

# H2 line: ``## <header>``.
_H2_RE = re.compile(r"^##[^\S\n]+(.+?)\s*$", re.MULTILINE)
# Inline field: ``**key**: value``. Anchored to line start so a stray
# ``**emphasis**`` mid-paragraph isn't mistaken for a field. The value
# may be empty.
_INLINE_RE = re.compile(
    r"^\*\*(?P<key>[^*\n]+?)\*\*:[^\S\n]*(?P<value>.*?)\s*$",
    re.MULTILINE,
)
# H3 line: ``### Title``.
_H3_RE = re.compile(r"^###[^\S\n]+(.+?)\s*$", re.MULTILINE)
@dataclass(frozen=True)
class StructuredEntry(Entry):
    """An :class:`Entry` plus the audit-form fields parsed from its body.

    The ``id`` / ``body`` / ``start`` / ``end`` fields come from
    :class:`Entry`. On top of them this class stores three parsed views
    of the body, all string-valued (any type coercion is left to the
    caller):

    Attributes:
        header: Text of the ``## <header>`` line, or ``None`` if absent.
        inline: Mapping built from the ``**key**: value`` lines.
        sections: Mapping from each ``### Title`` to its section text.
    """

    header: str | None = None
    inline: dict[str, str] = field(default_factory=dict)
    sections: dict[str, str] = field(default_factory=dict)
def render_structured_entry(
*,
header: str | None = None,
inline: Mapping[str, object] | None = None,
sections: Mapping[str, str] | None = None,
) -> str:
"""Render an audit-form entry body.
Args:
header: Optional H2 line at the top (typically the entry id —
redundant with the marker but useful for plain-text grep).
inline: ``{key: value}`` rendered as ``**key**: value``. Values
are stringified: ``list``/``tuple`` become ``[a, b, c]``;
``None`` becomes the empty string; everything else uses
``str()``.
sections: ``{title: body}`` rendered as ``### Title`` plus the
body text. Title is verbatim; body's trailing whitespace is
stripped.
Returns:
The rendered string, no trailing newline (the caller — typically
:meth:`MarkdownWriter.append_entry` — handles markers + newlines).
"""
inline = inline or {}
sections = sections or {}
lines: list[str] = []
if header:
lines.append(f"## {header}")
lines.append("")
for key, value in inline.items():
lines.append(f"**{key}**: {_render_value(value)}")
for title, body in sections.items():
lines.append("")
lines.append(f"### {title}")
lines.append(body.rstrip())
return "\n".join(lines)
def parse_structured_entry(
    body: str, *, _origin: Entry | None = None
) -> StructuredEntry:
    """Split an audit-form body into header, inline fields and sections.

    The body (with leading/trailing newlines removed) is cut at every H3
    line. The text before the first H3 is the "head": the first H2 line
    found there becomes ``header`` (``None`` if there is none) and every
    ``**key**: value`` line found there goes into ``inline``. Inline-
    looking lines after the first H3 stay inside their section's text.
    Each H3 title maps to the text that follows it, with surrounding
    newlines and trailing whitespace removed; a repeated title or key
    keeps its last value. All values are plain strings.

    Args:
        body: The audit-form text to parse.
        _origin: The :class:`Entry` the body came from, if any. Its
            ``id`` / ``body`` / ``start`` / ``end`` are copied onto the
            result; without it those are ``""`` / ``body`` / ``0`` /
            ``len(body)``.

    Returns:
        The populated :class:`StructuredEntry`.
    """
    trimmed = body.strip("\n")

    # With one capture group, ``split`` yields
    # [head, title_1, content_1, title_2, content_2, ...].
    head, *rest = _H3_RE.split(trimmed)
    sections = {
        title.strip(): content.strip("\n").rstrip()
        for title, content in zip(rest[0::2], rest[1::2])
    }

    h2_match = _H2_RE.search(head)
    header = h2_match.group(1).strip() if h2_match else None

    inline: dict[str, str] = {}
    for field_match in _INLINE_RE.finditer(head):
        inline[field_match.group("key").strip()] = field_match.group("value").strip()

    if _origin is None:
        entry_id, entry_body, start, end = "", body, 0, len(body)
    else:
        entry_id, entry_body = _origin.id, _origin.body
        start, end = _origin.start, _origin.end

    return StructuredEntry(
        id=entry_id,
        body=entry_body,
        start=start,
        end=end,
        header=header,
        inline=inline,
        sections=sections,
    )
def _render_value(value: object) -> str:
    """Turn an inline value into its audit-form text.

    Lists and tuples render as ``[a, b, c]`` (each item via ``str``),
    ``None`` renders as the empty string, anything else via ``str``.
    """
    if isinstance(value, (list, tuple)):
        joined = ", ".join(map(str, value))
        return f"[{joined}]"
    return "" if value is None else str(value)

View File

@ -0,0 +1,300 @@
"""Frontmatter — YAML block parse / dump + L1 schema chassis.
Frontmatter is the leading ``---``-delimited YAML block at the top of
a markdown document::
---
title: Hello
tags: [a, b]
---
# Body starts here
Two complementary surfaces live here:
1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
``sort_keys=False`` so caller-controlled key order is preserved).
2. The L1 chassis classes — :class:`BaseFrontmatter`,
:class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
which fix the *absolute-readonly* fields (``id`` / ``type`` /
``schema_version``) plus scope (``user_id`` / ``agent_id`` +
``track``). Every business frontmatter schema in
``infra/persistence/markdown/mds/`` subclasses one of these.
Concrete business schemas (``UserMemcellDailyFrontmatter``,
``SkillFrontmatter``, …) live in ``infra``; they add per-record
business fields plus the path-resolution metadata daily-log writers
need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
"""
from __future__ import annotations
from collections.abc import Mapping
from typing import Any, ClassVar, Literal
import yaml
from pydantic import BaseModel, ConfigDict
# ── YAML helpers ────────────────────────────────────────────────────────
_DELIM = "---"
def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Split a leading ``---\\n...\\n---\\n`` YAML block off ``text``.

    Returns:
        ``(meta, remainder)``. On success ``meta`` is the parsed mapping
        and ``remainder`` is everything after the closing delimiter line
        (one newline directly after the delimiter is dropped, the rest
        is untouched).

    ``({}, text)`` — the input unchanged — is returned when:

    - ``text`` does not start with ``---``;
    - the opening ``---`` is not immediately followed by a newline;
    - no closing ``---`` line is found;
    - the YAML parses to something other than a mapping (e.g. a list).

    A blank YAML block, or one that loads as ``None``, yields
    ``({}, remainder)``.

    NOTE(review): errors from ``yaml.safe_load`` are not caught here, so
    syntactically invalid YAML presumably propagates as an exception
    rather than yielding ``{}`` — confirm callers expect that.
    """
    unparsed: tuple[dict[str, Any], str] = ({}, text)
    if not text.startswith(_DELIM):
        return unparsed

    # The opening delimiter must be directly followed by a newline.
    after_open = text[len(_DELIM) :]
    for newline in ("\r\n", "\n"):
        if after_open.startswith(newline):
            after_open = after_open[len(newline) :]
            break
    else:
        return unparsed

    close_at = _find_closing_delim(after_open)
    if close_at is None:
        return unparsed

    raw_yaml = after_open[:close_at]
    tail = after_open[close_at + len(_DELIM) :]
    # Swallow the single newline that ends the closing delimiter line.
    for newline in ("\r\n", "\n"):
        if tail.startswith(newline):
            tail = tail[len(newline) :]
            break

    loaded: Any = yaml.safe_load(raw_yaml) if raw_yaml.strip() else None
    meta = {} if loaded is None else loaded
    if not isinstance(meta, dict):
        return unparsed
    return meta, tail
def dump_frontmatter(meta: Mapping[str, Any]) -> str:
    """Serialise ``meta`` as a ``---``-delimited YAML frontmatter block.

    A falsy (empty) mapping produces ``""`` with no delimiters at all.
    Otherwise the mapping is copied into a plain ``dict`` and dumped with
    ``yaml.safe_dump`` in block style, keeping the caller's key order
    (``sort_keys=False``) and writing non-ASCII text as-is
    (``allow_unicode=True``).
    """
    if not meta:
        return ""
    payload = yaml.safe_dump(
        dict(meta),
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
    return "".join((_DELIM, "\n", payload, _DELIM, "\n"))
def _find_closing_delim(text: str) -> int | None:
    """Return the offset of the first line that consists solely of ``---``.

    Lines are separated by ``\\n``; trailing ``\\r`` characters on a line
    are ignored for the comparison. The offset points at the first
    character of the matching line. ``None`` means no line matched.
    """
    offset = 0
    for line in text.split("\n"):
        if line.rstrip("\r") == _DELIM:
            return offset
        # Step over this line and the "\n" that terminated it.
        offset += len(line) + 1
    return None
# ── L1 schema chassis ───────────────────────────────────────────────────
class BaseFrontmatter(BaseModel):
    """Common L1 fields shared by every markdown frontmatter schema.

    Declares ``id``, ``type`` and ``schema_version`` (default ``1``) —
    the *absolute-readonly* tier of the EverOS Markdown First spec, which
    identifies a record across markdown and LanceDB and is not meant to
    be rewritten by a human edit.

    Scope (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``) and
    per-record business fields are added by subclasses.
    """

    SCOPE_DIR: ClassVar[str] = ""
    """Top-level directory under the memory-root that holds this kind.

    Scope mixins set this to ``"users"`` / ``"agents"``. Scope-agnostic
    schemas (rare) leave it empty; consumers that need to resolve a path
    (writers, layout reverse-lookup) must reject schemas with empty
    ``SCOPE_DIR``.
    """

    id: str
    type: str
    schema_version: int = 1

    # Unknown fields are accepted so that L2 system-managed metadata
    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can travel on the
    # same model without every subclass redeclaring it.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def path_glob(cls) -> str:
        """Return the glob (relative to memory-root) of this schema's files.

        The cascade kind registry uses each kind's ``path_glob()`` to
        enumerate eligible files, so path patterns are declared on the
        schema rather than hard-coded in cascade.

        This base implementation is only a stub. Subclasses are expected
        to shadow it, typically by listing :class:`DailyLogPathMixin` or
        :class:`SkillPathMixin` *before* the scope class in their bases.

        Raises:
            NotImplementedError: Always, when not overridden.
        """
        message = (
            f"{cls.__name__} must declare path_glob() "
            f"(mix in DailyLogPathMixin / SkillPathMixin, or override directly)"
        )
        raise NotImplementedError(message)
class DailyLogPathMixin:
    """Glob strategy for daily-log markdown files.

    A daily log sits at
    ``<SCOPE_DIR>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.
    ``SCOPE_DIR`` is expected from a scope class
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``); the
    concrete schema declares ``DIR_NAME`` and ``FILE_PREFIX`` itself.

    List **this mixin first** among the bases so the MRO picks this
    ``path_glob()`` over the ``NotImplementedError`` stub on
    :meth:`BaseFrontmatter.path_glob`::

        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
            DIR_NAME: ClassVar[str] = "episodes"
            FILE_PREFIX: ClassVar[str] = "episode"
            ...
    """

    DIR_NAME: ClassVar[str]
    FILE_PREFIX: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        # The two leading ``*`` segments stand for the <app>/<project>
        # scope prefix in front of every user-visible dir. The scanner's
        # ``root.glob`` is anchored at root, so nothing would match
        # without them; the watcher's right-anchored
        # ``PurePosixPath.match`` accepts the same shape.
        segments = (
            "*",
            "*",
            cls.SCOPE_DIR,
            "*",
            cls.DIR_NAME,
            f"{cls.FILE_PREFIX}-*.md",
        )
        return "/".join(segments)
class SkillPathMixin:
    """Glob strategy for skill-directory markdown files.

    A skill's main file sits at ``<SCOPE_DIR>/<scope_id>/
    <SKILLS_CONTAINER_NAME>/<SKILL_DIR_PREFIX><skill_name>/
    <SKILL_MAIN_FILENAME>``. The glob matches only that main file;
    sibling ``references/*.md`` and ``scripts/*`` are left out (they ride
    alongside the main file and the cascade daemon rebuilds the index
    column by concatenation, see :class:`AgentSkillFrontmatter`'s
    docstring).

    List **this mixin first** among the bases so the MRO picks this
    ``path_glob()``::

        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
            ...
    """

    SKILLS_CONTAINER_NAME: ClassVar[str]
    SKILL_DIR_PREFIX: ClassVar[str]
    SKILL_MAIN_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        # The two leading ``*`` segments stand for the <app>/<project>
        # scope prefix.
        segments = (
            "*",
            "*",
            cls.SCOPE_DIR,
            "*",
            cls.SKILLS_CONTAINER_NAME,
            f"{cls.SKILL_DIR_PREFIX}*",
            cls.SKILL_MAIN_FILENAME,
        )
        return "/".join(segments)
class ProfilePathMixin:
    """Glob strategy for single-file profile markdown.

    A profile sits at ``<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>``: one
    fixed-name file directly inside the owner's directory, with neither
    the intermediate ``<dir>/`` segment of daily-logs nor the per-name
    subdir of skills. ``SCOPE_DIR`` is expected from a scope class
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``); the
    concrete schema declares ``PROFILE_FILENAME`` itself.

    List **this mixin first** among the bases so the MRO picks this
    ``path_glob()``::

        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
            PROFILE_FILENAME: ClassVar[str] = "user.md"
            ...
    """

    PROFILE_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        # The two leading ``*`` segments stand for the <app>/<project>
        # scope prefix.
        segments = ("*", "*", cls.SCOPE_DIR, "*", cls.PROFILE_FILENAME)
        return "/".join(segments)
class UserScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for files owned by one user (``track`` fixed to ``"user"``).

    Only the *file-level* scope is stored here: ``user_id``, which the
    file path already expresses. Business attributes such as ``group_id``
    belong to each entry's structured body instead — see
    :class:`StructuredEntry` in :mod:`.entries`.
    """

    SCOPE_DIR: ClassVar[str] = "users"

    user_id: str
    track: Literal["user"] = "user"
class AgentScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for files owned by one agent (``track`` fixed to ``"agent"``).

    Mirrors :class:`UserScopedFrontmatter`: ``agent_id`` is the
    file-level scope, while ``group_id`` and similar business attributes
    travel on each entry rather than on the file frontmatter.
    """

    SCOPE_DIR: ClassVar[str] = "agents"

    agent_id: str
    track: Literal["agent"] = "agent"

View File

@ -0,0 +1,31 @@
"""Parsed-markdown data type.
The output shape of :class:`MarkdownReader` is held here, separate
from the reader implementation: callers that only consume parse
results don't need to import the reader machinery, and downstream
modules (writer, business readers) can produce :class:`ParsedMarkdown`
without going through ``MarkdownReader.read`` if they already hold
the pieces.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from .entries import Entry
@dataclass(frozen=True)
class ParsedMarkdown:
    """The pieces of a markdown document once it has been parsed.

    Attributes:
        frontmatter: The YAML mapping from the frontmatter block; an
            empty dict when the document has none.
        body: The text that follows the frontmatter block. Entries are
            still part of it (it is not entry-stripped).
        entries: The marker-delimited entries found in ``body``;
            defaults to a fresh empty list.
    """

    frontmatter: dict[str, Any]
    body: str
    entries: list[Entry] = field(default_factory=list)

View File

@ -0,0 +1,42 @@
"""Markdown file reader.
Loads a markdown document and splits it into:
1. ``frontmatter`` — parsed YAML (empty dict if absent)
2. ``body`` — raw text after the closing ``---`` delimiter
3. ``entries`` — marker-delimited spans inside ``body``
The reader is purely parsing; it does not validate frontmatter shape,
entry content, or cross-references. Higher layers add business-aware
checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
``parse`` is sync (pure in-memory string processing). ``read`` is async
and uses :class:`anyio.Path` so file I/O does not block the event loop.
"""
from __future__ import annotations
from pathlib import Path
import anyio
from .entries import split_entries
from .frontmatter import parse_frontmatter
from .parsed import ParsedMarkdown
class MarkdownReader:
    """Turn markdown text or files into :class:`ParsedMarkdown` values."""

    @staticmethod
    def parse(text: str) -> ParsedMarkdown:
        """Parse text that is already in memory; performs no IO.

        The frontmatter is split off first, then entries are located in
        the remaining body.
        """
        frontmatter, remainder = parse_frontmatter(text)
        return ParsedMarkdown(
            frontmatter=frontmatter,
            body=remainder,
            entries=split_entries(remainder),
        )

    @staticmethod
    async def read(path: Path) -> ParsedMarkdown:
        """Load ``path`` as UTF-8 text via ``anyio.Path`` and parse it."""
        content = await anyio.Path(path).read_text(encoding="utf-8")
        return MarkdownReader.parse(content)

View File

@ -0,0 +1,269 @@
"""Markdown file writer with atomic write semantics.
Atomicity is provided by writing to a same-directory temp file
(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
the target. Keeping the temp file in the same directory guarantees the
rename is on the same filesystem (POSIX rename is atomic only within a
single fs).
All public methods are async. File I/O (``read_text`` / ``write_text``
/ ``mkdir``) goes through :class:`anyio.Path`; the few syscalls without
a native async equivalent (``os.fsync`` / ``os.replace`` / ``unlink``
in the cleanup path) are offloaded via :func:`anyio.to_thread.run_sync`.
In-process per-path locking
---------------------------
:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
the whole file (load frontmatter+body, merge an entry block, atomic
write the result). The atomic write itself is safe, but the read→write
window crosses ``await`` points. Concurrent asyncio tasks targeting the
same path would otherwise lose-update each other (both read N entries,
both produce N+1, second write overwrites the first → 1 entry lost).
To prevent this, an in-process per-path :class:`asyncio.Lock` is held
across the entire read-modify-write sequence. Lock objects live on the
writer instance (not class-level) so they bind to the event loop active
when the writer was constructed — this avoids the
"Lock bound to different loop" failure mode that surfaces when
pytest-asyncio rebuilds the loop between tests but module-level writer
singletons leak Lock objects across boundaries.
Process-level coordination (multi-process writers against the same
memory-root) remains the job of
:func:`everos.core.persistence.locking.memory_root_lock`, which uses
``fcntl.flock``. The two locks compose: per-path async lock serialises
tasks within one process, ``memory_root_lock`` serialises processes
against each other.
"""
from __future__ import annotations
import asyncio
import contextlib
import os
import uuid
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any
import anyio
from ..memory_root import MemoryRoot
from .entries import EntryId
from .frontmatter import dump_frontmatter
from .reader import MarkdownReader
class MarkdownWriter:
    """Atomic writer for markdown files inside a memory-root.

    The ``memory_root`` reference is held to enable future enforcement that
    targets stay within the configured root; current writes do not depend on
    it for the rename itself (same-dir temp file).
    """

    def __init__(self, memory_root: MemoryRoot) -> None:
        self._memory_root = memory_root
        # Per-path async lock registry, keyed by resolved path. Inserts go
        # through ``dict.setdefault`` (see ``lock_for``), so two callers
        # racing to create the lock for one path end up sharing a single
        # Lock object.
        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
        # must outlive any task awaiting it; ref-counted GC would race with
        # those waiters. See Python bpo-28427 for the WeakValueDictionary
        # multithreading hazard that bites the weak-ref approach.
        self._path_locks: dict[Path, asyncio.Lock] = {}

    @property
    def memory_root(self) -> MemoryRoot:
        """The memory-root this writer was constructed with."""
        return self._memory_root

    def lock_for(self, path: Path) -> asyncio.Lock:
        """Return the per-path lock; create on first use.

        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
        can serialise their own multi-step ``read → compute → write``
        sequences against this writer's single-step ``append`` paths.
        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
        re-acquisition of the same lock from within an already-locked
        critical section (``asyncio.Lock`` is *not* reentrant).

        Args:
            path: File the lock should guard; relative paths and aliases
                are normalised with ``Path.resolve()`` first.

        Returns:
            The lock registered for the resolved path.
        """
        # Resolve to an absolute canonical path so aliases (relative vs.
        # absolute, symlinks) share the same lock object.
        key = Path(path).resolve()
        lock = self._path_locks.get(key)
        if lock is None:
            # ``setdefault`` makes check-and-insert a single dict operation:
            # if another caller registered a lock in the meantime, theirs is
            # returned and the fresh one is discarded.
            lock = self._path_locks.setdefault(key, asyncio.Lock())
        return lock

    async def write(self, path: Path, content: str) -> Path:
        """Atomically write ``content`` to ``path``.

        Steps:
            1. ``mkdir -p`` the parent directory.
            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
            3. ``flush`` + ``fsync`` the temp file.
            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).

        If any step after the temp file name is chosen fails — including
        the task being cancelled — the temp file is removed (best effort)
        and the original exception is re-raised.

        Returns:
            ``path`` as a :class:`~pathlib.Path` (not resolved).
        """
        target = Path(path)
        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
        try:
            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
            await anyio.to_thread.run_sync(os.replace, tmp, target)
        except BaseException:
            # ``BaseException`` rather than ``Exception``: cancellation is
            # delivered as ``CancelledError`` (a BaseException) and would
            # otherwise leave the staging file behind. The cleanup is
            # shielded so it can still run inside a cancelled scope.
            with anyio.CancelScope(shield=True):
                await _unlink_quiet(tmp)
            raise
        return target

    async def write_markdown(
        self,
        path: Path,
        *,
        frontmatter: Mapping[str, Any] | None = None,
        body: str = "",
    ) -> Path:
        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``.

        ``None`` / empty frontmatter contributes nothing, so the file then
        holds ``body`` only.
        """
        head = dump_frontmatter(frontmatter or {})
        return await self.write(path, head + body)

    async def append_entry(
        self,
        path: Path,
        *,
        entry_body: str,
        entry_id: EntryId,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append a single entry block to a markdown file, merging frontmatter.

        Convenience wrapper around :meth:`append_entries` for single-entry
        callers. See that method for full semantics.

        Args:
            path: Target markdown file. Created if missing.
            entry_body: Content between the open and close markers.
                One leading and trailing newline are added automatically.
            entry_id: The id to stamp on this entry. The caller normally
                builds it with :meth:`EntryId.next_for`.
            frontmatter_updates: Mapping shallow-merged into existing
                frontmatter (later wins). ``None`` skips the merge.

        Returns:
            ``path`` as a :class:`~pathlib.Path`.
        """
        return await self.append_entries(
            path,
            [(entry_body, entry_id)],
            frontmatter_updates=frontmatter_updates,
        )

    async def append_entries(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append ``N`` entry blocks in a single locked read-modify-write cycle.

        Compared with calling :meth:`append_entry` ``N`` times, this:

        * Performs one file read + one file write instead of ``N`` of each
          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
          already holds many entries).
        * Holds the per-path lock for one short critical section instead of
          ``N`` separate acquisitions.
        * Updates ``frontmatter`` once (no intermediate ``entry_count``
          flapping).

        The caller assigns and supplies all :class:`EntryId` values. The
        order in ``entries`` is the order the blocks land in the file.

        Args:
            path: Target markdown file. Created if missing.
            entries: ``(entry_body, entry_id)`` pairs to append, in order.
                An empty sequence is allowed; the file is still rewritten
                (with any frontmatter updates applied).
            frontmatter_updates: Mapping shallow-merged into the existing
                frontmatter once per call.

        Returns:
            ``path`` as a :class:`~pathlib.Path`.
        """
        target = Path(path)
        async with self.lock_for(target):
            return await self._append_entries_unlocked(
                target,
                entries,
                frontmatter_updates=frontmatter_updates,
            )

    async def _append_entries_unlocked(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Same as :meth:`append_entries` but assumes the caller already
        holds :meth:`lock_for` ``(path)``.

        For use by higher-level writers that perform a multi-step
        ``read → compute eid → write`` sequence and need to keep the lock
        held across the read and the write. Public ``append_entries`` /
        ``append_entry`` always wrap this with the lock.

        This method never takes the lock itself (``asyncio.Lock`` is not
        reentrant, so re-acquiring it here would deadlock a caller that
        already holds it). Calling it *without* holding the lock breaks
        the safety contract.
        """
        target = Path(path)

        # 1. Load existing markdown (or initialise empty).
        if await anyio.Path(target).is_file():
            parsed = await MarkdownReader.read(target)
            meta: dict[str, Any] = dict(parsed.frontmatter)
            body = parsed.body
        else:
            meta = {}
            body = ""

        # 2. Shallow-merge frontmatter updates.
        if frontmatter_updates:
            meta.update(frontmatter_updates)

        # 3. Append all entry blocks in order.
        if entries:
            # Make sure the first new marker starts on its own line.
            if body and not body.endswith("\n"):
                body += "\n"
            appended_blocks: list[str] = []
            for entry_body, entry_id in entries:
                eid_str = entry_id.format()
                appended_blocks.append(
                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
                    f"<!-- /entry:{eid_str} -->\n"
                )
            body = body + "".join(appended_blocks)

        # 4. Atomic write.
        return await self.write_markdown(target, frontmatter=meta, body=body)
def _write_and_fsync(tmp: Path, content: str) -> None:
    """Write ``content`` to ``tmp`` as UTF-8 text and fsync it.

    Synchronous on purpose: the writer hands this to a worker thread.
    """
    with open(tmp, mode="w", encoding="utf-8") as stream:
        stream.write(content)
        # Push Python's buffer to the OS first, then ask the OS to
        # commit the file to disk.
        stream.flush()
        descriptor = stream.fileno()
        os.fsync(descriptor)
async def _unlink_quiet(tmp: Path) -> None:
    """Remove ``tmp`` if it exists, ignoring any ``OSError``.

    Used for cleanup after a failed write, where the original exception
    should be the one the caller sees.
    """
    try:
        await anyio.Path(tmp).unlink(missing_ok=True)
    except OSError:
        pass