Skip to content

Codecs

knowledgecomplex.codecs.markdown — YAML-frontmatter + markdown codec.

Implements the :class:`~knowledgecomplex.schema.Codec` protocol for knowledge complexes where each element is a markdown file with YAML frontmatter (structured metadata) and a markdown body with predefined section headers (prose content).

This follows the pattern used in production knowledge complexes authored in Obsidian — each element is a .md file, the YAML header holds structured attributes, and ## sections hold prose content.

Usage::

from knowledgecomplex.codecs import MarkdownCodec

codec = MarkdownCodec(
    frontmatter_attrs=["name", "author", "abstract"],
    section_attrs=["notes", "methodology"],
)
kc.register_codec("Paper", codec)

# Compile: KC element -> markdown file at its URI
kc.element("paper-1").compile()

# Decompile: markdown file -> KC element attributes
kc.element("paper-1").decompile()

MarkdownCodec

Codec for YAML-frontmatter + markdown files.

Each element maps to a single .md file. Attributes are stored in two places:

  • YAML frontmatter (between --- delimiters): structured metadata fields like name, author, description. These map 1:1 to KC element attributes.

  • Markdown body sections (## Header blocks): prose content like notes or analysis. The section header becomes the attribute name (lowercased, spaces replaced with underscores), and the section body becomes the attribute value.

Parameters:

Name Type Description Default
frontmatter_attrs list[str]

Attribute names stored in the YAML frontmatter.

required
section_attrs list[str]

Attribute names stored as ## Section blocks in the body.

required
Source code in knowledgecomplex/codecs/markdown.py
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
class MarkdownCodec:
    """Codec for YAML-frontmatter + markdown files.

    Each element maps to a single ``.md`` file. Attributes are stored in
    two places:

    - **YAML frontmatter** (between ``---`` delimiters): structured metadata
      fields like ``name``, ``author``, ``description``. These map 1:1 to
      KC element attributes.

    - **Markdown body sections** (``## Header`` blocks): prose content like
      notes or analysis. The section header becomes the attribute name
      (lowercased, spaces replaced with underscores), and the section body
      becomes the attribute value.

    Files are always read and written as UTF-8 so that the on-disk format
    does not depend on the platform's locale encoding.

    Parameters
    ----------
    frontmatter_attrs : list[str]
        Attribute names stored in the YAML frontmatter.
    section_attrs : list[str]
        Attribute names stored as ``## Section`` blocks in the body.
    """

    # URI scheme prefix that is dropped to obtain a filesystem path.
    _FILE_SCHEME = "file://"
    # Placeholder written for (and read back as) an empty section body.
    _EMPTY_MARKER = "(empty)"
    # Opening ``---`` line, optional frontmatter text, closing ``---`` line,
    # optional body. The frontmatter group is optional so an empty block
    # (``---`` directly followed by ``---``) matches, and the body group is
    # optional so a closing ``---`` at end-of-file matches.
    _FRONTMATTER_RE = re.compile(
        r"^---\s*\n(?:(.*?)\n)?---\s*(?:\n(.*))?$", re.DOTALL
    )
    # A level-2 markdown header at the start of a line.
    _SECTION_RE = re.compile(r"^## (.+)$", re.MULTILINE)

    def __init__(
        self,
        frontmatter_attrs: list[str],
        section_attrs: list[str],
    ) -> None:
        # Copy so later mutation of the caller's lists cannot change the codec.
        self.frontmatter_attrs = list(frontmatter_attrs)
        self.section_attrs = list(section_attrs)

    @classmethod
    def _uri_to_path(cls, uri: str) -> Path:
        """Return the filesystem path for *uri*, dropping a leading ``file://``."""
        # Only a leading scheme is removed; "file://" occurring later in the
        # string is part of the path and must be left untouched.
        if uri.startswith(cls._FILE_SCHEME):
            uri = uri[len(cls._FILE_SCHEME):]
        return Path(uri)

    @classmethod
    def _split_frontmatter(cls, text: str) -> "tuple[str, str] | None":
        """Split *text* into ``(frontmatter, body)``; ``None`` if no frontmatter."""
        found = cls._FRONTMATTER_RE.match(text)
        if found is None:
            return None
        # Either group is None when the corresponding part is absent.
        return found.group(1) or "", found.group(2) or ""

    @classmethod
    def _parse_sections(cls, body: str) -> "dict[str, str]":
        """Map normalised ``## Header`` names to their stripped section text."""
        sections: dict[str, str] = {}
        headers = list(cls._SECTION_RE.finditer(body))
        for idx, hit in enumerate(headers):
            key = hit.group(1).strip().lower().replace(" ", "_")
            # A section runs until the next ``##`` header or the end of the body.
            stop = headers[idx + 1].start() if idx + 1 < len(headers) else len(body)
            content = body[hit.end():stop].strip()
            sections[key] = "" if content == cls._EMPTY_MARKER else content
        return sections

    def compile(self, element: dict) -> None:
        """Write an element record to a markdown file at its URI.

        Parent directories are created as needed and an existing file is
        overwritten.

        Parameters
        ----------
        element : dict
            Keys: ``id``, ``type``, ``uri``, plus all attribute key-value pairs.

        Raises
        ------
        KeyError
            If ``id``, ``type`` or ``uri`` is missing from *element*.
        """
        path = self._uri_to_path(element["uri"])
        path.parent.mkdir(parents=True, exist_ok=True)

        # Build YAML frontmatter: id and type first, then configured attributes
        # that are present on the element.
        fm: dict[str, Any] = {
            "id": element["id"],
            "type": element["type"],
        }
        for attr in self.frontmatter_attrs:
            if attr in element:
                fm[attr] = element[attr]

        # Build markdown body: a title followed by one section per attribute.
        title = element.get("name", element["id"])
        lines = [f"# {title}", ""]
        for attr in self.section_attrs:
            header = attr.replace("_", " ").title()
            content = element.get(attr, "")
            lines.append(f"## {header}")
            lines.append("")
            # str() keeps the join below from failing on non-string values.
            lines.append(str(content) if content else self._EMPTY_MARKER)
            lines.append("")

        # Write file
        fm_str = yaml.dump(fm, default_flow_style=False, sort_keys=False).strip()
        body = "\n".join(lines)
        path.write_text(f"---\n{fm_str}\n---\n\n{body}\n", encoding="utf-8")

    def decompile(self, uri: str) -> dict:
        """Read a markdown file and return attribute key-value pairs.

        Parameters
        ----------
        uri : str
            File URI (a leading ``file://`` prefix is stripped automatically).

        Returns
        -------
        dict
            Attribute key-value pairs (no ``id``, ``type``, or ``uri``).
            Frontmatter values are converted with ``str()``.

        Raises
        ------
        ValueError
            If the file has no YAML frontmatter block, or the frontmatter
            does not parse to a mapping.
        """
        path = self._uri_to_path(uri)
        text = path.read_text(encoding="utf-8")

        # Split frontmatter from body
        parts = self._split_frontmatter(text)
        if parts is None:
            raise ValueError(f"No YAML frontmatter found in {path}")
        fm_raw, body = parts

        # Parse YAML frontmatter
        fm = yaml.safe_load(fm_raw) or {}
        if not isinstance(fm, dict):
            # A list or scalar cannot be looked up by attribute name.
            raise ValueError(f"YAML frontmatter in {path} is not a mapping")
        attrs: dict[str, str] = {
            attr: str(fm[attr]) for attr in self.frontmatter_attrs if attr in fm
        }

        # Parse ## sections from body; only configured sections are returned.
        sections = self._parse_sections(body)
        for attr in self.section_attrs:
            if attr in sections:
                attrs[attr] = sections[attr]

        return attrs

compile(element)

Write an element record to a markdown file at its URI.

Parameters:

Name Type Description Default
element dict

Keys: id, type, uri, plus all attribute key-value pairs.

required
Source code in knowledgecomplex/codecs/markdown.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def compile(self, element: dict) -> None:
    """Write an element record to a markdown file at its URI.

    Parent directories are created as needed and an existing file is
    overwritten. The file is written as UTF-8 regardless of the platform's
    locale encoding.

    Parameters
    ----------
    element : dict
        Keys: ``id``, ``type``, ``uri``, plus all attribute key-value pairs.

    Raises
    ------
    KeyError
        If ``id``, ``type`` or ``uri`` is missing from *element*.
    """
    uri = element["uri"]
    # Drop only a leading scheme; "file://" later in the string is path text.
    if uri.startswith("file://"):
        uri = uri[len("file://"):]
    path = Path(uri)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Build YAML frontmatter: id and type first, then configured attributes
    # that are present on the element.
    fm: dict[str, Any] = {
        "id": element["id"],
        "type": element["type"],
    }
    for attr in self.frontmatter_attrs:
        if attr in element:
            fm[attr] = element[attr]

    # Build markdown body: a title followed by one section per attribute.
    title = element.get("name", element["id"])
    lines = [f"# {title}", ""]
    for attr in self.section_attrs:
        header = attr.replace("_", " ").title()
        content = element.get(attr, "")
        lines.append(f"## {header}")
        lines.append("")
        # "(empty)" is the placeholder for a missing or empty section; str()
        # keeps the join below from failing on non-string values.
        lines.append(str(content) if content else "(empty)")
        lines.append("")

    # Write file
    fm_str = yaml.dump(fm, default_flow_style=False, sort_keys=False).strip()
    body = "\n".join(lines)
    path.write_text(f"---\n{fm_str}\n---\n\n{body}\n", encoding="utf-8")

decompile(uri)

Read a markdown file and return attribute key-value pairs.

Parameters:

Name Type Description Default
uri str

File URI (file:// prefix stripped automatically).

required

Returns:

Type Description
dict

Attribute key-value pairs (no id, type, or uri).

Source code in knowledgecomplex/codecs/markdown.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def decompile(self, uri: str) -> dict:
    """Read a markdown file and return attribute key-value pairs.

    The file is read as UTF-8 regardless of the platform's locale encoding.

    Parameters
    ----------
    uri : str
        File URI (a leading ``file://`` prefix is stripped automatically).

    Returns
    -------
    dict
        Attribute key-value pairs (no ``id``, ``type``, or ``uri``).
        Frontmatter values are converted with ``str()``.

    Raises
    ------
    ValueError
        If the file has no YAML frontmatter block, or the frontmatter does
        not parse to a mapping.
    """
    # Drop only a leading scheme; "file://" later in the string is path text.
    if uri.startswith("file://"):
        uri = uri[len("file://"):]
    path = Path(uri)
    text = path.read_text(encoding="utf-8")

    # Split frontmatter from body. The frontmatter group is optional so an
    # empty block matches, and the body group is optional so a closing
    # ``---`` at end-of-file (no trailing newline) matches.
    fm_match = re.match(
        r"^---\s*\n(?:(.*?)\n)?---\s*(?:\n(.*))?$", text, re.DOTALL
    )
    if not fm_match:
        raise ValueError(f"No YAML frontmatter found in {path}")

    fm_raw = fm_match.group(1) or ""
    body = fm_match.group(2) or ""

    # Parse YAML frontmatter
    fm = yaml.safe_load(fm_raw) or {}
    if not isinstance(fm, dict):
        # A list or scalar cannot be looked up by attribute name.
        raise ValueError(f"YAML frontmatter in {path} is not a mapping")
    attrs: dict[str, str] = {}
    for attr in self.frontmatter_attrs:
        if attr in fm:
            attrs[attr] = str(fm[attr])

    # Parse ## sections from body
    section_pattern = re.compile(r"^## (.+)$", re.MULTILINE)
    sections = {}
    matches = list(section_pattern.finditer(body))
    for i, m in enumerate(matches):
        header = m.group(1).strip().lower().replace(" ", "_")
        start = m.end()
        # A section runs until the next ``##`` header or the end of the body.
        end = matches[i + 1].start() if i + 1 < len(matches) else len(body)
        content = body[start:end].strip()
        # "(empty)" is the placeholder for an empty section body.
        if content == "(empty)":
            content = ""
        sections[header] = content

    # Only configured sections are returned; other headers are ignored.
    for attr in self.section_attrs:
        if attr in sections:
            attrs[attr] = sections[attr]

    return attrs

verify_documents(kc, directory)

Check consistency between KC elements and markdown files on disk.

Verifies:

  • Every element with a URI has a corresponding file.
  • Every .md file in the directory has a corresponding element.
  • Attribute values in files match the KC (via decompile).

Parameters:

Name Type Description Default
kc KnowledgeComplex

The knowledge complex whose elements are checked against the files on disk.

required
directory str or Path

Root directory containing the markdown files.

required

Returns:

Type Description
list[str]

Discrepancy messages. Empty list means everything is consistent.

Source code in knowledgecomplex/codecs/markdown.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
def verify_documents(
    kc: "KnowledgeComplex",
    directory: str | Path,
) -> list[str]:
    """Check consistency between KC elements and markdown files on disk.

    Verifies:

    - Every element with a URI has a corresponding file.
    - Every ``.md`` file in the directory has a corresponding element.
    - Attribute values in files match the KC (via decompile).

    Parameters
    ----------
    kc : KnowledgeComplex
    directory : str or Path
        Root directory containing the markdown files.

    Returns
    -------
    list[str]
        Discrepancy messages. Empty list means everything is consistent.
    """
    directory = Path(directory)
    issues: list[str] = []

    # Collect URIs from KC elements
    uri_to_id: dict[str, str] = {}
    for eid in kc.element_ids():
        elem = kc.element(eid)
        if elem.uri:
            uri_to_id[elem.uri] = eid
            fpath = Path(elem.uri.replace("file://", ""))
            if not fpath.exists():
                issues.append(f"MISSING FILE: {eid} -> {fpath}")

    # Check for orphan files (in directory but not in KC)
    for md_file in sorted(directory.rglob("*.md")):
        file_uri = f"file://{md_file}"
        if file_uri not in uri_to_id:
            issues.append(f"ORPHAN FILE: {md_file} (no element in KC)")

    # Check attribute consistency
    for uri, eid in sorted(uri_to_id.items()):
        fpath = Path(uri.replace("file://", ""))
        if not fpath.exists():
            continue
        elem = kc.element(eid)
        try:
            codec = kc._resolve_codec(elem.type)
            file_attrs = codec.decompile(uri)
            kc_attrs = elem.attrs
            for key in file_attrs:
                if key in kc_attrs and file_attrs[key] != kc_attrs[key]:
                    issues.append(
                        f"MISMATCH: {eid}.{key} — "
                        f"KC='{kc_attrs[key][:40]}' vs file='{file_attrs[key][:40]}'"
                    )
        except Exception as e:
            issues.append(f"ERROR reading {eid}: {e}")

    return issues