Skip to content

Markdown Conversion

Markdown conversion utilities for TeXSmith.

MarkdownConversionError

Bases: Exception

Raised when Markdown cannot be converted into HTML.

MarkdownDocument dataclass

MarkdownDocument(html: str, front_matter: dict[str, Any])

Result of converting Markdown into HTML.

deduplicate_markdown_extensions

deduplicate_markdown_extensions(
    values: Iterable[str],
) -> list[str]

Remove duplicate extensions (compared case-insensitively) while preserving the order and original casing of each first occurrence.

Source code in src/texsmith/adapters/markdown/__init__.py
147
148
149
150
151
152
153
154
155
156
157
158
159
def deduplicate_markdown_extensions(values: Iterable[str]) -> list[str]:
    """Drop repeated extension names, keeping the first spelling of each.

    Names are compared case-insensitively, so ``"Tables"`` and ``"tables"``
    count as the same extension; the entry seen first wins and keeps its
    original casing. Items that are not strings are ignored.

    Args:
        values: Extension names, possibly containing duplicates.

    Returns:
        The unique names in the order they were first encountered.
    """
    # Dicts preserve insertion order, and setdefault never overwrites, so the
    # first spelling seen for each lowercase key is the one that survives.
    first_seen: dict[str, str] = {}
    for name in values:
        if isinstance(name, str):
            first_seen.setdefault(name.lower(), name)
    return list(first_seen.values())

normalize_markdown_extensions

normalize_markdown_extensions(
    values: Iterable[str] | str | None,
) -> list[str]

Normalise extension names from CLI-friendly strings into a flat list.

Source code in src/texsmith/adapters/markdown/__init__.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def normalize_markdown_extensions(
    values: Iterable[str] | str | None,
) -> list[str]:
    """Normalise extension names from CLI-friendly strings into a flat list."""
    if values is None:
        return []

    if isinstance(values, str):
        candidates: Iterable[str] = [values]
    else:
        candidates = values

    normalized: list[str] = []
    for value in candidates:
        if not isinstance(value, str):
            continue
        chunks = re.split(r"[,\s\x00]+", value)
        normalized.extend(chunk for chunk in chunks if chunk)
    return normalized

render_markdown

render_markdown(
    source: str,
    extensions: Sequence[str] | None = None,
    *,
    base_path: str | Path | None = None,
) -> MarkdownDocument

Convert Markdown source into HTML while collecting front matter.

Source code in src/texsmith/adapters/markdown/__init__.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def render_markdown(
    source: str,
    extensions: Sequence[str] | None = None,
    *,
    base_path: str | Path | None = None,
) -> MarkdownDocument:
    """Convert Markdown source into HTML while collecting front matter.

    Args:
        source: Raw Markdown text, optionally starting with a YAML front
            matter block.
        extensions: Python-Markdown extension names to activate. ``None`` is
            treated as an empty list.
        base_path: Directory used as the snippet search path when the
            ``pymdownx.snippets`` extension is active, and exposed to the
            processor as ``texsmith_mermaid_base_path``.

    Returns:
        A ``MarkdownDocument`` holding the rendered HTML and the parsed
        front matter mapping.

    Raises:
        MarkdownConversionError: If the ``markdown`` package is missing or
            the conversion itself fails.
    """
    try:
        import markdown
    except ModuleNotFoundError as exc:  # pragma: no cover - environment dependent
        raise MarkdownConversionError(
            "Python Markdown is required to process Markdown inputs; "
            "install the 'markdown' package."
        ) from exc

    metadata, markdown_body = split_front_matter(source)

    active_extensions = list(extensions or ())
    extensions_key = tuple(active_extensions)

    # Resolve the base path once, falling back to the unresolved path when the
    # filesystem refuses, so the snippet lookup below cannot raise OSError.
    resolved_base: Path | None = None
    if base_path is not None:
        try:
            resolved_base = Path(base_path).resolve()
        except OSError:
            resolved_base = Path(base_path)

    snippet_enabled = any(
        _normalise_extension_name(extension) == "pymdownx.snippets"
        for extension in active_extensions
    )
    snippet_paths: tuple[str, ...] = ()
    if snippet_enabled and resolved_base is not None:
        snippet_paths = (str(resolved_base),)

    entry = _resolve_markdown_entry(markdown, extensions_key, snippet_paths)

    try:
        # The processor is used under the entry's lock; reset it first so no
        # state from an earlier conversion carries over.
        with entry.lock:
            processor = entry.processor
            reset_callback = getattr(processor, "reset", None)
            if callable(reset_callback):
                reset_callback()
            processor.texsmith_mermaid_base_path = (
                str(resolved_base) if resolved_base is not None else None
            )
            html = processor.convert(markdown_body)
    except MarkdownConversionError:
        raise
    except Exception as exc:  # pragma: no cover - library-controlled
        raise MarkdownConversionError(f"Failed to convert Markdown source: {exc}") from exc

    return MarkdownDocument(html=html, front_matter=metadata)

resolve_markdown_extensions

resolve_markdown_extensions(
    requested: Iterable[str] | None,
    disabled: Iterable[str] | None,
) -> list[str]

Return the active Markdown extension list after applying overrides.

Source code in src/texsmith/adapters/markdown/__init__.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def resolve_markdown_extensions(
    requested: Iterable[str] | None,
    disabled: Iterable[str] | None,
) -> list[str]:
    """Compute the effective extension list from defaults plus overrides.

    Args:
        requested: Extra extensions to enable on top of the defaults.
        disabled: Extensions to remove from the result, matched
            case-insensitively.

    Returns:
        The default extensions followed by the requested ones, with
        duplicates removed and disabled names filtered out.
    """
    requested_names = normalize_markdown_extensions(requested)
    blocked = {name.lower() for name in normalize_markdown_extensions(disabled)}

    # Defaults go first so they keep their position when a request repeats one.
    active = deduplicate_markdown_extensions([*DEFAULT_MARKDOWN_EXTENSIONS, *requested_names])

    if blocked:
        active = [name for name in active if name.lower() not in blocked]
    return active

split_front_matter

split_front_matter(
    source: str,
) -> tuple[dict[str, Any], str]

Split YAML front matter from Markdown content, returning metadata and body.

Source code in src/texsmith/adapters/markdown/__init__.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def split_front_matter(source: str) -> tuple[dict[str, Any], str]:
    """Separate a leading YAML front matter block from the Markdown body.

    The block must open with a ``---`` line (leading byte-order marks are
    tolerated) and close with a ``---`` or ``...`` line.

    Args:
        source: Full Markdown text.

    Returns:
        ``(metadata, body)``. When there is no front matter, the block is
        never closed, or the YAML fails to parse, the result is
        ``({}, source)`` with the source untouched. YAML that parses to
        something other than a mapping yields empty metadata, but the block
        is still removed from the body.
    """
    without_bom = source.lstrip("\ufeff")
    bom_prefix = source[: len(source) - len(without_bom)]

    rows = without_bom.splitlines()
    if not rows or rows[0].strip() != "---":
        return {}, source

    # Index of the first closing fence after the opening line, if any.
    fence_at = next(
        (pos for pos, row in enumerate(rows[1:], start=1) if row.strip() in ("---", "...")),
        None,
    )
    if fence_at is None:
        return {}, source

    yaml_text = "\n".join(rows[1:fence_at])
    try:
        parsed = yaml.safe_load(yaml_text)
    except yaml.YAMLError:
        return {}, source
    metadata = parsed if isinstance(parsed, dict) else {}

    # The body is re-joined with "\n"; a trailing newline is restored only if
    # the original source ended with one.
    remainder = "\n".join(rows[fence_at + 1 :])
    trailing = "\n" if source.endswith("\n") else ""
    return metadata, f"{bom_prefix}{remainder}{trailing}"

Central registry for TeXSmith's bundled Markdown extensions.

ExtensionSpec dataclass

ExtensionSpec(
    slug: str,
    markdown_entry: str,
    renderer_entry: str | None = None,
    mkdocs_entry: str | None = None,
    description: str | None = None,
)

Describe how to import Markdown and renderer hooks for an extension.

iter_entry_points

iter_entry_points() -> Iterable[str]

Yield configured entry points for documentation/debugging.

Source code in src/texsmith/extensions/__init__.py
54
55
56
57
58
59
60
def iter_entry_points(self) -> Iterable[str]:
    """Yield the entry points configured on this spec.

    The Markdown entry is always produced; the renderer and MkDocs entries
    follow, in that order, only when they are set.
    """
    yield self.markdown_entry
    for optional_entry in (self.renderer_entry, self.mkdocs_entry):
        if optional_entry:
            yield optional_entry

available_extensions

available_extensions() -> list[ExtensionSpec]

Return the registered extension specs sorted by slug.

Source code in src/texsmith/extensions/__init__.py
121
122
123
def available_extensions() -> list[ExtensionSpec]:
    """List every registered extension spec, ordered by registry key."""
    ordered_slugs = sorted(_EXTENSIONS)
    return [_EXTENSIONS[slug] for slug in ordered_slugs]

get_extension_spec

get_extension_spec(name: str) -> ExtensionSpec

Look up the runtime spec for a given extension slug or qualified name.

Source code in src/texsmith/extensions/__init__.py
126
127
128
129
130
131
132
def get_extension_spec(name: str) -> ExtensionSpec:
    """Return the registered spec matching an extension slug or qualified name.

    Raises:
        KeyError: If no extension is registered under the normalised name.
    """
    lookup_key = _normalise_slug(name)
    try:
        spec = _EXTENSIONS[lookup_key]
    except KeyError as missing:  # pragma: no cover - defensive
        # Report the name the caller supplied, not the normalised key.
        raise KeyError(f"No TeXSmith extension named '{name}'.") from missing
    return spec

load_markdown_extension

load_markdown_extension(name: str, **config: Any) -> Any

Instantiate a Python-Markdown extension by slug or qualified name.

Source code in src/texsmith/extensions/__init__.py
135
136
137
138
139
def load_markdown_extension(name: str, **config: Any) -> Any:
    """Build a Python-Markdown extension from its slug or qualified name.

    Args:
        name: Extension slug or qualified name known to the registry.
        **config: Keyword options forwarded unchanged to the extension factory.

    Returns:
        Whatever the factory behind the spec's Markdown entry point returns.
    """
    entry_point = get_extension_spec(name).markdown_entry
    build: Callable[..., Any] = _load_attribute(entry_point)
    return build(**config)

load_mkdocs_plugin

load_mkdocs_plugin(name: str) -> type[Any] | None

Return the MkDocs plugin class for an extension, if any.

Source code in src/texsmith/extensions/__init__.py
151
152
153
154
155
156
157
def load_mkdocs_plugin(name: str) -> type[Any] | None:
    """Load the MkDocs plugin class tied to an extension.

    Returns:
        The plugin class, or ``None`` when the extension's spec declares no
        MkDocs entry point.
    """
    entry_point = get_extension_spec(name).mkdocs_entry
    if entry_point:
        plugin_cls: type[Any] = _load_attribute(entry_point)
        return plugin_cls
    return None

register_all_renderers

register_all_renderers(renderer: object) -> None

Register every available renderer hook on the provided renderer.

Source code in src/texsmith/extensions/__init__.py
142
143
144
145
146
147
148
def register_all_renderers(renderer: object) -> None:
    """Apply every registered renderer hook to ``renderer``.

    Specs that declare no renderer entry point are skipped.
    """
    for spec in _EXTENSIONS.values():
        entry_point = spec.renderer_entry
        if entry_point:
            install: Callable[[object], None] = _load_attribute(entry_point)
            install(renderer)

Extension to capture undefined Markdown footnotes.

MissingFootnotesExtension

MissingFootnotesExtension(**kwargs: Any)

Bases: Extension

Detect footnote references lacking explicit definitions.

Source code in src/texsmith/extensions/missing_footnotes.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def __init__(self, **kwargs: Any) -> None:
    """Declare the extension's options and initialise its tracking state."""
    # Each option is a [default value, description] pair. The table is
    # assigned before the base initialiser runs, as in the original ordering.
    option_table = {
        "element": ["texsmith-missing-footnote", "Tag inserted for missing notes."],
        "text_template": [
            "{id}",
            "Fallback text rendered for missing notes (can reference {id}).",
        ],
        "css_class": ["", "CSS class applied to placeholder nodes."],
        "link_to_list": [
            False,
            "When true, link to the generated footnote list despite the absence.",
        ],
        "data_attribute": [
            "data-footnote-id",
            "Custom attribute storing the missing footnote identifier.",
        ],
    }
    self.config = option_table
    super().__init__(**kwargs)

    # Identifiers of footnotes that were referenced but never defined.
    self.missing_ids: set[str] = set()
    # Ensures the footnote pattern is wrapped at most once per instance.
    self._patched_pattern = False
    self._footnotes_ext: FootnoteExtension | None = None

build_placeholder

build_placeholder(
    identifier: str, pattern: Any
) -> ElementTree.Element

Construct the XML placeholder inserted for missing footnotes.

Source code in src/texsmith/extensions/missing_footnotes.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def build_placeholder(self, identifier: str, pattern: Any) -> ElementTree.Element:
    """Create the element that stands in for an undefined footnote.

    Args:
        identifier: Identifier of the footnote that has no definition.
        pattern: Inline pattern handling the reference; its ``md`` attribute
            is used to look up the footnotes extension when linking.

    Returns:
        An element named after the ``element`` option. It carries the
        configured CSS class and data attribute when those are non-empty.
        The rendered text sits directly on the element, or inside a child
        ``<a>`` pointing at ``#fn<separator><identifier>`` when
        ``link_to_list`` is enabled.
    """
    placeholder = ElementTree.Element(self.getConfig("element"))

    class_name = self.getConfig("css_class")
    if class_name:
        placeholder.set("class", class_name)

    attribute_name = self.getConfig("data_attribute")
    if attribute_name:
        placeholder.set(attribute_name, identifier)

    label = self.getConfig("text_template").format(id=identifier)
    if not self.getConfig("link_to_list"):
        placeholder.text = label
        return placeholder

    # Use ":" as the separator when no footnotes extension is found.
    footnotes = self._get_footnotes_extension(pattern.md)
    separator = ":" if not footnotes else footnotes.get_separator()

    link = ElementTree.SubElement(placeholder, "a")
    if class_name:
        link.set("class", class_name)
    link.set("href", f"#fn{separator}{identifier}")
    link.text = label
    return placeholder

extendMarkdown

extendMarkdown(md: Any) -> None

Patch the inline footnote processor to capture missing notes.

Source code in src/texsmith/extensions/missing_footnotes.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def extendMarkdown(self, md: Any) -> None:  # noqa: N802 - markdown hook
    """Wrap the inline footnote handler so undefined references are recorded.

    The extension registers itself on ``md`` on every call, but the handler
    is wrapped only once per extension instance.

    Raises:
        RuntimeError: If no footnote inline pattern can be found on ``md``.
    """
    md.registerExtension(self)
    if self._patched_pattern:
        return

    footnote_pattern = self._resolve_footnote_pattern(md)
    if footnote_pattern is None:
        raise RuntimeError(
            "MissingFootnotesExtension requires the 'footnotes' extension to be "
            "registered beforehand."
        )

    delegate = footnote_pattern.handleMatch
    owner = self

    def handle_with_fallback(bound_pattern: Any, match: Any, data: Any) -> Any:
        outcome = delegate(match, data)
        if not outcome or outcome[0] is None:
            # The stock handler produced no node: record the identifier and
            # substitute a placeholder spanning the whole match.
            missing_id = match.group(1)
            owner.missing_ids.add(missing_id)
            placeholder = owner.build_placeholder(missing_id, bound_pattern)
            return placeholder, match.start(0), match.end(0)
        return outcome

    # Bind to this pattern object so only it is affected.
    footnote_pattern.handleMatch = MethodType(handle_with_fallback, footnote_pattern)
    self._patched_pattern = True

reset

reset() -> None

Reset cached state before each Markdown conversion.

Source code in src/texsmith/extensions/missing_footnotes.py
38
39
40
def reset(self) -> None:
    """Empty the set of recorded missing footnote identifiers."""
    # Clear in place so existing references to the set observe the reset.
    recorded = self.missing_ids
    recorded.clear()

makeExtension

makeExtension(**kwargs: Any) -> MissingFootnotesExtension

Entry point exposed to Python-Markdown.

Source code in src/texsmith/extensions/missing_footnotes.py
123
124
125
def makeExtension(**kwargs: Any) -> MissingFootnotesExtension:  # noqa: N802 - markdown hook
    """Factory hook that builds the extension from keyword options."""
    extension = MissingFootnotesExtension(**kwargs)
    return extension