Skip to content

Core

Configuration models used by the renderer.

CommonConfig

build_dir (Path | None)
Base directory for artifacts. Provide an absolute or project-relative path to override the default export root that books inherit when they do not specify one.
save_html (bool)
Persist the intermediate HTML render next to the PDF to aid troubleshooting before compilation.
mermaid_config (Path | None)
Path to a Mermaid configuration file. Point to a .json or .mermaid document to customise diagram rendering.
project_dir (Path | None)
MkDocs project root used to resolve relative paths when copying additional assets.
BCP 47 language code forwarded to LaTeX for hyphenation, translations, and
metadata localisation.
legacy_latex_accents (bool)
When True, escape accented characters, ligatures, and typographic punctuation using legacy macros. When False, keep Unicode glyphs compatible with LuaLaTeX/XeLaTeX (default).
language (str | None)
BCP 47 language code forwarded to LaTeX for hyphenation, translations, and metadata localisation.

CoverConfig

name (str)
Identifier of the cover template to apply. The value must match a template declared in the cover bundle.
color (str | None)
Primary colour override applied by the cover template.
logo (str | None)
Project-relative path to a logo asset displayed on the cover.

BookConfig

root (str | None)
Navigation entry treated as the starting point for the book. Use it when the root differs from the first MkDocs page.
title (str | None)
Title displayed on the cover and in output metadata. Falls back to site_name when omitted.
subtitle (str | None)
Optional subtitle appended to the cover and metadata.
author (str | None)
Primary author string rendered in the book metadata.
year (int | None)
Publication year to freeze in the output when site_date is not supplied.
email (str | None)
Contact address printed in the credits.
folder (Path | None)
Output directory for the rendered book. Defaults to a slug of the title when not provided.
frontmatter (list[str])
MkDocs page titles moved before the main matter.
backmatter (list[str])
MkDocs page titles grouped into the appendices.
base_level (int)
Heading offset applied to align section numbering with the template expectations.
copy_files (dict[str, str])
Mapping of glob patterns to destination paths for copying additional assets alongside the book.
index_is_foreword (bool)
Treat the index page as a foreword, typically removing numbering.
drop_title_index (bool)
Suppress the index page heading when it acts as a foreword.
cover (CoverConfig)
Nested configuration controlling the book cover.

LaTeXConfig

enabled (bool)
Toggle generation without discarding configuration.
books (list[BookConfig])
Collection of books to produce, inheriting defaults from CommonConfig.
clean_assets (bool)
Remove stale assets from build_dir to avoid accumulating unused files.

BookConfig

Bases: CommonConfig

Configuration for an individual book.

set_folder

set_folder() -> BookConfig

Populate the output folder from the book title when missing.

Source code in src/texsmith/core/config.py
157
158
159
160
161
162
@model_validator(mode="after")
def set_folder(self) -> BookConfig:
    """Derive ``folder`` from the title when no folder was configured.

    Registered as an "after" model validator. A folder that is already set
    is left untouched, and nothing is derived when the title is empty or
    missing.

    Returns:
        The instance itself.
    """
    if self.folder is not None or not self.title:
        return self
    # Hyphen-separated slug of the title becomes the output folder.
    self.folder = Path(slugify(self.title, separator="-"))
    return self

CommonConfig

Bases: BaseModel

Common configuration propagated to each book.

CoverConfig

Bases: BaseModel

Metadata used to render book covers.

LaTeXConfig

Bases: CommonConfig

Configuration for LaTeX taken from mkdocs.yml.

add_extra

add_extra(**extra_data: Any) -> None

Allow consumers to attach additional attributes at runtime.

Source code in src/texsmith/core/config.py
188
189
190
191
def add_extra(self, **extra_data: Any) -> None:
    """Attach arbitrary keyword arguments to the instance as attributes.

    Each value is stored through ``object.__setattr__``, which bypasses any
    ``__setattr__`` override defined on the instance's class.
    """
    for name in extra_data:
        object.__setattr__(self, name, extra_data[name])

propagate

propagate() -> LaTeXConfig

Propagate common values to nested book configurations.

Source code in src/texsmith/core/config.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
@model_validator(mode="after")
def propagate(self) -> LaTeXConfig:
    """Copy shared settings onto every book that leaves them unset.

    Registered as an "after" model validator. For each inherited field a
    book keeps its own value unless that value is ``None``, in which case
    the value held by this configuration is assigned to the book.

    Returns:
        The instance itself.
    """
    inherited = ("build_dir", "mermaid_config", "save_html", "project_dir", "language")
    # NOTE(review): only ``None`` triggers inheritance, so a field whose
    # book-level value is never ``None`` (possibly ``save_html``, documented
    # as a plain bool) would never be overwritten -- confirm this is intended.
    for book in self.books:
        for name in inherited:
            if getattr(book, name) is not None:
                continue
            setattr(book, name, getattr(self, name))
    return self

Rendering context primitives shared across the pipeline.

AssetRegistry dataclass

AssetRegistry(
    output_root: Path,
    assets_map: MutableMapping[str, Path] = dict(),
    copy_assets: bool = True,
)

Centralised registry for rendered assets.

get

get(key: str) -> Path

Retrieve a previously registered artefact.

Source code in src/texsmith/core/context.py
156
157
158
159
160
161
def get(self, key: str) -> Path:
    """Return the path registered under *key*.

    Raises:
        AssetMissingError: If nothing is registered under *key*; the
            original ``KeyError`` is chained as the cause.
    """
    try:
        stored = self.assets_map[key]
    except KeyError as exc:
        raise AssetMissingError(f"Missing asset '{key}'") from exc
    return Path(stored)

items

items() -> Iterable[tuple[str, Path]]

Iterate over registered assets yielding key/path pairs.

Source code in src/texsmith/core/context.py
163
164
165
def items(self) -> Iterable[tuple[str, Path]]:
    """Return a lazy iterable of ``(key, Path)`` pairs for every registered asset."""

    def _pairs(entries):
        for key, stored in entries:
            yield key, Path(stored)

    # The iterator over the mapping is created right away; the Path
    # conversion itself happens lazily as the result is consumed.
    return _pairs(iter(self.assets_map.items()))

latex_path

latex_path(path: Path | str) -> str

Return a LaTeX-friendly path for an artefact.

Source code in src/texsmith/core/context.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def latex_path(self, path: Path | str) -> str:
    """Return *path* as a POSIX-style string suitable for LaTeX sources.

    Relative paths are returned unchanged (with forward slashes). Absolute
    paths are expressed relative to the parent directory of ``output_root``
    when possible; if no relative form can be computed the absolute path is
    returned as-is.
    """
    target = Path(path)
    if target.is_absolute():
        base = self.output_root.parent
        try:
            target = target.relative_to(base)
        except ValueError:
            # Not located under ``base``: fall back to a computed relative
            # path, which may itself fail with ValueError.
            try:
                target = Path(os.path.relpath(target, base))
            except ValueError:
                pass  # keep the absolute path
    return target.as_posix()

lookup

lookup(key: str) -> Path | None

Return a previously registered artefact when available.

Source code in src/texsmith/core/context.py
151
152
153
154
def lookup(self, key: str) -> Path | None:
    """Return the path registered under *key*, or ``None`` when there is none."""
    stored = self.assets_map.get(key)
    if stored is None:
        return None
    return Path(stored)

register

register(key: str, artefact: Path | str) -> Path

Register a generated artefact and return its resolved path.

Source code in src/texsmith/core/context.py
143
144
145
146
147
148
149
def register(self, key: str, artefact: Path | str) -> Path:
    """Store *artefact* under *key* and return the path that was stored.

    Absolute paths are stored unchanged. A relative path is joined onto
    ``output_root`` and resolved when ``copy_assets`` is set; otherwise it
    is stored as given.
    """
    location = Path(artefact)
    if not location.is_absolute() and self.copy_assets:
        location = (self.output_root / location).resolve()
    self.assets_map[key] = location
    return location

DocumentState dataclass

DocumentState(
    abbreviations: dict[str, str] = dict(),
    acronym_keys: dict[str, str] = dict(),
    acronyms: dict[str, tuple[str, str]] = dict(),
    glossary: dict[str, dict[str, Any]] = dict(),
    snippets: dict[str, dict[str, Any]] = dict(),
    solutions: list[dict[str, Any]] = list(),
    headings: list[dict[str, Any]] = list(),
    exercise_counter: int = 0,
    has_index_entries: bool = False,
    requires_shell_escape: bool = False,
    counters: dict[str, int] = dict(),
    bibliography: dict[str, dict[str, Any]] = dict(),
    citations: list[str] = list(),
    footnotes: dict[str, str] = dict(),
    index_entries: list[tuple[str, ...]] = list(),
    pygments_styles: dict[str, str] = dict(),
    script_usage: list[dict[str, Any]] = list(),
    fallback_summary: list[dict[str, Any]] = list(),
    callouts_used: bool = False,
)

In-memory state accumulated while rendering a document.

add_heading

add_heading(
    *, level: int, text: str, ref: str | None = None
) -> None

Track heading metadata to power table-of-contents generation.

Source code in src/texsmith/core/context.py
103
104
105
def add_heading(self, *, level: int, text: str, ref: str | None = None) -> None:
    """Append a ``{"level", "text", "ref"}`` record to ``headings``."""
    entry = {"level": level, "text": text, "ref": ref}
    self.headings.append(entry)

add_solution

add_solution(solution: dict[str, Any]) -> None

Append a solution block encountered during parsing.

Source code in src/texsmith/core/context.py
 99
100
101
def add_solution(self, solution: dict[str, Any]) -> None:
    """Add *solution* to the end of ``solutions``."""
    collected = self.solutions
    collected.append(solution)

next_counter

next_counter(key: str = 'default') -> int

Increment and return the named counter.

Source code in src/texsmith/core/context.py
113
114
115
116
117
def next_counter(self, key: str = "default") -> int:
    """Advance the counter named *key* by one and return its new value.

    A counter that has not been used yet starts from zero, so the first
    call returns ``1``.
    """
    tally = self.counters
    tally[key] = tally.get(key, 0) + 1
    return tally[key]

next_exercise

next_exercise() -> int

Increment and return the exercise counter.

Source code in src/texsmith/core/context.py
107
108
109
110
111
def next_exercise(self) -> int:
    """Advance the ``"exercise"`` counter, mirror it on ``exercise_counter`` and return it."""
    number = self.next_counter("exercise")
    self.exercise_counter = number
    return number

peek_counter

peek_counter(key: str = 'default') -> int

Return the current value of the named counter without modifying it.

Source code in src/texsmith/core/context.py
119
120
121
def peek_counter(self, key: str = "default") -> int:
    """Read the counter named *key* without changing it; unknown counters read as ``0``."""
    current = self.counters.get(key, 0)
    return current

record_citation

record_citation(key: str) -> None

Track citation keys used throughout the document.

Source code in src/texsmith/core/context.py
127
128
129
130
131
132
def record_citation(self, key: str) -> None:
    """Remember *key* as cited, keeping first-seen order and ignoring repeats."""
    if key not in self._citation_index:
        # ``_citation_index`` handles the membership test; ``citations``
        # preserves the order in which keys were first recorded.
        self._citation_index.add(key)
        self.citations.append(key)

register_snippet

register_snippet(key: str, payload: dict[str, Any]) -> None

Cache snippet metadata to render later in the pipeline.

Source code in src/texsmith/core/context.py
95
96
97
def register_snippet(self, key: str, payload: dict[str, Any]) -> None:
    """Store *payload* under *key* in ``snippets``, replacing any previous entry."""
    store = self.snippets
    store[key] = payload

remember_abbreviation

remember_abbreviation(term: str, description: str) -> str

Track abbreviation definitions while ensuring consistency.

Source code in src/texsmith/core/context.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def remember_abbreviation(self, term: str, description: str) -> str:
    """Record an abbreviation and return the key it is stored under.

    Both arguments are stripped of surrounding whitespace first. An empty
    term or description is ignored and yields ``""``. A term seen for the
    first time gets a key from ``_generate_acronym_key`` and is stored in
    ``abbreviations``, ``acronym_keys`` and ``acronyms``. A repeated term
    returns its existing key; if the description differs from the stored
    one a warning is emitted and the stored description is kept.
    """
    label = term.strip()
    meaning = description.strip()
    if not (label and meaning):
        return ""

    known = self.abbreviations.get(label)
    if known is None:
        key = self._generate_acronym_key(label)
        self.abbreviations[label] = meaning
        self.acronym_keys[label] = key
        self.acronyms[key] = (label, meaning)
        return key

    if known != meaning:
        # stacklevel=2 attributes the warning to the caller of this method.
        warnings.warn(
            (
                f"Inconsistent acronym definition for '{label}': "
                f"'{known}' vs '{meaning}'"
            ),
            stacklevel=2,
        )
    return self.acronym_keys.get(label, "")

remember_acronym

remember_acronym(term: str, description: str) -> str

Register an acronym definition keyed by a normalised identifier.

Source code in src/texsmith/core/context.py
50
51
52
def remember_acronym(self, term: str, description: str) -> str:
    """Delegate to ``remember_abbreviation`` and return the key it produces."""
    key = self.remember_abbreviation(term=term, description=description)
    return key

remember_glossary

remember_glossary(key: str, entry: dict[str, Any]) -> None

Record a glossary entry keyed by its identifier.

Source code in src/texsmith/core/context.py
91
92
93
def remember_glossary(self, key: str, entry: dict[str, Any]) -> None:
    """Store *entry* under *key* in ``glossary``, replacing any previous entry."""
    terms = self.glossary
    terms[key] = entry

reset_counter

reset_counter(key: str) -> None

Clear the named counter if it has been tracked.

Source code in src/texsmith/core/context.py
123
124
125
def reset_counter(self, key: str) -> None:
    """Forget the counter named *key*; unknown names are ignored."""
    if key in self.counters:
        del self.counters[key]

RenderContext dataclass

RenderContext(
    config: BookConfig,
    formatter: LaTeXFormatter,
    document: Any,
    assets: AssetRegistry,
    state: DocumentState = DocumentState(),
    runtime: dict[str, Any] = dict(),
    phase: RenderPhase | None = None,
)

Shared context passed to every handler during rendering.

attach_runtime

attach_runtime(**runtime: Any) -> None

Attach ad-hoc data visible to handlers for the running phase.

Source code in src/texsmith/core/context.py
211
212
213
214
def attach_runtime(self, **runtime: Any) -> None:
    """Merge the given keyword arguments into both runtime stores.

    The values are written to ``_persistent_runtime`` and to the current
    ``runtime`` mapping, in that order.
    """
    for store in (self._persistent_runtime, self.runtime):
        store.update(runtime)

enter_phase

enter_phase(phase: RenderPhase) -> None

Mark the current phase and reset transient runtime data.

Source code in src/texsmith/core/context.py
205
206
207
208
209
def enter_phase(self, phase: RenderPhase) -> None:
    """Switch to *phase* and start it with a clean transient state.

    ``runtime`` is replaced by a new dict copied from
    ``_persistent_runtime``, and the child-suppression set recorded for
    this phase is emptied.
    """
    self.phase = phase
    self.runtime = dict(self._persistent_runtime)
    suppressed = self._skip_children[phase.value]
    suppressed.clear()

is_processed

is_processed(
    node: Any, *, phase: RenderPhase | None = None
) -> bool

Check whether a node has been processed in the given phase.

Source code in src/texsmith/core/context.py
223
224
225
226
227
228
def is_processed(self, node: Any, *, phase: RenderPhase | None = None) -> bool:
    """Tell whether *node* was marked as processed for a phase.

    The phase defaults to the context's current one. With no phase
    available the answer is ``False``. Nodes are matched by ``id()``.
    """
    label = phase or self.phase
    return label is not None and id(node) in self._processed_nodes[label.value]

mark_processed

mark_processed(
    node: Any, *, phase: RenderPhase | None = None
) -> None

Flag a node as already transformed for the selected phase.

Source code in src/texsmith/core/context.py
216
217
218
219
220
221
def mark_processed(self, node: Any, *, phase: RenderPhase | None = None) -> None:
    """Record *node* as processed for a phase.

    The phase defaults to the context's current one; with no phase
    available nothing is recorded. Nodes are tracked by ``id()``.
    """
    label = phase or self.phase
    if label is not None:
        self._processed_nodes[label.value].add(id(node))

should_skip_children

should_skip_children(
    node: Any, *, phase: RenderPhase | None = None
) -> bool

Check whether children should be skipped during traversal.

Source code in src/texsmith/core/context.py
237
238
239
240
241
242
def should_skip_children(self, node: Any, *, phase: RenderPhase | None = None) -> bool:
    """Tell whether traversal of *node*'s children was suppressed for a phase.

    The phase defaults to the context's current one. With no phase
    available the answer is ``False``. Nodes are matched by ``id()``.
    """
    label = phase or self.phase
    return label is not None and id(node) in self._skip_children[label.value]

suppress_children

suppress_children(
    node: Any, *, phase: RenderPhase | None = None
) -> None

Prevent traversal of node children for the active phase.

Source code in src/texsmith/core/context.py
230
231
232
233
234
235
def suppress_children(self, node: Any, *, phase: RenderPhase | None = None) -> None:
    """Record that *node*'s children must not be traversed for a phase.

    The phase defaults to the context's current one; with no phase
    available nothing is recorded. Nodes are tracked by ``id()``.
    """
    label = phase or self.phase
    if label is not None:
        self._skip_children[label.value].add(id(node))

Context objects used during document conversion.

AssetMapping dataclass

AssetMapping(
    source: Path, target: Path, kind: str | None = None
)

Describe how a source asset should be persisted for generation.

BinderContext dataclass

BinderContext(
    output_dir: Path,
    config: BookConfig,
    strategy: GenerationStrategy,
    language: str,
    slot_requests: dict[str, str],
    template_overrides: dict[str, Any],
    bibliography_map: dict[str, dict[str, Any]] = dict(),
    bibliography_collection: BibliographyCollection
    | None = None,
    template_binding: TemplateBinding | None = None,
    documents: list[Document] = list(),
    bound_segments: dict[
        str, list[SegmentContext]
    ] = dict(),
)

Binder-level context describing template binding and global state.

GenerationStrategy dataclass

GenerationStrategy(
    copy_assets: bool = True,
    convert_assets: bool = False,
    hash_assets: bool = False,
    prefer_inputs: bool = False,
    persist_manifest: bool = False,
)

Rendering strategy toggles shared across conversion workflows.

SegmentContext dataclass

SegmentContext(
    name: str,
    html: str,
    base_level: int,
    metadata: Mapping[str, Any] = dict(),
    bibliography: Mapping[str, Any] = dict(),
    assets: list[AssetMapping] = list(),
    destination: Path | None = None,
)

Represent a fragment destined for insertion into a template slot.

Shared conversion primitives exposed by the core package.

ConversionRequest dataclass

ConversionRequest(
    documents: Sequence[Path] = tuple(),
    bibliography_files: Sequence[Path] = list(),
    front_matter: Mapping[str, Any] | None = None,
    front_matter_path: Path | None = None,
    slot_assignments: Mapping[
        Path, Sequence[SlotAssignment]
    ] = dict(),
    selector: str = "article.md-content__inner",
    full_document: bool = False,
    base_level: int = 0,
    strip_heading_all: bool = False,
    strip_heading_first_document: bool = False,
    promote_title: bool = True,
    suppress_title: bool = False,
    numbered: bool = True,
    markdown_extensions: Sequence[str] = list(),
    template: str | None = None,
    render_dir: Path | None = None,
    template_options: Mapping[str, Any] = dict(),
    embed_fragments: bool = False,
    enable_fragments: Sequence[str] = tuple(),
    disable_fragments: Sequence[str] = tuple(),
    parser: str | None = None,
    disable_fallback_converters: bool = False,
    copy_assets: bool = True,
    convert_assets: bool = False,
    hash_assets: bool = False,
    manifest: bool = False,
    persist_debug_html: bool = False,
    language: str | None = None,
    legacy_latex_accents: bool = False,
    diagrams_backend: str | None = None,
    emitter: DiagnosticEmitter | None = None,
)

Immutable description of conversion inputs and engine settings.

copy

copy() -> ConversionRequest

Create a deep copy to avoid cross-run mutations.

Source code in src/texsmith/core/conversion/models.py
63
64
65
66
67
68
69
70
71
72
def copy(self) -> ConversionRequest:
    """Build a new ``ConversionRequest`` carrying copies of every field.

    All field values are deep-copied except ``emitter``, which is passed to
    the new request by reference.
    """
    cloned: dict[str, Any] = {}
    for spec in fields(self):
        current = getattr(self, spec.name)
        cloned[spec.name] = current if spec.name == "emitter" else copy.deepcopy(current)
    return ConversionRequest(**cloned)

InputKind

Bases: Enum

Supported input modalities handled by the conversion pipeline.

SlotAssignment dataclass

SlotAssignment(
    slot: str, selector: str | None, include_document: bool
)

Directive mapping a document onto a template slot.

UnsupportedInputError

Bases: Exception

Raised when a CLI input argument cannot be processed.

coerce_slot_selector

coerce_slot_selector(payload: Any) -> str | None

Normalise a selector definition coming from front matter.

Source code in src/texsmith/core/conversion/inputs.py
49
50
51
52
53
54
55
56
57
58
59
def coerce_slot_selector(payload: Any) -> str | None:
    """Reduce a front-matter selector definition to a plain string.

    A string is stripped and returned, or ``None`` if it is blank. For a
    mapping, the first of ``label``, ``title`` and ``section`` holding a
    non-blank string wins. Anything else yields ``None``.
    """
    if isinstance(payload, str):
        return payload.strip() or None
    if not isinstance(payload, Mapping):
        return None
    for field in ("label", "title", "section"):
        raw = payload.get(field)
        if not isinstance(raw, str):
            continue
        cleaned = raw.strip()
        if cleaned:
            return cleaned
    return None

extract_content

extract_content(html: str, selector: str) -> str

Extract and return the inner HTML for the first element matching selector.

Source code in src/texsmith/core/conversion/inputs.py
405
406
407
408
409
410
411
412
413
414
415
def extract_content(html: str, selector: str) -> str:
    """Return the inner HTML of the first element matched by *selector*.

    The document is parsed with the ``lxml`` backend, falling back to
    ``html.parser`` when BeautifulSoup reports ``FeatureNotFound``.

    Raises:
        ValueError: If no element matches *selector*.
    """
    try:
        tree = BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        tree = BeautifulSoup(html, "html.parser")

    match = tree.select_one(selector)
    if match is not None:
        return match.decode_contents()
    raise ValueError(f"Unable to locate content using selector '{selector}'.")

extract_front_matter_bibliography

extract_front_matter_bibliography(
    front_matter: Mapping[str, Any] | None,
) -> dict[str, InlineBibliographyEntry]

Return inline bibliography entries declared in the document front matter.

Source code in src/texsmith/core/conversion/inputs.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def extract_front_matter_bibliography(
    front_matter: Mapping[str, Any] | None,
) -> dict[str, InlineBibliographyEntry]:
    """Collect the inline bibliography declared under ``bibliography``.

    Returns an empty dict when *front_matter* is not a mapping or when its
    ``bibliography`` value is not a mapping. Entries whose key is not a
    string are skipped; the remaining ones are parsed with
    ``_parse_inline_bibliography_entry``.
    """
    if not isinstance(front_matter, Mapping):
        return {}

    declared = front_matter.get("bibliography")
    if not isinstance(declared, Mapping):
        return {}

    return {
        name: _parse_inline_bibliography_entry(name, raw)
        for name, raw in declared.items()
        if isinstance(name, str)
    }

extract_front_matter_slots

extract_front_matter_slots(
    front_matter: Mapping[str, Any],
) -> dict[str, str]

Collect slot overrides defined in document front matter.

Source code in src/texsmith/core/conversion/inputs.py
108
109
110
111
112
113
114
115
def extract_front_matter_slots(front_matter: Mapping[str, Any]) -> dict[str, str]:
    """Return the slot overrides declared in *front_matter*.

    The ``slots`` entry is used when it is truthy, otherwise
    ``entrypoints``; the chosen value is parsed with ``parse_slot_mapping``
    and the result is returned as a new dict.
    """
    declared = front_matter.get("slots") or front_matter.get("entrypoints")
    return dict(parse_slot_mapping(declared))

parse_slot_mapping

parse_slot_mapping(raw: Any) -> dict[str, str]

Parse slot mappings declared in front matter structures.

Source code in src/texsmith/core/conversion/inputs.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def parse_slot_mapping(raw: Any) -> dict[str, str]:
    """Turn a front-matter slot declaration into ``{slot name: selector}``.

    Three shapes are understood:

    * a mapping of slot name to a selector payload;
    * an iterable of mappings, each naming its slot under ``target`` or
      ``slot`` and its selector under ``label``, ``title`` or ``section``;
    * a single ``"name: selector"`` string.

    Falsy input, ``bytes`` and any other shape produce an empty dict.
    Entries lacking a usable slot name or selector are skipped.
    """
    resolved: dict[str, str] = {}
    if not raw:
        return resolved

    if isinstance(raw, Mapping):
        for name, payload in raw.items():
            if not isinstance(name, str):
                continue
            target = coerce_slot_selector(payload)
            slot = name.strip()
            if target and slot:
                resolved[slot] = target
    elif isinstance(raw, str):
        # Split on the first colon only, so the selector may contain colons.
        head, colon, tail = raw.strip().partition(":")
        slot = head.strip()
        target = tail.strip()
        if colon and slot and target:
            resolved[slot] = target
    elif isinstance(raw, Iterable) and not isinstance(raw, bytes):
        for item in raw:
            if not isinstance(item, Mapping):
                continue
            name = item.get("target") or item.get("slot")
            if not isinstance(name, str):
                continue
            hint = item.get("label") or item.get("title") or item.get("section")
            # When the first truthy hint is unusable, re-scan the whole entry.
            target = coerce_slot_selector(hint) or coerce_slot_selector(item)
            slot = name.strip()
            if slot and target:
                resolved[slot] = target

    return resolved

Diagnostics emitters

texsmith.core.diagnostics defines the DiagnosticEmitter protocol plus a few stock implementations. Pass any emitter into ConversionService, convert_documents, or TemplateSession to intercept warnings, errors, and structured events.

Emitter Description Typical usage
CliEmitter (texsmith.ui.cli.diagnostics) Rich-powered emitter used by the Typer CLI. Respects -v and --debug, paints warnings as panels, and streams structured events to the diagnostics sidebar. Default when running texsmith. Import it in automation scripts when you want human-friendly output.
LoggingEmitter Forwards warning, error, and event calls to the standard logging module. Daemons, notebooks, or services that rely on existing logging policy.
NullEmitter No-op implementation. Useful when you want silent conversions or plan to capture diagnostics out-of-band. Unit tests and benchmarking.

Emitters expose a debug_enabled flag so downstream handlers can decide whether to include stack traces or expensive state dumps. Implement your own to route diagnostics to metrics systems or structured loggers.

Diagnostic abstractions shared across the conversion pipeline.

DiagnosticEmitter

Bases: Protocol

Interface used to surface warnings, errors, and structured events.

LoggingEmitter

LoggingEmitter(
    *,
    logger_obj: Logger | None = None,
    debug_enabled: bool = False,
)

Emitter that forwards diagnostics to the standard logging module.

Source code in src/texsmith/core/diagnostics.py
44
45
46
47
48
def __init__(
    self, *, logger_obj: logging.Logger | None = None, debug_enabled: bool = False
) -> None:
    """Store the target logger and the debug flag.

    Args:
        logger_obj: Logger to forward to; the module-level ``logger`` is
            used when this is omitted.
        debug_enabled: Value stored on ``debug_enabled``.
    """
    self.debug_enabled = debug_enabled
    self._logger = logger_obj if logger_obj else logger

NullEmitter

Emitter that ignores every diagnostic.

format_event_message

format_event_message(
    name: str, payload: Mapping[str, Any]
) -> str | None

Return a human-friendly summary for selected diagnostic events.

Source code in src/texsmith/core/diagnostics.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def _format_detail_suffix(details: list[str]) -> str:
    """Render *details* as `` (a, b)``, or an empty string when there are none."""
    return f" ({', '.join(details)})" if details else ""


def format_event_message(name: str, payload: Mapping[str, Any]) -> str | None:
    """Return a human-friendly summary for selected diagnostic events.

    Args:
        name: Event identifier. ``asset_fetch``, ``asset_fetch_cached`` and
            ``doi_fetch`` are recognised.
        payload: Event data. A payload that cannot be converted with
            ``dict()`` is treated as empty.

    Returns:
        The summary line, or ``None`` for any other event name.
    """
    try:
        data = dict(payload)
    except Exception:  # pragma: no cover - defensive
        # Diagnostics must never break a conversion; fall back to no data.
        data = {}

    if name == "asset_fetch":
        url = data.get("url") or "<unknown>"
        suffix_hint = data.get("suffix_hint")
        fetch_details: list[str] = []
        if data.get("convert"):
            fetch_details.append("convert")
        if suffix_hint:
            fetch_details.append(f"suffix={suffix_hint}")
        return f"Fetching: {url}{_format_detail_suffix(fetch_details)}"

    if name == "asset_fetch_cached":
        url = data.get("url") or "<unknown>"
        reason = data.get("reason") or "cache"
        return f"Reusing cached remote image: {url} ({reason})"

    if name == "doi_fetch":
        doi_value = data.get("value") or data.get("doi") or "<unknown>"
        key = data.get("key") or "<unknown>"
        mode = data.get("mode")
        source = data.get("source") or data.get("resolved_source")
        # Keep only the truthy parts, in mode-then-source order.
        doi_details = [str(part) for part in (mode, source) if part]
        return f"Resolved DOI {doi_value} for entry '{key}'{_format_detail_suffix(doi_details)}"

    return None

Custom exception hierarchy for the rendering pipeline.

AssetMissingError

Bases: LatexRenderingError

Raised when an expected asset cannot be located or generated.

InvalidNodeError

Bases: LatexRenderingError

Raised when a handler receives an unexpected DOM node shape.

LatexRenderingError

Bases: RuntimeError

Base exception for rendering failures.

TransformerExecutionError

Bases: LatexRenderingError

Raised when an external converter fails to execute properly.

exception_hint

exception_hint(exc: BaseException) -> str | None

Return the most specific message available for an exception chain.

Source code in src/texsmith/core/exceptions.py
38
39
40
41
def exception_hint(exc: BaseException) -> str | None:
    """Return the last message in the chain gathered by ``exception_messages``.

    Returns ``None`` when the chain yields no message at all.
    """
    chain = exception_messages(exc)
    if not chain:
        return None
    return chain[-1]

exception_messages

exception_messages(exc: BaseException) -> list[str]

Return the collected message chain for an exception and its causes.

Source code in src/texsmith/core/exceptions.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def exception_messages(exc: BaseException) -> list[str]:
    """Return the collected message chain for an exception and its causes.

    Starting at *exc*, the chain is followed through ``__cause__`` and,
    when no explicit cause is set, through ``__context__``. A context that
    was suppressed with ``raise ... from None`` is not followed, matching
    how tracebacks are displayed.

    Args:
        exc: The outermost exception.

    Returns:
        The first non-empty line of each exception's message, outermost
        first. Exceptions with a blank message contribute nothing.
    """
    messages: list[str] = []
    visited: set[int] = set()  # ids already seen, to stop on cyclic chains
    current: BaseException | None = exc
    while current is not None and id(current) not in visited:
        visited.add(id(current))
        text = str(current).strip()
        if text:
            first_line = text.splitlines()[0].strip()
            if first_line:
                messages.append(first_line)
        # Compare against None explicitly: an exception class may define
        # ``__bool__``/``__len__`` and evaluate as falsy.
        if current.__cause__ is not None:
            current = current.__cause__
        elif current.__suppress_context__:
            current = None
        else:
            current = current.__context__
    return messages

Rule declaration and execution engine for the renderer.

This module implements the rule-based architecture that powers Texsmith's HTML-to-LaTeX pipeline. Handlers declare their intent via the @renders decorator, which records structural metadata (phase, priority, targeted tags). At runtime the :class:RenderEngine collects those declarations, organises them per :class:RenderPhase, and walks the BeautifulSoup DOM ensuring that each pass is executed in a predictable, stable order.

Architecture

Declaration layer
@renders stores a lightweight :class:RuleDefinition on every handler.
Registry layer
:class:RenderRegistry collates definitions into sortable :class:RenderRule instances grouped by phase/tag.
Execution layer
:class:RenderEngine coordinates multi-pass traversal using the private :class:_DOMVisitor to apply handlers depth-first while respecting auto-marking and child-suppression semantics.

This separation keeps rule authors focused on transformations while the engine handles ordering, deduplication, and orchestration concerns.

RenderEngine

RenderEngine(registry: RenderRegistry | None = None)

Execution engine that orchestrates the registered rules.

Source code in src/texsmith/core/rules.py
304
305
def __init__(self, registry: RenderRegistry | None = None) -> None:
    """Create an engine bound to *registry*.

    Args:
        registry: Registry to use. A new ``RenderRegistry`` is created only
            when this is ``None``.
    """
    # Test for None rather than truthiness so that a caller-supplied
    # registry is never swapped for a fresh one merely because it evaluates
    # as falsy (for example, if it reports a length of zero while empty).
    self.registry = registry if registry is not None else RenderRegistry()

collect_from

collect_from(owner: Any) -> None

Collect decorated callables from an object or module.

Source code in src/texsmith/core/rules.py
307
308
309
310
311
312
313
314
315
def collect_from(self, owner: Any) -> None:
    """Collect decorated callables from an object or module.

    Every name reported by ``dir(owner)`` is inspected for a
    ``__render_rule__`` attribute, looked up on the attribute itself and,
    failing that, on its ``__func__`` (as exposed by bound methods). Each
    ``RuleDefinition`` found is bound to its handler and registered with
    ``self.registry``.
    """
    for attribute in dir(owner):
        # ``dir()`` may report names whose lookup raises ``AttributeError``
        # (for instance a property that fails on this particular object).
        # Skip those instead of aborting the whole collection.
        handler = getattr(owner, attribute, None)
        if handler is None:
            continue
        definition = getattr(handler, "__render_rule__", None)
        if definition is None and hasattr(handler, "__func__"):
            definition = getattr(handler.__func__, "__render_rule__", None)
        if isinstance(definition, RuleDefinition):
            self.registry.register(definition.bind(handler))

register

register(handler: RuleCallable) -> None

Register a standalone callable decorated with @renders.

Source code in src/texsmith/core/rules.py
317
318
319
320
321
322
323
def register(self, handler: RuleCallable) -> None:
    """Register a standalone callable decorated with ``@renders``.

    Raises:
        TypeError: If ``handler`` does not carry a ``RuleDefinition`` in its
            ``__render_rule__`` attribute.
    """
    definition = getattr(handler, "__render_rule__", None)
    if isinstance(definition, RuleDefinition):
        self.registry.register(definition.bind(handler))
        return
    raise TypeError("Handler must be decorated with @renders")

run

run(root: Tag, context: RenderContext) -> None

Execute all registered rules against the provided DOM root.

Source code in src/texsmith/core/rules.py
325
326
327
328
329
330
331
332
333
334
335
336
def run(self, root: Tag, context: RenderContext) -> None:
    """Execute all registered rules against the provided DOM root.

    Phases are processed in ``RenderPhase`` iteration order. Within a phase,
    document-level rules are applied to ``root`` first, then the DOM visitor
    walks the tree with the full rule mapping for that phase.
    """
    for current_phase in RenderPhase:
        context.enter_phase(current_phase)
        rules_by_tag = self.registry.rules_for_phase(current_phase)

        # Rules registered under the synthetic document node run once, on the root.
        for document_rule in rules_by_tag.get(DOCUMENT_NODE, ()):
            self._execute_rule(document_rule, root, context)

        _DOMVisitor(current_phase, rules_by_tag, context).walk(root)

RenderPhase

Bases: Enum

Ordered passes executed while mutating the parsed HTML tree.

The renderer performs multiple sweeps over the DOM instead of a single monolithic traversal. Each phase isolates a category of mutations so that earlier transformations stabilise before later ones begin. This drastically reduces coupling between handlers and makes ordering guarantees explicit.

Phases progress from coarse structural edits to fine-grained formatting:

PRE
normalise the tree and discard unwanted nodes before any heavy lifting occurs.
BLOCK
build block-level LaTeX (paragraphs, lists, figures) once the structure is stable.
INLINE
apply inline formatting after blocks have established their final shape.
POST
run cleanup or bookkeeping steps that depend on previous phases, such as final numbering, synthetic nodes, or state aggregation.

BLOCK class-attribute instance-attribute

BLOCK = auto()

Block transformation pass: convert paragraphs, lists, figures, etc.

INLINE class-attribute instance-attribute

INLINE = auto()

Inline formatting pass: apply emphasis, links, inline math once blocks exist.

POST class-attribute instance-attribute

POST = auto()

Finalisation pass: run cleanup that depends on earlier transformations.

PRE class-attribute instance-attribute

PRE = auto()

DOM normalisation pass: strip/reshape nodes before structural work begins.

RenderRegistry

RenderRegistry()

Container used to gather render rules before execution.

Source code in src/texsmith/core/rules.py
159
160
161
def __init__(self) -> None:
    """Create an empty registry with no rules recorded."""
    # Maps ``id(rule)`` to the rule's name; filled in by ``register``.
    self._rule_sources: dict[int, str] = {}
    # Nested mapping: phase -> tag -> ordered list of rules.
    self._rules: dict[RenderPhase, dict[str, list[RenderRule]]] = {}

describe

describe() -> list[dict[str, object]]

Return a serialisable snapshot of the registered rules.

Source code in src/texsmith/core/rules.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def describe(self) -> list[dict[str, object]]:
    """Return a serialisable snapshot of the registered rules.

    Entries are emitted phase by phase, tags in sorted order within a phase,
    and rules in their stored order within a tag. ``order`` is the rule's
    position inside its tag bucket.
    """
    snapshot: list[dict[str, object]] = []
    for phase in RenderPhase:
        rules_by_tag = self.rules_for_phase(phase)
        for tag in sorted(rules_by_tag):
            snapshot.extend(
                {
                    "phase": phase.name,
                    "tag": tag,
                    "name": rule.name,
                    "priority": rule.priority,
                    "before": list(rule.before),
                    "after": list(rule.after),
                    "order": position,
                }
                for position, rule in enumerate(rules_by_tag[tag])
            )
    return snapshot

iter_phase

iter_phase(phase: RenderPhase) -> Iterable[RenderRule]

Iterate over rules for the provided phase.

Source code in src/texsmith/core/rules.py
178
179
180
181
182
def iter_phase(self, phase: RenderPhase) -> Iterable[RenderRule]:
    """Yield every rule stored for ``phase``, one tag bucket after another.

    Nothing is yielded when the phase has no registered rules.
    """
    for tag_rules in self._rules.get(phase, {}).values():
        for rule in tag_rules:
            yield rule

register

register(rule: RenderRule) -> None

Register a rule for later execution.

Source code in src/texsmith/core/rules.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
def register(self, rule: RenderRule) -> None:
    """Register a rule for later execution.

    The rule is appended to the bucket of each tag it targets (or to the
    single document-node bucket when it applies to the document), and every
    touched bucket is re-sorted in place via ``_sort_rules``.
    """
    buckets_by_tag = self._rules.setdefault(rule.phase, {})
    self._rule_sources.setdefault(id(rule), rule.name)

    # Document rules live in one dedicated bucket; other rules fan out per tag.
    target_tags = (DOCUMENT_NODE,) if rule.applies_to_document() else rule.tags
    for target in target_tags:
        bucket = buckets_by_tag.setdefault(target, [])
        bucket.append(rule)
        bucket[:] = self._sort_rules(bucket)

rules_for_phase

rules_for_phase(
    phase: RenderPhase,
) -> dict[str, tuple[RenderRule, ...]]

Return the rule mapping for the requested phase.

Source code in src/texsmith/core/rules.py
184
185
186
187
def rules_for_phase(self, phase: RenderPhase) -> dict[str, tuple[RenderRule, ...]]:
    """Return the rule mapping for the requested phase.

    The result is a new dict whose values are tuple copies of the stored
    buckets; an unknown phase yields an empty dict.
    """
    frozen: dict[str, tuple[RenderRule, ...]] = {}
    for tag, rules in self._rules.get(phase, {}).items():
        frozen[tag] = tuple(rules)
    return frozen

RenderRule dataclass

RenderRule(
    priority: int,
    phase: RenderPhase,
    tags: tuple[str, ...],
    name: str,
    handler: RuleCallable,
    auto_mark: bool = True,
    nestable: bool = True,
    after_children: bool = False,
    before: tuple[str, ...] = (),
    after: tuple[str, ...] = (),
)

Concrete rendering rule registered in the engine.

applies_to_document

applies_to_document() -> bool

Return True when the rule targets the synthetic document node.

Source code in src/texsmith/core/rules.py
120
121
122
def applies_to_document(self) -> bool:
    """Tell whether the synthetic document node is this rule's only target."""
    document_only = (DOCUMENT_NODE,)
    return self.tags == document_only

RuleDefinition dataclass

RuleDefinition(
    phase: RenderPhase,
    tags: tuple[str, ...],
    priority: int = 0,
    name: str | None = None,
    auto_mark: bool = True,
    nestable: bool = True,
    after_children: bool = False,
    before: tuple[str, ...] = (),
    after: tuple[str, ...] = (),
)

Descriptor installed on handler callables by the decorator.

bind

bind(handler: RuleCallable) -> RenderRule

Create a concrete rule instance bound to the callable.

Source code in src/texsmith/core/rules.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def bind(self, handler: RuleCallable) -> RenderRule:
    """Create a concrete rule instance bound to the callable.

    The rule name is the definition's explicit ``name`` when set; otherwise
    the handler's ``__name__``, falling back to the handler's class name.
    """
    rule_name = self.name
    if not rule_name:
        rule_name = getattr(handler, "__name__", handler.__class__.__name__)
    return RenderRule(
        priority=self.priority,
        phase=self.phase,
        tags=self.tags,
        name=rule_name,
        handler=handler,
        auto_mark=self.auto_mark,
        nestable=self.nestable,
        after_children=self.after_children,
        before=self.before,
        after=self.after,
    )

RuleFactory

Bases: Protocol

Protocol implemented by rule decorators.

Decorators return lightweight factory objects instead of immediately constructing :class:RenderRule instances. This indirection lets us bind metadata once (at decoration time) while deferring handler resolution until the registry collects rules. The factory pattern keeps the decorator API ergonomic, avoids premature instantiation, and allows the same definition to be rebound for different callables (e.g. class/static methods) without duplicating registration logic.

bind

bind(handler: RuleCallable) -> RenderRule

Create a concrete render rule for the decorated handler.

Source code in src/texsmith/core/rules.py
97
98
99
def bind(self, handler: RuleCallable) -> RenderRule:
    """Create a concrete render rule for the decorated handler.

    Protocol stub: the body is intentionally empty, and implementers supply
    the logic that turns ``handler`` into a ``RenderRule``.
    """
    ...

renders

renders(
    *tags: str,
    phase: RenderPhase = RenderPhase.BLOCK,
    priority: int = 0,
    name: str | None = None,
    auto_mark: bool = True,
    nestable: bool = True,
    after_children: bool = False,
    before: Iterable[str] = (),
    after: Iterable[str] = (),
) -> Callable[[RuleCallable], RuleCallable]

Decorator used to register element handlers.

Source code in src/texsmith/core/rules.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
def renders(
    *tags: str,
    phase: RenderPhase = RenderPhase.BLOCK,
    priority: int = 0,
    name: str | None = None,
    auto_mark: bool = True,
    nestable: bool = True,
    after_children: bool = False,
    before: Iterable[str] = (),
    after: Iterable[str] = (),
) -> Callable[[RuleCallable], RuleCallable]:
    """Decorator used to register element handlers.

    A single ``RuleDefinition`` is built from the arguments and attached to
    the decorated callable as ``__render_rule__``. The callable itself is
    returned unchanged.
    """
    # Without explicit tags the handler targets the synthetic document node.
    target_tags = tags if tags else (DOCUMENT_NODE,)
    # ``before`` / ``after`` may be arbitrary iterables; freeze them once here.
    runs_before = tuple(before)
    runs_after = tuple(after)
    definition = RuleDefinition(
        phase=phase,
        tags=tuple(target_tags),
        priority=priority,
        name=name,
        auto_mark=auto_mark,
        nestable=nestable,
        after_children=after_children,
        before=runs_before,
        after=runs_after,
    )

    def decorator(handler: RuleCallable) -> RuleCallable:
        # ``cast`` only quietens the type checker; the attribute lands on ``handler``.
        cast(Any, handler).__render_rule__ = definition
        return handler

    return decorator

Utility helpers specific to rendering.

escape_latex_chars

escape_latex_chars(
    text: str, *, legacy_accents: bool = False
) -> str

Escape LaTeX special characters leveraging pylatexenc.

Source code in src/texsmith/adapters/latex/utils.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def escape_latex_chars(text: str, *, legacy_accents: bool = False) -> str:
    """Escape LaTeX special characters leveraging pylatexenc.

    Superscript, subscript, and small/capital modifier-letter characters are
    copied through untouched. Every other run of characters is escaped with
    the basic escape map and, when ``legacy_accents`` is true, additionally
    converted with ``unicode_to_latex`` and wrapped.
    """
    if not text:
        return text

    def _is_passthrough(symbol: str) -> bool:
        # Characters without a Unicode name are never passed through.
        try:
            label = unicodedata.name(symbol)
        except ValueError:
            return False
        if "SUPERSCRIPT" in label or "SUBSCRIPT" in label:
            return True
        if "MODIFIER LETTER" not in label:
            return False
        return "SMALL" in label or "CAPITAL" in label

    def _encode(run: str) -> str:
        basic = "".join(_BASIC_LATEX_ESCAPE_MAP.get(symbol, symbol) for symbol in run)
        if not legacy_accents:
            return basic
        converted = unicode_to_latex(basic, non_ascii_only=True, unknown_char_warning=False)
        return _wrap_latex_output(converted)

    pieces: list[str] = []
    pending: list[str] = []

    def _flush() -> None:
        # Encode the accumulated run as one chunk, then start a new run.
        if pending:
            pieces.append(_encode("".join(pending)))
            pending.clear()

    for symbol in text:
        if _is_passthrough(symbol):
            _flush()
            pieces.append(symbol)
        else:
            pending.append(symbol)
    _flush()

    return "".join(pieces)

Abstractions for invoking Docker containers safely.

DockerLimits dataclass

DockerLimits(
    cpus: float | int | None = None,
    memory: str | None = None,
    pids_limit: int | None = None,
)

Runtime constraints for Docker containers.

DockerRunRequest dataclass

DockerRunRequest(
    image: str,
    args: Sequence[str] = tuple(),
    mounts: Sequence[VolumeMount] = tuple(),
    environment: Mapping[str, str] = dict(),
    workdir: str | None = None,
    user: str | None = None,
    use_host_user: bool = True,
    remove: bool = True,
    limits: DockerLimits | None = None,
    network: str | None = None,
    extra_args: Sequence[str] = tuple(),
)

Full request payload for a Docker execution.

DockerRunner

DockerRunner(executable: str | None = None)

Utility class encapsulating Docker invocations.

Source code in src/texsmith/adapters/docker.py
53
54
55
def __init__(self, executable: str | None = None) -> None:
    """Store the optional explicit executable and start with an empty cache."""
    # Nothing has been resolved yet.
    self._cached_executable: str | None = None
    # Caller-supplied executable, kept exactly as given (may be ``None``).
    self._explicit_executable = executable

is_available

is_available() -> bool

Return True when Docker can be located.

Source code in src/texsmith/adapters/docker.py
57
58
59
60
61
62
def is_available(self) -> bool:
    """Return True when Docker can be located.

    A ``TransformerExecutionError`` raised during the lookup is treated as
    "not available" rather than propagated.
    """
    try:
        located = self._resolve_executable(optional=True)
    except TransformerExecutionError:
        return False
    return located is not None

reset

reset() -> None

Clear cached executable lookup results.

Source code in src/texsmith/adapters/docker.py
64
65
66
def reset(self) -> None:
    """Clear cached executable lookup results."""
    # Only the cached lookup is dropped; no other attribute is modified here.
    self._cached_executable = None

run

run(
    request: DockerRunRequest,
    *,
    capture_output: bool = True,
    text: bool = True,
) -> subprocess.CompletedProcess[str]

Execute Docker with the supplied request.

Source code in src/texsmith/adapters/docker.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def run(
    self,
    request: DockerRunRequest,
    *,
    capture_output: bool = True,
    text: bool = True,
) -> subprocess.CompletedProcess[str]:
    """Execute Docker with the supplied request.

    Args:
        request: Description of the container invocation; turned into a
            command line by ``_build_run_command``.
        capture_output: Forwarded to ``subprocess.run``.
        text: Forwarded to ``subprocess.run``. When false the captured
            streams are bytes.

    Returns:
        The completed process, only when the exit code is zero.

    Raises:
        TransformerExecutionError: If the executable cannot be found, the
            process cannot be started, or the command exits with a non-zero
            status. In the last case the message carries stderr, or stdout
            when stderr is empty.
    """
    command = self._build_run_command(request)
    try:
        result = subprocess.run(
            command,
            check=False,
            capture_output=capture_output,
            text=text,
        )
    except FileNotFoundError as exc:
        # The executable path that was used is no longer valid; drop the cache.
        self._cached_executable = None
        raise TransformerExecutionError("Docker executable could not be located.") from exc
    except OSError as exc:
        raise TransformerExecutionError(f"Failed to invoke Docker: {exc}") from exc

    if result.returncode != 0:

        def _readable(stream: str | bytes | None) -> str:
            # Streams are ``None`` when output is not captured and ``bytes``
            # when ``text=False``; decode so the message never embeds a
            # ``b'...'`` repr.
            if isinstance(stream, bytes):
                stream = stream.decode("utf-8", errors="replace")
            return (stream or "").strip()

        detail = _readable(result.stderr) or _readable(result.stdout)
        message = f"Docker image '{request.image}' failed with exit code {result.returncode}"
        if detail:
            message = f"{message}: {detail}"
        raise TransformerExecutionError(message)

    return result

VolumeMount dataclass

VolumeMount(
    source: Path | str, target: str, read_only: bool = False
)

Bind mount configuration.

is_docker_available

is_docker_available() -> bool

Check if Docker can be executed.

Source code in src/texsmith/adapters/docker.py
209
210
211
def is_docker_available() -> bool:
    """Check if Docker can be executed.

    Delegates to ``is_available()`` on the shared ``_default_runner``.
    """
    return _default_runner.is_available()

run_container

run_container(
    image: str,
    args: Sequence[str] = (),
    *,
    mounts: Sequence[VolumeMount] = (),
    environment: Mapping[str, str] | None = None,
    workdir: str | None = None,
    user: str | None = None,
    use_host_user: bool = True,
    limits: DockerLimits | None = None,
    network: str | None = None,
    remove: bool = True,
    extra_args: Sequence[str] = (),
    capture_output: bool = True,
    text: bool = True,
) -> subprocess.CompletedProcess[str]

Execute Docker using the shared runner.

Source code in src/texsmith/adapters/docker.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def run_container(
    image: str,
    args: Sequence[str] = (),
    *,
    mounts: Sequence[VolumeMount] = (),
    environment: Mapping[str, str] | None = None,
    workdir: str | None = None,
    user: str | None = None,
    use_host_user: bool = True,
    limits: DockerLimits | None = None,
    network: str | None = None,
    remove: bool = True,
    extra_args: Sequence[str] = (),
    capture_output: bool = True,
    text: bool = True,
) -> subprocess.CompletedProcess[str]:
    """Execute Docker using the shared runner.

    The keyword arguments are packed into a ``DockerRunRequest`` (sequences
    copied to tuples, a missing or empty environment replaced by ``{}``) and
    handed to ``_default_runner.run`` together with ``capture_output`` and
    ``text``.
    """
    frozen_args = tuple(args)
    frozen_mounts = tuple(mounts)
    env = environment if environment else {}
    frozen_extra_args = tuple(extra_args)

    request = DockerRunRequest(
        image=image,
        args=frozen_args,
        mounts=frozen_mounts,
        environment=env,
        workdir=workdir,
        user=user,
        use_host_user=use_host_user,
        remove=remove,
        limits=limits,
        network=network,
        extra_args=frozen_extra_args,
    )
    return _default_runner.run(request, capture_output=capture_output, text=text)