Skip to content

Transformers

Conversion registry exposing high-level helpers for assets.

ConverterRegistry

ConverterRegistry()

Registry storing converter strategies.

Source code in src/texsmith/adapters/transformers/__init__.py
24
25
def __init__(self) -> None:
    """Create a registry that starts with no converter strategies."""
    # Maps a converter name to the strategy registered under that name.
    self._strategies: dict[str, ConverterStrategy] = {}

convert

convert(
    name: str,
    source: Path | str,
    *,
    output_dir: Path,
    **options: Any,
) -> Any

Execute a converter strategy with the provided arguments.

Source code in src/texsmith/adapters/transformers/__init__.py
42
43
44
45
46
47
48
49
50
51
52
def convert(
    self,
    name: str,
    source: Path | str,
    *,
    output_dir: Path,
    **options: Any,
) -> Any:
    """Execute a converter strategy with the provided arguments."""
    strategy = self.get(name)
    return strategy(source, output_dir=output_dir, **options)

get

get(name: str) -> ConverterStrategy

Return a registered converter strategy or raise an execution error.

Source code in src/texsmith/adapters/transformers/__init__.py
31
32
33
34
35
36
def get(self, name: str) -> ConverterStrategy:
    """Fetch the strategy registered under ``name``.

    Args:
        name: Name the converter strategy was registered under.

    Returns:
        The strategy stored in the registry for ``name``.

    Raises:
        TransformerExecutionError: If nothing is registered under ``name``;
            the original ``KeyError`` is chained as the cause.
    """
    try:
        strategy = self._strategies[name]
    except KeyError as exc:  # pragma: no cover - defensive
        raise TransformerExecutionError(f"No converter registered for '{name}'") from exc
    return strategy

is_registered

is_registered(name: str) -> bool

Return True when a converter has been registered under the given name.

Source code in src/texsmith/adapters/transformers/__init__.py
38
39
40
def is_registered(self, name: str) -> bool:
    """Tell whether a converter strategy is stored under ``name``.

    Args:
        name: Converter name to look for.

    Returns:
        ``True`` if ``name`` is a key of the registry mapping, else ``False``.
    """
    known_strategies = self._strategies
    return name in known_strategies

register

register(name: str, strategy: ConverterStrategy) -> None

Register a converter strategy under a unique name.

Source code in src/texsmith/adapters/transformers/__init__.py
27
28
29
def register(self, name: str, strategy: ConverterStrategy) -> None:
    """Store ``strategy`` in the registry under ``name``.

    An existing entry with the same name is overwritten.

    Args:
        name: Key the strategy is stored under.
        strategy: Converter strategy to store.
    """
    strategies = self._strategies
    strategies[name] = strategy

ConverterStrategy

Bases: Protocol

Protocol implemented by concrete converter strategies.

DrawioToPdfStrategy

DrawioToPdfStrategy(
    image: str = "rlespinasse/drawio-desktop-headless",
)

Bases: CachedConversionStrategy

Convert draw.io diagrams using selectable backends (playwright, local, docker).

Source code in src/texsmith/adapters/transformers/strategies.py
1187
1188
1189
1190
1191
1192
1193
def __init__(
    self,
    image: str = "rlespinasse/drawio-desktop-headless",
) -> None:
    """Initialise the strategy, passing ``"drawio"`` to the base initialiser.

    Args:
        image: Image reference stored on the instance as ``self.image``
            (presumably the container image used by the docker backend;
            confirm against the conversion code).
    """
    super().__init__("drawio")
    self.image = image
    # Export endpoint copied from the module-level ``_EXPORT3_URL`` constant.
    self.export_url = _EXPORT3_URL

MermaidToPdfStrategy

MermaidToPdfStrategy(
    image: str = "minlag/mermaid-cli",
    *,
    default_theme: str = "neutral",
)

Bases: CachedConversionStrategy

Render Mermaid diagrams to PDF using the official CLI image.

Source code in src/texsmith/adapters/transformers/strategies.py
906
907
908
909
910
911
912
913
914
def __init__(
    self,
    image: str = "minlag/mermaid-cli",
    *,
    default_theme: str = "neutral",
) -> None:
    """Initialise the strategy, passing ``"mermaid"`` to the base initialiser.

    Args:
        image: Image reference stored on the instance as ``self.image``
            (presumably the container image that runs the Mermaid CLI;
            confirm against the conversion code).
        default_theme: Theme name stored as ``self.default_theme``.
    """
    super().__init__("mermaid")
    self.image = image
    self.default_theme = default_theme

drawio2pdf

drawio2pdf(
    source: Path | str, output_dir: Path, **options: Any
) -> Path

Convert draw.io diagrams to PDF.

Source code in src/texsmith/adapters/transformers/__init__.py
86
87
88
def drawio2pdf(source: Path | str, output_dir: Path, **options: Any) -> Path:
    """Convert a draw.io diagram to PDF via the ``"drawio"`` converter.

    Args:
        source: Diagram input handed to the converter unchanged.
        output_dir: Forwarded to the converter as the ``output_dir`` keyword.
        **options: Extra keyword arguments forwarded to the converter.

    Returns:
        The value produced by ``registry.convert`` for the ``"drawio"`` entry.
    """
    converted = registry.convert("drawio", source, output_dir=output_dir, **options)
    return converted

fetch_image

fetch_image(
    url: str, output_dir: Path, **options: Any
) -> Path

Fetch a remote image and normalise it to PDF.

Source code in src/texsmith/adapters/transformers/__init__.py
96
97
98
def fetch_image(url: str, output_dir: Path, **options: Any) -> Path:
    """Fetch a remote image as PDF via the ``"fetch-image"`` converter.

    Args:
        url: Location of the image, handed to the converter as its source.
        output_dir: Forwarded to the converter as the ``output_dir`` keyword.
        **options: Extra keyword arguments forwarded to the converter.

    Returns:
        The value produced by ``registry.convert`` for the ``"fetch-image"``
        entry.
    """
    converted = registry.convert("fetch-image", url, output_dir=output_dir, **options)
    return converted

get_pdf_page_sizes

get_pdf_page_sizes(
    source: Path | str, **options: Any
) -> dict[str, Any]

Inspect a PDF and return structured metadata.

Source code in src/texsmith/adapters/transformers/__init__.py
101
102
103
104
105
106
def get_pdf_page_sizes(source: Path | str, **options: Any) -> dict[str, Any]:
    """Inspect a PDF through the ``"pdf-metadata"`` converter.

    Args:
        source: PDF to inspect, handed to the converter unchanged.
        **options: Extra keyword arguments for the converter. An
            ``output_dir`` entry, when present, is removed from the mapping
            and forwarded as the converter's ``output_dir``.

    Returns:
        The value produced by ``registry.convert`` for the ``"pdf-metadata"``
        entry.
    """
    # The fallback is always computed up front, even when the caller supplies
    # ``output_dir``: a ``Path`` source falls back to its parent directory,
    # anything else to the current working directory.
    if isinstance(source, Path):
        fallback_dir = Path(source).parent
    else:
        fallback_dir = Path.cwd()
    output_dir = options.pop("output_dir", fallback_dir)
    return registry.convert("pdf-metadata", source, output_dir=output_dir, **options)

has_converter

has_converter(name: str) -> bool

Return True when a converter strategy is currently registered.

Source code in src/texsmith/adapters/transformers/__init__.py
71
72
73
def has_converter(name: str) -> bool:
    """Report whether the module-level registry knows a converter ``name``.

    Args:
        name: Converter name to look for.

    Returns:
        The result of ``registry.is_registered(name)``.
    """
    registered = registry.is_registered(name)
    return registered

image2pdf

image2pdf(
    source: Path | str, output_dir: Path, **options: Any
) -> Path

Convert bitmap images to PDF.

Source code in src/texsmith/adapters/transformers/__init__.py
81
82
83
def image2pdf(source: Path | str, output_dir: Path, **options: Any) -> Path:
    """Convert a bitmap image to PDF via the ``"image"`` converter.

    Args:
        source: Image input handed to the converter unchanged.
        output_dir: Forwarded to the converter as the ``output_dir`` keyword.
        **options: Extra keyword arguments forwarded to the converter.

    Returns:
        The value produced by ``registry.convert`` for the ``"image"`` entry.
    """
    converted = registry.convert("image", source, output_dir=output_dir, **options)
    return converted

mermaid2pdf

mermaid2pdf(
    source: Path | str, output_dir: Path, **options: Any
) -> Path

Convert Mermaid diagrams to PDF.

Source code in src/texsmith/adapters/transformers/__init__.py
91
92
93
def mermaid2pdf(source: Path | str, output_dir: Path, **options: Any) -> Path:
    """Convert a Mermaid diagram to PDF via the ``"mermaid"`` converter.

    Args:
        source: Diagram input handed to the converter unchanged.
        output_dir: Forwarded to the converter as the ``output_dir`` keyword.
        **options: Extra keyword arguments forwarded to the converter.

    Returns:
        The value produced by ``registry.convert`` for the ``"mermaid"`` entry.
    """
    converted = registry.convert("mermaid", source, output_dir=output_dir, **options)
    return converted

register_converter

register_converter(
    name: str, strategy: ConverterStrategy
) -> None

Expose a helper to register external strategies.

Source code in src/texsmith/adapters/transformers/__init__.py
66
67
68
def register_converter(name: str, strategy: ConverterStrategy) -> None:
    """Register ``strategy`` under ``name`` in the module-level registry.

    Thin public wrapper around ``registry.register`` so that callers outside
    this module can plug in their own strategies.

    Args:
        name: Key the strategy is registered under.
        strategy: Converter strategy to register.
    """
    registry.register(name=name, strategy=strategy)

svg2pdf

svg2pdf(
    source: Path | str, output_dir: Path, **options: Any
) -> Path

Convert SVG assets to PDF.

Source code in src/texsmith/adapters/transformers/__init__.py
76
77
78
def svg2pdf(source: Path | str, output_dir: Path, **options: Any) -> Path:
    """Convert an SVG asset to PDF via the ``"svg"`` converter.

    Args:
        source: SVG input handed to the converter unchanged.
        output_dir: Forwarded to the converter as the ``output_dir`` keyword.
        **options: Extra keyword arguments forwarded to the converter.

    Returns:
        The value produced by ``registry.convert`` for the ``"svg"`` entry.
    """
    converted = registry.convert("svg", source, output_dir=output_dir, **options)
    return converted

Primitives used by asset converter strategies.

CachedConversionStrategy

CachedConversionStrategy(
    namespace: str,
    *,
    max_attempts: int = 3,
    backoff: Callable[[int], float] | None = None,
)

Base class that adds caching and retry/backoff policies.

Source code in src/texsmith/adapters/transformers/base.py
38
39
40
41
42
43
44
45
46
47
def __init__(
    self,
    namespace: str,
    *,
    max_attempts: int = 3,
    backoff: Callable[[int], float] | None = None,
) -> None:
    """Record the namespace and the retry settings on the instance.

    Args:
        namespace: Stored as ``self.namespace``.
        max_attempts: Stored as ``self.max_attempts``.
        backoff: Callable mapping an attempt number to a delay. When it is
            omitted (or otherwise falsy) the policy returned by
            ``exponential_backoff()`` with its default arguments is used.
    """
    self.namespace, self.max_attempts = namespace, max_attempts
    self.backoff = backoff if backoff else exponential_backoff()

output_suffix

output_suffix(source: Any, options: dict[str, Any]) -> str

Allow subclasses to customise the output suffix.

Source code in src/texsmith/adapters/transformers/base.py
110
111
112
def output_suffix(self, source: Any, options: dict[str, Any]) -> str:
    """Return the suffix to use for the converted output.

    This default implementation ignores both arguments and returns the
    instance's ``suffix`` attribute; the arguments exist so that subclasses
    can override the method and choose a suffix per call.

    Args:
        source: Conversion input (unused here).
        options: Conversion options (unused here).

    Returns:
        The value of ``self.suffix``.
    """
    default_suffix = self.suffix
    return default_suffix

ConverterStrategy

Bases: Protocol

Protocol implemented by concrete converter strategies.

exponential_backoff

exponential_backoff(
    base_delay: float = 0.5,
    factor: float = 2.0,
    max_delay: float = 5.0,
) -> Callable[[int], float]

Return a simple exponential backoff policy.

Source code in src/texsmith/adapters/transformers/base.py
21
22
23
24
25
26
27
28
29
30
def exponential_backoff(
    base_delay: float = 0.5, factor: float = 2.0, max_delay: float = 5.0
) -> Callable[[int], float]:
    """Build a capped exponential backoff policy.

    Args:
        base_delay: Delay returned for attempt ``1``.
        factor: Multiplier applied once per additional attempt.
        max_delay: Upper bound on the returned delay.

    Returns:
        A function mapping an attempt number to
        ``base_delay * factor ** (attempt - 1)``, capped at ``max_delay``.
    """

    def policy(attempt: int) -> float:
        uncapped = base_delay * (factor ** (attempt - 1))
        # Same selection rule as ``min(uncapped, max_delay)``.
        return max_delay if max_delay < uncapped else uncapped

    return policy

Concrete converter strategies with caching and error handling.

DrawioToPdfStrategy

DrawioToPdfStrategy(
    image: str = "rlespinasse/drawio-desktop-headless",
)

Bases: CachedConversionStrategy

Convert draw.io diagrams using selectable backends (playwright, local, docker).

Source code in src/texsmith/adapters/transformers/strategies.py
1187
1188
1189
1190
1191
1192
1193
def __init__(
    self,
    image: str = "rlespinasse/drawio-desktop-headless",
) -> None:
    """Initialise the strategy, passing ``"drawio"`` to the base initialiser.

    Args:
        image: Image reference stored on the instance as ``self.image``
            (presumably the container image used by the docker backend;
            confirm against the conversion code).
    """
    super().__init__("drawio")
    self.image = image
    # Export endpoint copied from the module-level ``_EXPORT3_URL`` constant.
    self.export_url = _EXPORT3_URL

FetchImageStrategy

FetchImageStrategy(timeout: float = 10.0)

Bases: CachedConversionStrategy

Fetch a remote image, normalise it to PDF, and cache the result.

Source code in src/texsmith/adapters/transformers/strategies.py
373
374
375
def __init__(self, timeout: float = 10.0) -> None:
    """Initialise the strategy, passing ``"fetch-image"`` to the base initialiser.

    Args:
        timeout: Stored as ``self.timeout`` (presumably the request timeout
            in seconds; confirm against the fetching code).
    """
    super().__init__("fetch-image")
    self.timeout = timeout

ImageToPdfStrategy

ImageToPdfStrategy()

Bases: CachedConversionStrategy

Convert bitmap images to PDF using Pillow.

Source code in src/texsmith/adapters/transformers/strategies.py
322
323
def __init__(self) -> None:
    """Initialise the strategy, passing ``"image"`` to the base initialiser."""
    super().__init__("image")

MermaidToPdfStrategy

MermaidToPdfStrategy(
    image: str = "minlag/mermaid-cli",
    *,
    default_theme: str = "neutral",
)

Bases: CachedConversionStrategy

Render Mermaid diagrams to PDF using the official CLI image.

Source code in src/texsmith/adapters/transformers/strategies.py
906
907
908
909
910
911
912
913
914
def __init__(
    self,
    image: str = "minlag/mermaid-cli",
    *,
    default_theme: str = "neutral",
) -> None:
    """Initialise the strategy, passing ``"mermaid"`` to the base initialiser.

    Args:
        image: Image reference stored on the instance as ``self.image``
            (presumably the container image that runs the Mermaid CLI;
            confirm against the conversion code).
        default_theme: Theme name stored as ``self.default_theme``.
    """
    super().__init__("mermaid")
    self.image = image
    self.default_theme = default_theme

NotConfiguredStrategy

NotConfiguredStrategy(name: str)

Strategy used to signal that a converter must be provided by the host.

Source code in src/texsmith/adapters/transformers/strategies.py
754
755
def __init__(self, name: str) -> None:
    """Remember the name this placeholder strategy stands for.

    Args:
        name: Stored as ``self.name``.
    """
    self.name = name

PdfMetadataStrategy

Inspect PDF files and expose structural metadata.

SvgToPdfStrategy

SvgToPdfStrategy()

Bases: CachedConversionStrategy

Convert inline SVG payloads or files to PDF using CairoSVG.

Source code in src/texsmith/adapters/transformers/strategies.py
231
232
def __init__(self) -> None:
    """Initialise the strategy, passing ``"svg"`` to the base initialiser."""
    super().__init__("svg")

Utility helpers shared across transformer strategies.

normalise_pdf_version

normalise_pdf_version(
    pdf_path: Path, *, target_version: str = "1.5"
) -> None

Re-write a PDF so its header advertises the requested version.

Source code in src/texsmith/adapters/transformers/utils.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def normalise_pdf_version(pdf_path: Path, *, target_version: str = "1.5") -> None:
    """Re-write a PDF so its header advertises the requested version.

    The pages and the string-valued metadata entries of ``pdf_path`` are
    copied into a fresh pypdf writer whose header is set to
    ``%PDF-<target_version>``. The result is written to a sibling file named
    ``<pdf_path>.tmp`` and then moved over ``pdf_path``. Nothing is written
    when the header reported by the reader already matches
    ``target_version``.

    Args:
        pdf_path: PDF file to normalise in place.
        target_version: Version string to advertise, e.g. ``"1.5"``.

    Raises:
        TransformerExecutionError: If pypdf is not installed, the PDF cannot
            be read, or the rewritten file cannot be produced.
    """
    try:
        import pypdf  # type: ignore[import]
    except ImportError as exc:  # pragma: no cover - optional dependency
        msg = "pypdf is required to normalise PDF artefacts."
        raise TransformerExecutionError(msg) from exc

    try:
        reader = pypdf.PdfReader(str(pdf_path))
    except Exception as exc:  # pragma: no cover - defensive
        msg = f"Failed to read generated PDF '{pdf_path}': {exc}"
        raise TransformerExecutionError(msg) from exc

    # Skip rewriting when already on the desired version. pypdf exposes the
    # header as text, so accept ``str`` as well as ``bytes``; checking for
    # ``bytes`` only meant this shortcut could never trigger.
    header = getattr(reader, "pdf_header", "")
    if isinstance(header, bytes):
        header = header.decode("latin-1", "ignore")
    if isinstance(header, str) and header.startswith("%PDF-"):
        current_version = header[5:].strip()
        if current_version == target_version:
            return

    writer = pypdf.PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    writer.pdf_header = f"%PDF-{target_version}\n"

    # Only plain-string metadata values are carried over.
    metadata = reader.metadata or {}
    filtered_metadata = {k: v for k, v in metadata.items() if isinstance(v, str)}
    if filtered_metadata:
        writer.add_metadata(filtered_metadata)

    # Write next to the target first so a failed write never truncates the
    # original file.
    tmp_path = pdf_path.with_suffix(pdf_path.suffix + ".tmp")
    try:
        with tmp_path.open("wb") as handle:
            writer.write(handle)
    except Exception as exc:  # pragma: no cover - defensive
        # Best-effort removal of the partial file; the write failure is the
        # error worth reporting, so a cleanup failure is deliberately ignored.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        msg = f"Failed to rewrite PDF '{pdf_path}' to version {target_version}: {exc}"
        raise TransformerExecutionError(msg) from exc

    tmp_path.replace(pdf_path)

points_to_mm

points_to_mm(points: float) -> float

Convert PDF points to millimetres.

Source code in src/texsmith/adapters/transformers/utils.py
10
11
12
def points_to_mm(points: float) -> float:
    """Convert a length in PDF points to millimetres.

    Uses 72 points per inch and 25.4 millimetres per inch.

    Args:
        points: Length in points.

    Returns:
        The same length in millimetres.
    """
    mm_per_inch = 25.4
    points_per_inch = 72
    return points * mm_per_inch / points_per_inch