Examples¶

import pandoc
from pandoc.types import *

Uppercase¶

🚀 Change all text to upper case.

def uppercase(doc):
    """Convert the text of every Str element of the document to upper case.

    The document is modified in place; nothing is returned.
    """
    strings = (node for node in pandoc.iter(doc) if isinstance(node, Str))
    for node in strings:
        text = node[0] # node: Str(Text)
        node[0] = text.upper()

>>> doc = pandoc.read("Hello world!")
>>> uppercase(doc)
>>> print(pandoc.write(doc).strip())
HELLO WORLD!

De-emphasize¶

🚀 Turn emphasized text into normal text.

def de_emphasize(doc):
    """Replace every Emph element of the document by its content, in place.

    All Emph elements are matched, so nested layers of emphasis are
    removed as well.
    """
    matches = [
        (node, *trail[-1]) # trail[-1]: (holder, index) of the node
        for node, trail in pandoc.iter(doc, path=True)
        if isinstance(node, Emph)
    ]
    # Splice from the last match to the first one, so that the indices
    # recorded for the earlier matches are still valid when they are used.
    for node, parent, position in reversed(matches):
        assert isinstance(node, Emph)
        content = node[0] # node: Emph([Inline])
        parent[position:position+1] = content

>>> doc = pandoc.read("**strong**, *emphasized*, normal")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
**strong**, emphasized, normal

This implementation will remove nested layers of emphasis:

>>> doc = pandoc.read("0x _1x *2x*_")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
0x 1x 2x

To remove only one layer of emphasis instead (the outer layer), we can filter out all elements that are already emphasized.

from math import inf

def de_emphasize(doc):
    """Remove only the outer layer of emphasis of the document, in place.

    Elements located inside an Emph that has already been recorded are
    skipped, so nested Emph elements are left untouched.
    """
    matches = []
    # Path length of the Emph being traversed (inf: not inside any Emph).
    emph_depth = inf
    for node, trail in pandoc.iter(doc, path=True):
        if len(trail) > emph_depth:
            continue # still inside the recorded Emph
        emph_depth = inf
        if not isinstance(node, Emph):
            continue
        parent, position = trail[-1]
        matches.append((node, parent, position))
        emph_depth = len(trail)
    # Splice from the last match to the first one, so that the indices
    # recorded for the earlier matches are still valid when they are used.
    for node, parent, position in reversed(matches):
        assert isinstance(node, Emph)
        content = node[0] # node: Emph([Inline])
        parent[position:position+1] = content

The behavior with simply emphasized items is unchanged:

>>> doc = pandoc.read("**strong**, *emphasized*, normal")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
**strong**, emphasized, normal

but differs for multiply emphasized text:

>>> doc = pandoc.read("0x _1x *2x*_")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
0x 1x *2x*

LaTeX theorems¶

🚀 Convert divs tagged as theorems into LaTeX theorems.

First we need to detect this kind of divs:

def is_theorem(elt):
    """Return True if elt is a Div whose classes include "theorem"."""
    # elt: Div(Attr, [Block]) with Attr: (Text, [Text], [(Text, Text)]),
    # hence elt[0][1] is the list of classes of the div.
    return isinstance(elt, Div) and "theorem" in elt[0][1]

Or equivalently, with Python 3.10 (or newer), using pattern matching:

def is_theorem(elt):
    match elt:
        case Div((_, classes, _), _) if "theorem" in classes:
            return True
        case _:
            return False

Now we can implement the transformation itself:

def LaTeX(text):
    """Wrap text into a raw block whose format is "latex"."""
    latex = Format("latex")
    return RawBlock(latex, text)

def theoremize(doc):
    """Enclose the content of every theorem div in a LaTeX theorem environment.

    The document is modified in place. When the div has an identifier,
    a label with the same name is appended to the opening command.
    """
    for node in pandoc.iter(doc):
        if not is_theorem(node):
            continue
        attr, blocks = node # node: Div(Attr, [Block])
        identifier = attr[0] # attr: (Text, [Text], [(Text, Text)])
        opening = r'\begin{theorem}'
        if identifier:
            opening += r"\label{" + identifier + "}"
        # Mutate the list of blocks itself so that the div sees the change.
        blocks.insert(0, LaTeX(opening))
        blocks.append(LaTeX(r'\end{theorem}'))

Here are the results:

# Sample input: a div with the id 'cauchy-formula' and the class 'theorem'
# that contains a display math formula. A raw string is used to keep
# the LaTeX backslashes as they are.
markdown = r"""
<div id='cauchy-formula' class='theorem'>
$$f(z) = \frac{1}{i2\pi} \int \frac{f(w)}{w-z}\, dw$$
</div>
"""

>>> doc = pandoc.read(markdown)
>>> print(pandoc.write(doc, format="latex")) # doctest: +NORMALIZE_WHITESPACE
\phantomsection\label{cauchy-formula}
\[f(z) = \frac{1}{i2\pi} \int \frac{f(w)}{w-z}\, dw\]
>>> theoremize(doc)
>>> print(pandoc.write(doc, format="latex")) # doctest: +NORMALIZE_WHITESPACE
\phantomsection\label{cauchy-formula}
\begin{theorem}\label{cauchy-formula}
<BLANKLINE>
\[f(z) = \frac{1}{i2\pi} \int \frac{f(w)}{w-z}\, dw\]
<BLANKLINE>
\end{theorem}

Jupyter Notebooks¶

🚀 Transform a markdown document into a Jupyter notebook.

📖 Reference: the notebook file format

Jupyter notebook helpers (building blocks):

import copy
import uuid

def Notebook():
    """Return a new notebook (nbformat 4.5) without any cell or metadata."""
    return dict(
        nbformat=4,
        nbformat_minor=5,
        cells=[],
        metadata={},
    )

def CodeCell():
    """Return a new code cell: no source, no outputs and a random id."""
    cell = {"cell_type": "code", "source": []}
    # No execution count: the cell has not been executed.
    cell["execution_count"] = None
    cell["outputs"] = []
    cell["id"] = uuid.uuid4().hex
    cell["metadata"] = {}
    return cell

def MarkdownCell():
    """Return a new markdown cell: no source and a random id."""
    identifier = uuid.uuid4().hex
    return dict(
        cell_type="markdown",
        source=[],
        id=identifier,
        metadata={},
    )

The core transformation code:

def notebookify(doc):
    """Convert a pandoc document into a Jupyter notebook (as a dict).

    Each top-level block becomes a cell: code blocks become code cells,
    any other block is written back with pandoc.write into a markdown cell.
    """
    notebook = Notebook()
    for block in doc[1]: # doc: Pandoc(Meta, [Block])
        if isinstance(block, CodeBlock):
            text, cell = block[1], CodeCell() # block: CodeBlock(Attr, Text)
        else:
            text, cell = pandoc.write(block).strip(), MarkdownCell()
        # The source is stored as a list of lines that keep their line ending.
        cell["source"] = text.splitlines(keepends=True)
        notebook["cells"].append(cell)
    return notebook

# Sample input: a title, a paragraph and an indented code block.
markdown = """
# Hello world!
Print `Hello world!`:

    >>> print("Hello world!")
"""
doc = pandoc.read(markdown)

>>> doc
Pandoc(Meta({}), [Header(1, ('hello-world', [], []), [Str('Hello'), Space(), Str('world!')]), Para([Str('Print'), Space(), Code(('', [], []), 'Hello world!'), Str(':')]), CodeBlock(('', [], []), '>>> print("Hello world!")')])
>>> ipynb = notebookify(doc)
>>> import pprint
>>> pprint.pprint(ipynb) # doctest: +ELLIPSIS
{'cells': [{'cell_type': 'markdown',
            'id': ...,
            'metadata': {},
            'source': ['# Hello world!']},
           {'cell_type': 'markdown',
            'id': ...,
            'metadata': {},
            'source': ['Print `Hello world!`:']},
           {'cell_type': 'code',
            'execution_count': None,
            'id': ...,
            'metadata': {},
            'outputs': [],
            'source': ['>>> print("Hello world!")']}],
 'metadata': {},
 'nbformat': 4,
 'nbformat_minor': 5}

To use notebookify from the command line, we may create a main entry point:

import json
from pathlib import Path
import sys

def main():
    """Convert the file named by the first command-line argument to a notebook.

    The notebook is written as JSON next to the input file, with the
    same name but the ".ipynb" suffix.
    """
    source = sys.argv[1]
    notebook = notebookify(pandoc.read(file=source))
    target = Path(source).with_suffix(".ipynb")
    with target.open("w", encoding="utf-8") as stream:
        json.dump(notebook, stream, ensure_ascii=False, indent=2)

If we specify a (temporary) markdown file on the command line, main() creates the corresponding notebook:

>>> import tempfile
>>> with tempfile.TemporaryDirectory() as tmp_dir: # doctest: +ELLIPSIS
...     md_path = Path(tmp_dir).joinpath("doc.md")
...     with open(md_path, "w", encoding="utf-8") as md_file:
...         _ = md_file.write(markdown)
...     sys.argv[:] = ["notebookify", str(md_path)]
...     main()
...     with open(md_path.with_suffix(".ipynb"), encoding="utf-8") as ipynb:
...         pprint.pprint(json.load(ipynb))
{'cells': [{'cell_type': 'markdown',
            'id': ...,
            'metadata': {},
            'source': ['# Hello world!']},
           {'cell_type': 'markdown',
            'id': ...,
            'metadata': {},
            'source': ['Print `Hello world!`:']},
           {'cell_type': 'code',
            'execution_count': None,
            'id': ...,
            'metadata': {},
            'outputs': [],
            'source': ['>>> print("Hello world!")']}],
 'metadata': {},
 'nbformat': 4,
 'nbformat_minor': 5}