Examples¶
import pandoc
from pandoc.types import *
Uppercase¶
Change all text to upper case.
def uppercase(doc):
    """Upper-case every text node of `doc`, in place.

    Walks the document with `pandoc.iter` and rewrites the text held by
    each `Str` element. Nothing is returned: `doc` itself is modified.
    """
    text_nodes = (node for node in pandoc.iter(doc) if isinstance(node, Str))
    for node in text_nodes:
        # node: Str(Text) -- the text is the element's only field.
        node[0] = node[0].upper()
>>> doc = pandoc.read("Hello world!")
>>> uppercase(doc)
>>> print(pandoc.write(doc).strip())
HELLO WORLD!
De-emphasize¶
Turn emphasized text into normal text.
def de_emphasize(doc):
    """Replace every `Emph` element of `doc` by its contents, in place.

    All layers of emphasis are removed, nested ones included. Nothing is
    returned: `doc` itself is modified.
    """
    # First pass: record where each Emph lives (its holder and its index in
    # that holder) without touching the document.
    matches = []
    for node, trail in pandoc.iter(doc, path=True):
        if not isinstance(node, Emph):
            continue
        parent, position = trail[-1]
        matches.append((parent, position, node))

    # Second pass: splice from the last match back to the first, so that the
    # indices recorded for the earlier matches are still valid when used.
    for parent, position, node in matches[::-1]:
        assert isinstance(node, Emph)
        # node: Emph([Inline]) -- node[0] is the list of emphasized inlines.
        parent[position:position + 1] = node[0]
>>> doc = pandoc.read("**strong**, *emphasized*, normal")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
**strong**, emphasized, normal
This implementation will remove nested layers of emphasis:
>>> doc = pandoc.read("0x _1x *2x*_")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
0x 1x 2x
To remove only one layer of emphasis instead (the outer layer), we can filter out all elements that are already emphasized.
from math import inf
def de_emphasize(doc):
    """Remove only the outermost layer of emphasis from `doc`, in place.

    An `Emph` element nested inside another `Emph` is left untouched.
    Nothing is returned: `doc` itself is modified.
    """
    outer_matches = []
    # Path length of the Emph whose contents are currently being skipped;
    # `inf` means "not inside any recorded Emph".
    shield_depth = inf
    for node, trail in pandoc.iter(doc, path=True):
        level = len(trail)
        if level > shield_depth:
            # Deeper than the last recorded Emph: treated as already
            # emphasized, hence ignored.
            continue
        shield_depth = inf  # back at (or above) the level of that Emph
        if isinstance(node, Emph):
            parent, position = trail[-1]
            outer_matches.append((parent, position, node))
            shield_depth = level

    # Splice in reverse document order so that the indices recorded for the
    # remaining matches are not invalidated.
    for parent, position, node in reversed(outer_matches):
        assert isinstance(node, Emph)
        # node: Emph([Inline]) -- node[0] is the list of emphasized inlines.
        parent[position:position + 1] = node[0]
The behavior with simply emphasized items is unchanged:
>>> doc = pandoc.read("**strong**, *emphasized*, normal")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
**strong**, emphasized, normal
but differs for multiply emphasized text:
>>> doc = pandoc.read("0x _1x *2x*_")
>>> de_emphasize(doc)
>>> print(pandoc.write(doc).strip())
0x 1x *2x*
LaTeX theorems¶
Convert divs tagged as theorems into LaTeX theorems.
First, we need to detect this kind of div:
def is_theorem(elt):
    """Return True if `elt` is a `Div` whose classes include "theorem"."""
    if not isinstance(elt, Div):
        return False
    # elt: Div(Attr, [Block]); Attr: (Text, [Text], [(Text, Text)]),
    # so elt[0][1] is the list of classes.
    classes = elt[0][1]
    return "theorem" in classes
Or equivalently, with Python 3.10 (or newer), using pattern matching:
def is_theorem(elt):
match elt:
case Div((_, classes, _), _) if "theorem" in classes:
return True
case _:
return False
Now we can implement the transformation itself:
def LaTeX(text):
    """Wrap `text` in a `RawBlock` whose format is "latex"."""
    latex_format = Format("latex")
    return RawBlock(latex_format, text)
def theoremize(doc):
    """Surround the contents of every theorem div of `doc` with LaTeX markers.

    For each element accepted by `is_theorem`, a raw `\\begin{theorem}` block
    (followed by a `\\label{...}` when the div has a non-empty identifier) is
    inserted before the div's blocks and a raw `\\end{theorem}` block after
    them. Nothing is returned: `doc` itself is modified.
    """
    for div in filter(is_theorem, pandoc.iter(doc)):
        attr, blocks = div  # div: Div(Attr, [Block])
        identifier = attr[0]  # attr: (Text, [Text], [(Text, Text)])
        if identifier:
            label = r"\label{" + identifier + "}"
        else:
            label = ""
        opening = LaTeX(r"\begin{theorem}" + label)
        closing = LaTeX(r"\end{theorem}")
        # Slice assignment keeps the very list object held by the div.
        blocks[:] = [opening, *blocks, closing]
Here are the results:
# Sample input: a div with the identifier "cauchy-formula" and the class
# "theorem", wrapping a single formula. A raw string is used so that the
# LaTeX backslashes are kept verbatim.
markdown = r"""
<div id='cauchy-formula' class='theorem'>
$$f(z) = \frac{1}{i2\pi} \int \frac{f(w)}{w-z}\, dw$$
</div>
"""
>>> doc = pandoc.read(markdown)
>>> print(pandoc.write(doc, format="latex")) # doctest: +NORMALIZE_WHITESPACE
\phantomsection\label{cauchy-formula}
\[f(z) = \frac{1}{i2\pi} \int \frac{f(w)}{w-z}\, dw\]
>>> theoremize(doc)
>>> print(pandoc.write(doc, format="latex")) # doctest: +NORMALIZE_WHITESPACE
\phantomsection\label{cauchy-formula}
\begin{theorem}\label{cauchy-formula}
<BLANKLINE>
\[f(z) = \frac{1}{i2\pi} \int \frac{f(w)}{w-z}\, dw\]
<BLANKLINE>
\end{theorem}
Jupyter Notebooks¶
Transform a markdown document into a Jupyter notebook.
Reference: the notebook file format
Jupyter notebook helpers (building blocks):
import copy
import uuid
def Notebook() -> dict:
    """Return a new, empty notebook as a plain dictionary.

    The notebook declares format version 4.5 (`nbformat` 4, `nbformat_minor`
    5) and starts with no cells and no metadata. A fresh dictionary (with
    fresh `cells` and `metadata` containers) is built on every call.
    """
    return {
        "nbformat": 4,
        "nbformat_minor": 5,
        "cells": [],
        "metadata": {},
    }
def CodeCell() -> dict:
    """Return a new, empty code cell as a plain dictionary.

    The cell has no source lines, no outputs, no execution count and no
    metadata. Its `id` is the hexadecimal form of a random UUID, so each
    call yields a cell with a different identifier.
    """
    return {
        "cell_type": "code",
        "source": [],
        "execution_count": None,
        "outputs": [],
        "id": uuid.uuid4().hex,
        "metadata": {},
    }
def MarkdownCell() -> dict:
    """Return a new, empty markdown cell as a plain dictionary.

    The cell has no source lines and no metadata. Its `id` is the
    hexadecimal form of a random UUID, so each call yields a cell with a
    different identifier.
    """
    return {
        "cell_type": "markdown",
        "source": [],
        "id": uuid.uuid4().hex,
        "metadata": {},
    }
The core transformation code:
def notebookify(doc):
    """Build a notebook dictionary from the top-level blocks of `doc`.

    Each `CodeBlock` becomes a code cell holding the block's text; every
    other block is written back with `pandoc.write`, stripped, and stored
    in a markdown cell. The resulting notebook dictionary is returned.
    """
    notebook = Notebook()
    for block in doc[1]:  # doc: Pandoc(Meta, [Block])
        if isinstance(block, CodeBlock):
            # block: CodeBlock(Attr, Text)
            text, cell = block[1], CodeCell()
        else:
            text, cell = pandoc.write(block).strip(), MarkdownCell()
        # The source is stored as a list of lines, line endings included.
        cell["source"] = text.splitlines(keepends=True)
        notebook["cells"].append(cell)
    return notebook
# Sample document: a title, a sentence, then an indented code block.
# The blank line and the four-space indent before the last line are what
# make it a code block rather than part of the preceding paragraph.
markdown = """
# Hello world!
Print `Hello world!`:

    >>> print("Hello world!")
"""
# Parse the sample markdown into a document tree (its repr is shown below).
doc = pandoc.read(markdown)
>>> doc
Pandoc(Meta({}), [Header(1, ('hello-world', [], []), [Str('Hello'), Space(), Str('world!')]), Para([Str('Print'), Space(), Code(('', [], []), 'Hello world!'), Str(':')]), CodeBlock(('', [], []), '>>> print("Hello world!")')])
>>> ipynb = notebookify(doc)
>>> import pprint
>>> pprint.pprint(ipynb) # doctest: +ELLIPSIS
{'cells': [{'cell_type': 'markdown',
'id': ...,
'metadata': {},
'source': ['# Hello world!']},
{'cell_type': 'markdown',
'id': ...,
'metadata': {},
'source': ['Print `Hello world!`:']},
{'cell_type': 'code',
'execution_count': None,
'id': ...,
'metadata': {},
'outputs': [],
'source': ['>>> print("Hello world!")']}],
'metadata': {},
'nbformat': 4,
'nbformat_minor': 5}
To use `notebookify` from the command line, we may create a `main` entry point:
import json
from pathlib import Path
import sys
def main():
    """Convert the markdown file named on the command line to a notebook.

    Reads the file given as first command-line argument, turns it into a
    notebook with `notebookify`, and writes the result as JSON next to the
    input, with the suffix replaced by ".ipynb".
    """
    markdown_file = sys.argv[1]
    notebook = notebookify(pandoc.read(file=markdown_file))
    target = Path(markdown_file).with_suffix(".ipynb")
    with open(target, "w", encoding="utf-8") as stream:
        json.dump(notebook, stream, ensure_ascii=False, indent=2)
If we specify a (temporary) markdown file on the command line, `main()` creates the corresponding notebook:
>>> import tempfile
>>> with tempfile.TemporaryDirectory() as tmp_dir: # doctest: +ELLIPSIS
... md_path = Path(tmp_dir).joinpath("doc.md")
... with open(md_path, "w", encoding="utf-8") as md_file:
... _ = md_file.write(markdown)
... sys.argv[:] = ["notebookify", str(md_path)]
... main()
... with open(md_path.with_suffix(".ipynb"), encoding="utf-8") as ipynb:
... pprint.pprint(json.load(ipynb))
{'cells': [{'cell_type': 'markdown',
'id': ...,
'metadata': {},
'source': ['# Hello world!']},
{'cell_type': 'markdown',
'id': ...,
'metadata': {},
'source': ['Print `Hello world!`:']},
{'cell_type': 'code',
'execution_count': None,
'id': ...,
'metadata': {},
'outputs': [],
'source': ['>>> print("Hello world!")']}],
'metadata': {},
'nbformat': 4,
'nbformat_minor': 5}