-
Notifications
You must be signed in to change notification settings - Fork 86
/
Copy pathparser.py
252 lines (195 loc) · 9.16 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
from docutils import nodes
import nbformat as nbf
from pathlib import Path
from sphinx.util import logging
from myst_parser.docutils_renderer import SphinxRenderer
from myst_parser.sphinx_parser import MystParser
from mistletoe.base_elements import BlockToken, Position, SourceLines
from mistletoe.parse_context import ParseContext, get_parse_context, set_parse_context
from mistletoe.block_tokenizer import tokenize_block
from mistletoe.block_tokens import Document, FrontMatter
from jupyter_sphinx.ast import get_widgets, JupyterWidgetStateNode
from jupyter_sphinx.execute import contains_widgets, write_notebook_output
from myst_nb.nb_glue import GLUE_PREFIX
from myst_nb.nb_glue.domain import NbGlueDomain
# Module-level Sphinx logger; passed to the MyST parse context so it can
# report duplicate link/footnote definitions and similar parse issues.
SPHINX_LOGGER = logging.getLogger(__name__)
class NotebookParser(MystParser):
    """Docutils parser for IPynb + CommonMark + Math + Tables + RST Extensions.

    Markdown cells are tokenized with the MyST flavour of CommonMark;
    code cells are wrapped in custom ``NbCodeCell`` block tokens and
    rendered later by ``SphinxNBRenderer``.
    """

    # File suffixes this docutils parser claims.
    supported = ("ipynb",)
    translate_section_name = None

    # Baseline MyST configuration; overridden in ``parse`` by the Sphinx
    # ``myst_config`` setting when available.
    default_config = {"known_url_schemes": None}

    config_section = "ipynb parser"
    config_section_dependencies = ("parsers",)

    def parse(self, inputstring: str, document) -> None:
        """Parse notebook JSON in *inputstring* into the docutils *document*.

        Side effects: cell outputs are written to disk under
        ``<build>/jupyter_execute/...`` and glued outputs are registered
        with the ``NbGlueDomain``.
        """
        # de-serialize the notebook
        ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)

        # This is a container for top level markdown tokens
        # which we will add to as we walk the document
        mkdown_tokens = []  # type: list[BlockToken]

        # First we ensure that we are using a 'clean' global context
        # for parsing, which is setup with the MyST parsing tokens;
        # the logger will report on duplicate link/footnote definitions, etc
        parse_context = ParseContext(
            find_blocks=SphinxNBRenderer.default_block_tokens,
            find_spans=SphinxNBRenderer.default_span_tokens,
            logger=SPHINX_LOGGER,
        )
        set_parse_context(parse_context)

        for cell_index, nb_cell in enumerate(ntbk.cells):

            # Skip empty cells
            if len(nb_cell["source"].strip()) == 0:
                continue

            # skip cells tagged for removal
            tags = nb_cell.metadata.get("tags", [])
            if "remove_cell" in tags:
                continue

            if nb_cell["cell_type"] == "markdown":

                # we add the document path and cell index
                # to the source lines, so they can be included in the error logging
                # NOTE: currently the logic to report metadata is not written
                # into SphinxRenderer, but this will be introduced in a later update
                lines = SourceLines(
                    nb_cell["source"],
                    uri=document["source"],
                    metadata={"cell_index": cell_index},
                    standardize_ends=True,
                )

                # parse the source markdown text;
                # at this point span/inline level tokens are not yet processed, but
                # link/footnote definitions are collected/stored in the global context
                mkdown_tokens.extend(tokenize_block(lines))

                # TODO for md cells, think of a way to implement the previous
                # `if "hide_input" in tags:` logic

            elif nb_cell["cell_type"] == "code":
                # here we do nothing but store the cell as a custom token
                mkdown_tokens.append(
                    NbCodeCell(
                        cell=nb_cell,
                        position=Position(
                            line_start=0,
                            uri=document["source"],
                            data={"cell_index": cell_index},
                        ),
                    )
                )

        # Now all definitions have been gathered, we walk the tokens and
        # process any inline text
        for token in mkdown_tokens + list(
            get_parse_context().foot_definitions.values()
        ):
            token.expand_spans()

        # If there are widgets, this will embed the state of all widgets in a script
        if contains_widgets(ntbk):
            mkdown_tokens.insert(0, JupyterWidgetState(state=get_widgets(ntbk)))

        # create the front matter token (notebook-level metadata)
        front_matter = FrontMatter(content=ntbk.metadata, position=None)

        # Finally, we create the top-level markdown document
        markdown_doc = Document(
            children=mkdown_tokens,
            front_matter=front_matter,
            link_definitions=parse_context.link_definitions,
            footnotes=parse_context.foot_definitions,
            footref_order=parse_context.foot_references,
        )

        self.reporter = document.reporter
        self.config = self.default_config.copy()
        try:
            new_cfg = document.settings.env.config.myst_config
            self.config.update(new_cfg)
        except AttributeError:
            # outside a full Sphinx build there is no env/myst_config;
            # fall back to the defaults
            pass

        # Remove all the mime prefixes from "glue" step.
        # This way, writing properly captures the glued images
        replace_mime = []
        for cell in ntbk.cells:
            if hasattr(cell, "outputs"):
                for out in cell.outputs:
                    if "data" in out:
                        # Only do the mimebundle replacing for the scrapbook outputs
                        mime_prefix = (
                            out.get("metadata", {})
                            .get("scrapbook", {})
                            .get("mime_prefix")
                        )
                        if mime_prefix:
                            out["data"] = {
                                key.replace(mime_prefix, ""): val
                                for key, val in out["data"].items()
                            }
                            # remember which outputs to restore below
                            replace_mime.append(out)

        # Write the notebook's output to disk. This changes metadata in notebook cells
        path_doc = Path(document.settings.env.docname)
        doc_relpath = path_doc.parent
        doc_filename = path_doc.name
        build_dir = Path(document.settings.env.app.outdir).parent
        output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
        write_notebook_output(ntbk, str(output_dir), doc_filename)

        # Now add back the mime prefixes to the right outputs so they aren't rendered
        # until called from the role/directive
        for out in replace_mime:
            out["data"] = {
                f"{GLUE_PREFIX}{key}": val for key, val in out["data"].items()
            }

        # Update our glue key list with new ones defined in this page
        glue_domain = NbGlueDomain.from_env(document.settings.env)
        glue_domain.add_notebook(ntbk, path_doc)

        # render the Markdown AST to docutils AST
        renderer = SphinxNBRenderer(
            parse_context=parse_context, document=document, current_node=None
        )
        renderer.render(markdown_doc)
class JupyterWidgetState(BlockToken):
    """Block token carrying the serialized state of the notebook's widgets."""

    def __init__(self, state):
        # ``state`` is the widget-state mapping returned by ``get_widgets``.
        self.state = state
class NbCodeCell(BlockToken):
    """Block token wrapping a raw notebook code cell for deferred rendering."""

    def __init__(self, cell, position):
        # the nbformat cell (source, outputs, metadata)
        self.cell = cell
        # source position (uri + cell index) for error reporting
        self.position = position
class SphinxNBRenderer(SphinxRenderer):
    """A ``SphinxRenderer`` extended with handlers for the notebook-specific
    tokens ``NbCodeCell`` and ``JupyterWidgetState``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Register dispatch entries for the custom block tokens.
        self.render_map.update(
            {
                "NbCodeCell": self.render_nb_code_cell,
                "JupyterWidgetState": self.render_jupyter_widget_state,
            }
        )

    def render_jupyter_widget_state(self, token):
        """Embed the ipywidgets state in the document as a state node."""
        state_node = JupyterWidgetStateNode(state=token.state)
        self.document.append(state_node)

    def render_nb_code_cell(self, token: NbCodeCell):
        """Render a Jupyter notebook cell."""
        nb_cell = token.cell
        cell_tags = nb_cell.metadata.get("tags", [])

        # Outer container wraps everything in the cell; each notebook tag
        # is mirrored as a ``tag_<name>`` CSS class.
        css_classes = ["cell"] + [f"tag_{tag}" for tag in cell_tags]
        container = CellNode(classes=css_classes, cell_type=nb_cell["cell_type"])
        self.current_node += container

        if "remove_input" not in cell_tags:
            # Input block: the cell source as a literal block.
            input_node = CellInputNode(classes=["cell_input"])
            input_node += nodes.literal_block(text=nb_cell["source"])
            container += input_node

        if "remove_output" not in cell_tags:
            # Cell output: raw outputs bundled for a later transform.
            output_node = CellOutputNode(classes=["cell_output"])
            output_node += CellOutputBundleNode(nb_cell["outputs"])
            container += output_node
class CellNode(nodes.container):
    """Represent a cell in the Sphinx AST."""

    def __init__(self, rawsource="", *children, **attributes):
        # ``rawsource``/``children`` are accepted for docutils signature
        # compatibility but intentionally discarded.
        super().__init__("", **attributes)
class CellInputNode(nodes.container):
    """Represent an input cell in the Sphinx AST."""

    def __init__(self, rawsource="", *children, **attributes):
        # ``rawsource``/``children`` are accepted for docutils signature
        # compatibility but intentionally discarded.
        super().__init__("", **attributes)
class CellOutputNode(nodes.container):
    """Represent an output cell in the Sphinx AST."""

    def __init__(self, rawsource="", *children, **attributes):
        # ``rawsource``/``children`` are accepted for docutils signature
        # compatibility but intentionally discarded.
        super().__init__("", **attributes)
class CellOutputBundleNode(nodes.container):
    """Represent a MimeBundle in the Sphinx AST, to be transformed later."""

    def __init__(self, outputs, rawsource="", *children, **attributes):
        # raw nbformat output list, consumed by a later Sphinx transform
        self.outputs = outputs
        super().__init__("", **attributes)