-
Notifications
You must be signed in to change notification settings - Fork 556
/
exporter.py
342 lines (279 loc) · 11.8 KB
/
exporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""This module defines a base Exporter class. For Jinja template-based export,
see templateexporter.py.
"""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import collections
import copy
import datetime
import os
import sys
from typing import Optional
import nbformat
from traitlets import Bool, HasTraits, List, TraitError, Unicode
from traitlets.config import Config
from traitlets.config.configurable import LoggingConfigurable
from traitlets.utils.importstring import import_item
class ResourcesDict(collections.defaultdict):
    """Mapping whose lookups of absent keys evaluate to an empty string.

    ``__missing__`` is overridden directly, so no ``default_factory`` is
    consulted and the absent key is *not* stored in the mapping.
    """

    def __missing__(self, key):
        # Hand back an empty string without inserting ``key``.
        return str()
class FilenameExtension(Unicode):
    """Unicode trait holding a filename extension.

    An empty value is accepted; any non-empty value must start with a dot.
    """

    default_value = ""
    info_text = "a filename extension, beginning with a dot"

    def validate(self, obj, value):
        """Return the validated extension or raise ``TraitError``."""
        # Let the parent trait coerce/validate the value as unicode first.
        extension = super().validate(obj, value)

        # Empty extensions and dot-prefixed extensions are both acceptable.
        if not extension or extension.startswith("."):
            return extension

        template = "FileExtension trait '{}' does not begin with a dot: {!r}"
        raise TraitError(template.format(self.name, extension))
class Exporter(LoggingConfigurable):
    """
    Base exporter: sequentially runs a list of preprocessors on a
    NotebookNode object, then returns the modified NotebookNode together
    with the accompanying resources dict.
    """

    enabled = Bool(
        True,
        help="Disable this exporter (and any exporters inherited from it).",
    ).tag(config=True)

    file_extension = FilenameExtension(
        help="Extension of the file that should be written to disk",
    ).tag(config=True)

    optimistic_validation = Bool(
        False,
        help="Reduces the number of validation steps so that it only occurs after all preprocesors have run.",
    ).tag(config=True)

    # MIME type of the result file, for HTTP response headers. Kept as a
    # plain class attribute (not a traitlet) so that it can be read from the
    # class itself and not only from instances.
    output_mimetype = ""

    # Whether the notebook front-end should offer this converter: when it
    # should, this holds the friendly (possibly translated) display name.
    export_from_notebook = None

    # Configurability: lets the user add preprocessors easily.
    preprocessors = List(
        help="""List of preprocessors, by name or namespace, to enable.""",
    ).tag(config=True)

    # Registered preprocessor callables, populated by register_preprocessor.
    _preprocessors = List()

    default_preprocessors = List(
        [
            "nbconvert.preprocessors.TagRemovePreprocessor",
            "nbconvert.preprocessors.RegexRemovePreprocessor",
            "nbconvert.preprocessors.ClearOutputPreprocessor",
            "nbconvert.preprocessors.ExecutePreprocessor",
            "nbconvert.preprocessors.coalesce_streams",
            "nbconvert.preprocessors.SVG2PDFPreprocessor",
            "nbconvert.preprocessors.LatexPreprocessor",
            "nbconvert.preprocessors.HighlightMagicsPreprocessor",
            "nbconvert.preprocessors.ExtractOutputPreprocessor",
            "nbconvert.preprocessors.ClearMetadataPreprocessor",
        ],
        help=(
            "List of preprocessors available by default, by name, namespace,\n"
            "instance, or type."
        ),
    ).tag(config=True)
def __init__(self, config=None, **kw):
    """
    Public constructor

    Parameters
    ----------
    config : ``traitlets.config.Config``
        User configuration instance. When truthy it is merged on top of
        ``self.default_config``.
    `**kw`
        Additional keyword arguments passed to parent __init__
    """
    # Start from the exporter defaults and layer the user's config over them.
    merged_config = self.default_config
    if config:
        merged_config.merge(config)

    super().__init__(config=merged_config, **kw)

    self._init_preprocessors()
    # Notebook metadata recorded by from_notebook_node, keyed by notebook name.
    self._nb_metadata = {}
@property
def default_config(self):
    """Return a new, empty ``Config`` object each time it is accessed."""
    base_config = Config()
    return base_config
def from_notebook_node(self, nb, resources=None, **kw):
    """
    Convert a notebook from a notebook node instance.

    Parameters
    ----------
    nb : :class:`~nbformat.NotebookNode`
        Notebook node (dict-like with attr-access)
    resources : dict
        Additional resources that can be accessed read/write by
        preprocessors and filters.
    `**kw`
        Ignored

    Returns
    -------
    tuple
        ``(notebook, resources)`` as produced by ``self._preprocess`` on a
        deep copy of ``nb``.
    """
    # Work on a private copy so the caller's notebook is left untouched.
    working_nb = copy.deepcopy(nb)
    resources = self._init_resources(resources)

    source_metadata = nb["metadata"]
    if "language" in source_metadata:
        resources["language"] = source_metadata["language"].lower()

    # Preprocess
    working_nb, resources = self._preprocess(working_nb, resources)

    if resources is None:
        metadata_key = ""
    else:
        resource_metadata = resources.get("metadata", {})
        metadata_key = os.path.join(
            resource_metadata.get("path", ""),
            resource_metadata.get("name", ""),
        )

    # Remember the processed notebook's metadata under "<path>/<name>".
    self._nb_metadata[metadata_key] = working_nb.metadata
    return working_nb, resources
def from_filename(self, filename: str, resources: Optional[dict] = None, **kw):
    """
    Convert a notebook from a notebook file.

    Parameters
    ----------
    filename : str
        Full filename of the notebook file to open and convert.
    resources : dict
        Additional resources that can be accessed read/write by
        preprocessors and filters.
    `**kw`
        Forwarded to ``self.from_file``.
    """
    # Pull the metadata from the filesystem.
    if resources is None:
        resources = ResourcesDict()
    if "metadata" not in resources or resources["metadata"] == "":
        resources["metadata"] = ResourcesDict()
    metadata = resources["metadata"]

    directory, basename = os.path.split(filename)
    metadata["name"] = os.path.splitext(basename)[0]
    metadata["path"] = directory

    modified_date = datetime.datetime.fromtimestamp(os.path.getmtime(filename))
    # win32 gets the zero-padded day directive; every other platform uses
    # "%-d" for the day of the month.
    date_format = "%B %d, %Y" if sys.platform == "win32" else "%B %-d, %Y"
    metadata["modified_date"] = modified_date.strftime(date_format)

    with open(filename, encoding="utf-8") as notebook_file:
        return self.from_file(notebook_file, resources=resources, **kw)
def from_file(self, file_stream, resources=None, **kw):
    """
    Convert a notebook from a notebook file.

    Parameters
    ----------
    file_stream : file-like object
        Notebook file-like object to convert.
    resources : dict
        Additional resources that can be accessed read/write by
        preprocessors and filters.
    `**kw`
        Forwarded to ``self.from_notebook_node``.
    """
    # Read the stream as a version-4 notebook, then delegate.
    notebook = nbformat.read(file_stream, as_version=4)
    return self.from_notebook_node(notebook, resources=resources, **kw)
def register_preprocessor(self, preprocessor, enabled=False):
    """
    Register a preprocessor.

    Preprocessors are classes that act upon the notebook before it is
    passed into the Jinja templating engine. Preprocessors are also
    capable of passing additional information to the Jinja
    templating engine.

    Parameters
    ----------
    preprocessor : `nbconvert.preprocessors.Preprocessor`
        A dotted module name, a type, or an instance
    enabled : bool
        Mark the preprocessor as enabled

    Returns
    -------
    The callable that was appended to ``self._preprocessors``. For a dotted
    name or a class this is the instance that was constructed from it.

    Raises
    ------
    TypeError
        If ``preprocessor`` is None, or is neither a string, a class, nor a
        callable object.
    """
    if preprocessor is None:
        raise TypeError("preprocessor must not be None")

    if isinstance(preprocessor, str):
        # Dotted name: import what it refers to, then register that.
        return self.register_preprocessor(import_item(preprocessor), enabled)

    if isinstance(preprocessor, type):
        if issubclass(preprocessor, HasTraits):
            # Configurable class: construct it with this exporter as parent.
            instance = preprocessor(parent=self)
        else:
            # Not configurable: construct it without arguments.
            instance = preprocessor()
        # Return the constructed instance so every successful registration
        # hands back the object that now lives in ``self._preprocessors``.
        return self.register_preprocessor(instance, enabled)

    if callable(preprocessor):
        # Already a usable callable (function or instance); register as-is.
        if enabled:
            preprocessor.enabled = True
        self._preprocessors.append(preprocessor)
        return preprocessor

    # An instance of something that cannot be called. ``str.format`` is used
    # instead of ``%`` so that tuple arguments are reported correctly rather
    # than being unpacked as multiple format arguments.
    raise TypeError(
        "preprocessor must be callable or an importable constructor, got {!r}".format(
            preprocessor
        )
    )
def _init_preprocessors(self):
"""
Register all of the preprocessors needed for this exporter, disabled
unless specified explicitly.
"""
self._preprocessors = []
# Load default preprocessors (not necessarily enabled by default).
for preprocessor in self.default_preprocessors:
self.register_preprocessor(preprocessor)
# Load user-specified preprocessors. Enable by default.
for preprocessor in self.preprocessors:
self.register_preprocessor(preprocessor, enabled=True)
def _init_resources(self, resources):
    """Normalise ``resources`` into a ``ResourcesDict`` ready for export.

    Ensures a ``ResourcesDict`` under ``"metadata"``, a non-empty metadata
    ``"name"`` (defaulting to ``"Notebook"``), and sets
    ``"output_extension"`` from ``self.file_extension``.
    """
    # Make sure the resources dict is of ResourcesDict type.
    if resources is None:
        resources = ResourcesDict()
    elif not isinstance(resources, ResourcesDict):
        converted = ResourcesDict()
        converted.update(resources)
        resources = converted

    # Make sure the metadata extension exists in resources.
    if "metadata" not in resources:
        resources["metadata"] = ResourcesDict()
    elif not isinstance(resources["metadata"], ResourcesDict):
        converted_metadata = ResourcesDict()
        converted_metadata.update(resources["metadata"])
        resources["metadata"] = converted_metadata

    metadata = resources["metadata"]
    if not metadata["name"]:
        metadata["name"] = "Notebook"

    # Set the output extension.
    resources["output_extension"] = self.file_extension
    return resources
def _validate_preprocessor(self, nbc, preprocessor):
    """Validate ``nbc`` with nbformat; log and re-raise on failure.

    ``preprocessor`` is only used to name the offending step in the log
    message.
    """
    try:
        nbformat.validate(nbc, relax_add_props=True)
    except nbformat.ValidationError:
        # Record which step produced the invalid notebook, then propagate.
        self.log.error("Notebook is invalid after preprocessor %s", preprocessor)
        raise
    return None
def _preprocess(self, nb, resources):
"""
Preprocess the notebook before passing it into the Jinja engine.
To preprocess the notebook is to successively apply all the
enabled preprocessors. Output from each preprocessor is passed
along to the next one.
Parameters
----------
nb : notebook node
notebook that is being exported.
resources : a dict of additional resources that
can be accessed read/write by preprocessors
"""
# Do a copy.deepcopy first,
# we are never safe enough with what the preprocessors could do.
nbc = copy.deepcopy(nb)
resc = copy.deepcopy(resources)
# Run each preprocessor on the notebook. Carry the output along
# to each preprocessor
for preprocessor in self._preprocessors:
nbc, resc = preprocessor(nbc, resc)
if not self.optimistic_validation:
self._validate_preprocessor(nbc, preprocessor)
if self.optimistic_validation:
self._validate_preprocessor(nbc, preprocessor)
return nbc, resc