PK ACKT nbclean/__init__.py"""Tools for preprocessing and cleaning Jupyter Notebooks."""
# Package version string; the bundled dist-info directory is named
# ``nbclean-0.2.1``, so keep the two in sync when bumping.
__version__ = "0.2.1"
from .clean import NotebookCleaner
from .run import run_notebook_directory, run_notebook
PK CK>;Q nbclean/clean.py"""Functions to assist with grading."""
import nbformat as nbf
import os
from nbgrader.preprocessors import ClearSolutions
from .preprocessors import RemoveCells, ClearCells
from .utils import _check_nb_file
class NotebookCleaner(object):
    """Prepare Jupyter notebooks for distribution to students.

    Every cleaning method runs one preprocessor over the notebook stored in
    ``self.ntbk``, appends that preprocessor to ``self.preprocessors`` and
    returns ``self`` so calls can be chained.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        The input notebook.
    """

    def __init__(self, ntbk):
        self.ntbk = _check_nb_file(ntbk)
        self.preprocessors = []

    def __repr__(self):
        """Summarize how many preprocessors ran, one line per preprocessor."""
        lines = ["Number of preprocessors: {}\n---".format(
            len(self.preprocessors))]
        lines.extend(str(pre) for pre in self.preprocessors)
        return '\n'.join(lines)

    def clear(self, content=False, output=False, output_image=False,
              output_text=False, stderr=False, tag=None):
        """Clear the components of a notebook cell.

        Parameters
        ----------
        content : bool
            Whether to clear the content of cells.
        output : bool
            Whether to clear the entire output of cells.
        output_image : bool
            Whether to clear the image output of cells.
        output_text : bool
            Whether to clear the text output of cells.
        stderr : bool
            Whether to clear the stderr of cells.
        tag : string | None
            Only apply clearing to cells with a certain tag. If
            None, apply clearing to all cells.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.

        Raises
        ------
        ValueError
            If none of the clear options is True.
        """
        if not any([output, output_image, output_text, content, stderr]):
            raise ValueError(
                "At least one of the clear options must be True.")

        # `ClearCells` treats the string 'None' as "no tag filter", which
        # is why the tag is passed through `str`.
        pre = ClearCells(content=content, output=output,
                         output_text=output_text, output_image=output_image,
                         stderr=stderr, tag=str(tag))
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def remove_cells(self, tag):
        """Remove cells that carry a specific tag.

        Parameters
        ----------
        tag : str
            Any cell whose metadata ``tags`` list contains `tag` is
            removed from the notebook.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.
        """
        pre = RemoveCells(tag=tag)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def replace_text(self, text_replace_begin=u'### SOLUTION BEGIN',
                     text_replace_end=u'### SOLUTION END',
                     replace_code=None, replace_md=None):
        """Create answer cells for students to fill out.

        Text between `text_replace_begin` and `text_replace_end` is
        replaced with a stub. Students should then give their answers in
        this section.

        Parameters
        ----------
        text_replace_begin : str
            A string to search for in input cells. If the string is
            found, then anything between it and `text_replace_end` is
            removed.
        text_replace_end : str
            The ending delimiter for solution cells.
        replace_code : str | None
            Text to add to code solution cells. If None, `nbgrader`
            default is used.
        replace_md : str | None
            Text to add to markdown solution cells. If None, a default
            template will be used.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.
        """
        # NOTE(review): the keyword names (including the "delimeter"
        # spelling) are forwarded verbatim to `ClearSolutions` -- confirm
        # against the installed nbgrader version before renaming.
        kwargs = dict(begin_solution_delimeter=text_replace_begin,
                      end_solution_delimeter=text_replace_end,
                      enforce_metadata=False)
        if replace_code is not None:
            kwargs['code_stub'] = dict(python=replace_code)
        if replace_md is None:
            replace_md = ('---\n## Student Answer'
                          '\n\n*Double-click and add your answer between the '
                          'lines*\n\n---')
        kwargs['text_stub'] = replace_md

        pre = ClearSolutions(**kwargs)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def save(self, path_save):
        """Save the notebook to disk.

        Parameters
        ----------
        path_save : string
            The path for saving the file. Missing parent directories are
            created.
        """
        dir_save = os.path.dirname(path_save)
        print('Saving to {}'.format(path_save))
        # A bare filename has an empty dirname; `os.makedirs('')` would
        # raise, so only create directories when there is one to create.
        if dir_save and not os.path.isdir(dir_save):
            os.makedirs(dir_save)
        nbf.write(self.ntbk, path_save)
PK CKSi&g g nbclean/preprocessors.pyfrom traitlets import Unicode, Bool
from nbgrader.preprocessors import NbGraderPreprocessor
class RemoveCells(NbGraderPreprocessor):
    """Preprocessor that drops tagged cells from a notebook.

    This should not be used directly, instead, use the
    NotebookCleaner class.
    """
    # Tag identifying the cells to drop; the string 'None' means "unset".
    tag = Unicode("None")

    def preprocess(self, nb, resources):
        """Rebuild ``nb['cells']`` without the cells tagged with `tag`.

        Returns the (modified in place) notebook and the untouched
        `resources`.
        """
        cells = nb['cells']
        if self.tag == 'None':
            # NOTE(review): with no tag configured no cell is kept at
            # all -- confirm this is intended rather than a no-op.
            kept = []
        else:
            # Only keep the cells whose tag list lacks the target tag
            kept = [cell for cell in cells
                    if self.tag not in cell['metadata'].get('tags', [])]
        nb['cells'] = kept
        return nb, resources
class ClearCells(NbGraderPreprocessor):
    """Preprocessor that clears selected parts of notebook cells.

    This should not be used directly, instead, use the
    NotebookCleaner class.
    """
    # Switches selecting which parts of a cell get cleared.
    output = Bool(True)
    output_image = Bool(False)
    output_text = Bool(False)
    content = Bool(False)
    stderr = Bool(True)
    # Restrict processing to cells with this tag; 'None' means every cell.
    tag = Unicode('None')

    @staticmethod
    def _drop_mime(cell, fragment):
        """Pop each output ``data`` entry whose key contains `fragment`."""
        for out in cell.get('outputs', []):
            bundle = out.get('data', {})
            for mime in [key for key in bundle.keys() if fragment in key]:
                bundle.pop(mime)

    def preprocess(self, nb, resources):
        """Apply the enabled clearing steps to every selected cell.

        Returns the (modified in place) notebook and the untouched
        `resources`.
        """
        filter_by_tag = self.tag != 'None'
        for cell in nb['cells']:
            # Skip cells that do not carry the requested tag
            if filter_by_tag and \
                    self.tag not in cell['metadata'].get('tags', []):
                continue

            # None of the steps below adds or removes the key itself
            has_outputs = 'outputs' in cell.keys()

            # Wipe the whole output list
            if self.output is True and has_outputs:
                cell['outputs'] = []

            # Strip text representations from the outputs
            if self.output_text is True:
                self._drop_mime(cell, 'text/')

            # Strip image representations from the outputs
            if self.output_image is True:
                self._drop_mime(cell, 'image/')

            # Blank the cell source
            if self.content is True:
                cell['source'] = ''

            # Drop the outputs named 'stderr'
            if self.stderr is True and has_outputs:
                cell['outputs'] = [out for out in cell['outputs']
                                   if out.get('name', None) != 'stderr']
        return nb, resources

    def __repr__(self):
        return " Tag: {}".format(self.tag)
PK ݎCKu nbclean/run.pyimport nbformat as nbf
import os
import os.path as op
from nbgrader.preprocessors import LimitOutput, Execute
from .utils import _check_nb_file
from glob import glob
from tqdm import tqdm
def run_notebook_directory(path, path_save=None, max_output_lines=1000,
                           overwrite=False):
    """Run all the notebooks in a directory and save them somewhere else.

    Parameters
    ----------
    path : str
        A path to a directory that contains jupyter notebooks.
        All notebooks in this folder ending in `.ipynb` will be run,
        and the outputs will be placed in `path_save`. This may
        optionally contain a wildcard matching ``.ipynb`` in which
        case only notebooks that match will be run.
    path_save : str | None
        A path to a directory to save the notebooks. If this doesn't exist,
        it will be created. If `None`, notebooks will not be saved.
    max_output_lines : int | None
        The maximum number of lines allowed in notebook outputs.
    overwrite : bool
        Whether to overwrite the output directory if it exists.

    Returns
    -------
    notebooks : list
        A list of the `NotebookNode` instances, one for each notebook.

    Raises
    ------
    ValueError
        If `path` does not exist (or, for a ``.ipynb`` path/wildcard,
        matches nothing), or if `path_save` exists and `overwrite` is
        not True.
    """
    # A path mentioning '.ipynb' is treated as a file name or wildcard and
    # handed to glob as-is; it cannot be validated with `op.exists` because
    # a wildcard never exists literally.
    is_pattern = '.ipynb' in path
    if not is_pattern and not op.exists(path):
        raise ValueError("You've specified an input path that doesn't exist")
    to_glob = path if is_pattern else op.join(path, '*.ipynb')
    notebooks = glob(to_glob)
    if is_pattern and not notebooks:
        raise ValueError("You've specified an input path that doesn't exist")

    # Fail before the (potentially slow) execution rather than after it
    save_dir_exists = path_save is not None and op.exists(path_save)
    if save_dir_exists and overwrite is not True:
        raise ValueError('path_save exists and overwrite is not True')

    # Execute notebooks
    outputs = []
    for notebook in tqdm(notebooks):
        outputs.append(run_notebook(notebook,
                                    max_output_lines=max_output_lines))

    # Now save them
    if path_save is not None:
        print('Saving {} notebooks to: {}'.format(len(notebooks), path_save))
        if not save_dir_exists:
            os.makedirs(path_save)
        else:
            print('Overwriting output directory')
            # Join with a separator so files *inside* the directory match
            for ifile in glob(op.join(path_save, '*-exe.ipynb')):
                os.remove(ifile)

        for filename, notebook in zip(notebooks, outputs):
            # splitext copes with additional dots in the notebook name
            stem, ext = op.splitext(op.basename(filename))
            nbf.write(notebook, op.join(path_save, stem + '-exe' + ext))
    return outputs
def run_notebook(ntbk, max_output_lines=1000):
    """Execute a notebook and optionally cap the length of its outputs.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        The input notebook.
    max_output_lines : int | None
        The maximum number of lines allowed in notebook outputs. If None,
        the output-limiting step is skipped.

    Returns
    -------
    The notebook produced by the last preprocessor in the pipeline.
    """
    notebook = _check_nb_file(ntbk)

    # Execution always runs; output limiting is appended on request
    pipeline = [Execute()]
    if max_output_lines is not None:
        limiter = LimitOutput(max_lines=max_output_lines,
                              max_traceback=max_output_lines)
        pipeline.append(limiter)

    for step in pipeline:
        notebook, _resources = step.preprocess(notebook, {})
    return notebook
PK ݎCKAg g nbclean/utils.pyimport nbformat as nbf
from nbformat.notebooknode import NotebookNode
from copy import deepcopy
def _check_nb_file(ntbk):
    """Return a deep copy of the notebook given as a path or a node.

    A string is read from disk with ``nbf.read`` (no version conversion);
    a ``NotebookNode`` is used directly. Anything else raises ``TypeError``.
    The result is always a deep copy, so callers can mutate it freely.
    """
    if isinstance(ntbk, str):
        loaded = nbf.read(ntbk, nbf.NO_CONVERT)
        return deepcopy(loaded)
    if isinstance(ntbk, NotebookNode):
        return deepcopy(ntbk)
    raise TypeError('`ntbk` must be type string or `NotebookNode`')
PK \CK5 \ - nbclean/.ipynb_checkpoints/demo-checkpoint.pyimport matplotlib.pyplot as plt
import numpy as np
# Switch matplotlib to interactive mode before drawing.
plt.ion()
# Scatter 1000 random points: the two rows of the (2, 1000) sample are
# unpacked as the x and y coordinates, colored by a third random vector.
plt.scatter(*np.random.randn(2, 1000), c=np.random.randn(1000))
plt.show()PK CK D D nbclean/tests/test_nbclean.pyimport nbclean as nbc
import pytest
import os
# Locate the test notebook shipped in `examples`, relative to this file
path = os.path.dirname(__file__)
path_notebook = path + '/../../examples/test_notebooks/test_notebook.ipynb'

# Delimiters surrounding the solution text to be replaced
text_replace_begin = '### SOLUTION BEGIN'
text_replace_end = '### SOLUTION END'

# Build the cleaned notebook once at import time. Every cleaning method
# returns the cleaner itself, so the steps are chained in order: clear
# tagged cell parts, remove tagged cells, then replace solution text.
ntbk = nbc.NotebookCleaner(path_notebook)
(ntbk
 .clear(output=True, tag='hide_output')
 .clear(output=False, content=True, tag='hide_content')
 .clear(output=False, stderr=True, tag='hide_stderr')
 .remove_cells(tag='remove')
 .replace_text(text_replace_begin, text_replace_end))
def test_nbclean():
    """Check that every cleaning step left its mark on the notebook."""
    cells = ntbk.ntbk.cells

    # The notebook must contain at least one cell for each tested tag
    for kind in ('hide_output', 'hide_content', 'hide_stderr'):
        assert any(kind in each['metadata'].get('tags', [])
                   for each in cells)

    for cell in cells:
        # Untagged cells are not inspected any further
        tags = cell['metadata'].get('tags', None)
        if tags is None:
            continue

        # Tag-driven clearing
        if 'hide_output' in tags:
            assert len(cell['outputs']) == 0
        if 'hide_content' in tags:
            assert len(cell['source']) == 0
        if 'hide_stderr' in tags:
            stream_names = [out.get('name', '') for out in cell['outputs']]
            assert 'stderr' not in stream_names

        # Tag-driven removal
        assert 'remove' not in tags

        # Text replacing
        source = cell['source']
        if "# First we'll create 'a'" in source:
            assert '### SOLUTION BEGIN' not in source

    # The final cell must have kept its outputs, stderr and source
    last = cells[-1]
    assert len(last['outputs']) != 0
    assert 'stderr' in [out.get('name', '') for out in last['outputs']]
    assert len(last['source']) != 0


if __name__ == '__main__':
    test_nbclean()
PK aCK D D ; nbclean/tests/.ipynb_checkpoints/test_nbclean-checkpoint.pyimport nbclean as nbc
import pytest
import os
# Locate the test notebook shipped in `examples`, relative to this file
path = os.path.dirname(__file__)
path_notebook = path + '/../../examples/test_notebooks/test_notebook.ipynb'

# Delimiters surrounding the solution text to be replaced
text_replace_begin = '### SOLUTION BEGIN'
text_replace_end = '### SOLUTION END'

# Build the cleaned notebook once at import time. Every cleaning method
# returns the cleaner itself, so the steps are chained in order: clear
# tagged cell parts, remove tagged cells, then replace solution text.
ntbk = nbc.NotebookCleaner(path_notebook)
(ntbk
 .clear(output=True, tag='hide_output')
 .clear(output=False, content=True, tag='hide_content')
 .clear(output=False, stderr=True, tag='hide_stderr')
 .remove_cells(tag='remove')
 .replace_text(text_replace_begin, text_replace_end))
def test_nbclean():
    """Check that every cleaning step left its mark on the notebook."""
    cells = ntbk.ntbk.cells

    # The notebook must contain at least one cell for each tested tag
    for kind in ('hide_output', 'hide_content', 'hide_stderr'):
        assert any(kind in each['metadata'].get('tags', [])
                   for each in cells)

    for cell in cells:
        # Untagged cells are not inspected any further
        tags = cell['metadata'].get('tags', None)
        if tags is None:
            continue

        # Tag-driven clearing
        if 'hide_output' in tags:
            assert len(cell['outputs']) == 0
        if 'hide_content' in tags:
            assert len(cell['source']) == 0
        if 'hide_stderr' in tags:
            stream_names = [out.get('name', '') for out in cell['outputs']]
            assert 'stderr' not in stream_names

        # Tag-driven removal
        assert 'remove' not in tags

        # Text replacing
        source = cell['source']
        if "# First we'll create 'a'" in source:
            assert '### SOLUTION BEGIN' not in source

    # The final cell must have kept its outputs, stderr and source
    last = cells[-1]
    assert len(last['outputs']) != 0
    assert 'stderr' in [out.get('name', '') for out in last['outputs']]
    assert len(last['source']) != 0


if __name__ == '__main__':
    test_nbclean()
PK ݎCK
9 9 nbclean-0.2.1.dist-info/LICENSEThe MIT License (MIT)
Copyright (c) 2017 Chris Holdgraf
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
PK !H١W d nbclean-0.2.1.dist-info/WHEELHM
K-*ϳR03rOK-J,/RH,Q0343
/,
(-JLR()*M
ILR(4KM̫#D PK !HiIm nbclean-0.2.1.dist-info/METADATAePAr0@ѧ2 m 3g'V2ܙNzv]퐕V/ɐU%e윊#M0P1D1%O?<01Fh#:ZlaԩCMSYuvdTwӣOe}k߭Xe)JX$ؐէ+\SJ+a<ũFڪr0XWc-B)zwx&b^Lb 93+p\ixKlMwE;PK !H\D* nbclean-0.2.1.dist-info/RECORDr@}
Er*
]O2j*,] 9˪р}I8$e}&j.Z>xs#klT# (