Papermill
Papermill is a Python library for parameterizing and executing Jupyter and nteract notebooks. It is widely used for creating dynamic reports, running ETL jobs, and orchestrating data science workflows where notebook execution is a core component. The current version is 2.7.0; new releases are generally published when new features or critical bug fixes are ready.
Warnings
- breaking Papermill 2.0.0 introduced significant breaking changes, including the removal of `pm.view` and modifications to argument names/defaults in `execute_notebook`. Code written for Papermill 1.x will likely require updates.
- gotcha The environment where Papermill is run is separate from the kernel environment used by the executed notebook. Ensure the kernel specified in your notebook (e.g., 'python3') has all necessary libraries installed, not just the environment where Papermill itself is installed.
- gotcha A `papermill.execute_notebook` call might complete successfully even if the underlying notebook cells raise errors. Papermill records the errors within the output notebook but doesn't necessarily propagate them as an exception by default.
- breaking The current Papermill release, 2.7.0, requires Python >=3.10. Older Papermill releases supported earlier Python versions (e.g., 3.6+), so pin an older release if you cannot upgrade Python.
Install
-
pip install papermill
Imports
- execute_notebook
import papermill as pm
pm.execute_notebook(...)
Quickstart
import json
import os
import sys

import papermill as pm

# Quickstart: write a tiny two-cell notebook to disk, execute it through
# Papermill with an overridden `message` parameter, then delete the temporary
# input and output files.

input_path = 'input_notebook.ipynb'
output_path = 'output_notebook.ipynb'
parameters = {'message': 'Papermill is awesome!'}

# Build the notebook as a Python dict and let `json` serialise it. A
# hand-written JSON string is easy to get wrong: Python expressions and `#`
# comments are not valid JSON and make the file unreadable as a notebook.
notebook_content = {
    "cells": [
        {
            "cell_type": "code",
            "execution_count": None,
            # nbformat 4.5 expects every cell to carry a unique id.
            "id": "parameters-cell",
            # Papermill injects the supplied parameters right after the cell
            # tagged "parameters".
            "metadata": {"tags": ["parameters"]},
            "outputs": [],
            # Each source entry is one line of the cell; the explicit "\n"
            # keeps the default assignment from being glued onto the comment.
            "source": [
                "# Parameters injected here\n",
                "message = 'default'",
            ],
        },
        {
            "cell_type": "code",
            "execution_count": None,
            "id": "hello-cell",
            "metadata": {},
            "outputs": [],
            "source": ["print(f'Hello from Papermill: {message}')"],
        },
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3,
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            # Record the interpreter running this script (major.minor.micro).
            "version": ".".join(str(part) for part in sys.version_info[:3]),
        },
    },
    "nbformat": 4,
    "nbformat_minor": 5,
}

with open(input_path, 'w', encoding='utf-8') as f:
    json.dump(notebook_content, f, indent=1)

try:
    # Execute the notebook with parameters
    pm.execute_notebook(
        input_path,
        output_path,
        parameters=parameters,
        report_mode=True,  # Hide cell inputs in the output notebook
    )
except pm.exceptions.PapermillExecutionError as e:
    print(f"Papermill execution failed: {e}")
except Exception as e:
    # Top-level boundary of the demo: report anything else (for example a
    # missing kernel) instead of showing a traceback.
    print(f"An unexpected error occurred: {e}")
else:
    print(f"Notebook executed successfully. Output saved to {output_path}")
    # To inspect what the notebook printed, load the output file here, before
    # it is cleaned up below. Printed text is stored in "stream" outputs:
    #
    #     with open(output_path, encoding='utf-8') as f:
    #         executed = json.load(f)
    #     for cell in executed['cells']:
    #         for output in cell.get('outputs', []):
    #             if output.get('output_type') == 'stream':
    #                 print(''.join(output['text']))
finally:
    # Clean up dummy files
    for path in (input_path, output_path):
        if os.path.exists(path):
            os.remove(path)