Papermill

2.7.0 · active · verified Thu Apr 09

Papermill is a Python library that allows you to parameterize and execute Jupyter and nteract Notebooks. It's widely used for creating dynamic reports, running ETL jobs, and orchestrating data science workflows where notebook execution is a core component. The current version is 2.7.0, and it generally follows a release cadence tied to new features or critical bug fixes.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to use `papermill.execute_notebook` to run a Jupyter notebook with injected parameters. It creates a simple input notebook on the fly, executes it, saves the output notebook, and then removes both temporary files. Papermill looks for the cell tagged `parameters` in the input notebook and inserts a new cell right after it containing the values from the `parameters` dictionary, so the injected values override the defaults defined in the tagged cell. Execution errors are caught and reported rather than raised.

import json
import os
import sys

import papermill as pm

# Paths and parameters used by the demo. Both notebook files are temporary
# and are deleted again at the end of main().
input_path = 'input_notebook.ipynb'
output_path = 'output_notebook.ipynb'
parameters = {'message': 'Papermill is awesome!'}


def build_input_notebook():
    """Return a minimal two-cell demo notebook as an nbformat-4 dict.

    The first cell carries the ``parameters`` tag and defines the default
    value of ``message``; the second cell prints it. The notebook is built
    as a Python dict (and serialised with ``json`` by the caller) so the
    file written to disk is always valid JSON.
    """
    # Record the interpreter that is actually running this script.
    python_version = ".".join(str(part) for part in sys.version_info[:3])

    return {
        "cells": [
            {
                "cell_type": "code",
                "execution_count": None,
                # nbformat 4.5 requires every cell to carry a unique id.
                "id": "parameters-cell",
                "metadata": {"tags": ["parameters"]},
                "outputs": [],
                # Multi-line sources are a list of lines; every line except
                # the last must end with its own newline.
                "source": [
                    "# Parameters injected here\n",
                    "message = 'default'",
                ],
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "id": "greeting-cell",
                "metadata": {},
                "outputs": [],
                "source": ["print(f'Hello from Papermill: {message}')"],
            },
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            },
            "language_info": {
                "codemirror_mode": {"name": "ipython", "version": 3},
                "file_extension": ".py",
                "mimetype": "text/x-python",
                "name": "python",
                "nbconvert_exporter": "python",
                "pygments_lexer": "ipython3",
                "version": python_version,
            },
        },
        "nbformat": 4,
        "nbformat_minor": 5,
    }


def main():
    """Write the demo notebook, execute it with Papermill, then clean up.

    Failures are printed instead of raised so the quickstart always reaches
    the cleanup step and leaves no stray files behind.
    """
    try:
        # Writing happens inside the try so that a partially written input
        # file is still removed by the finally clause.
        with open(input_path, 'w', encoding='utf-8') as f:
            json.dump(build_input_notebook(), f, indent=1)

        # Execute the notebook with parameters
        pm.execute_notebook(
            input_path,
            output_path,
            parameters=parameters,
            report_mode=True,  # Hide code-cell inputs in the output notebook
        )
        print(f"Notebook executed successfully. Output saved to {output_path}")

        # Optional: show what the executed notebook printed.
        # import nbformat
        # nb = nbformat.read(output_path, as_version=4)
        # for cell in nb.cells:
        #     for output in cell.get("outputs", []):
        #         if output.output_type == "stream":
        #             print(output.text, end="")

    except pm.exceptions.PapermillExecutionError as e:
        # Raised when a cell of the executed notebook fails.
        print(f"Papermill execution failed: {e}")
    except Exception as e:
        # Top-level boundary of a demo script: report anything else (missing
        # kernel, unwritable directory, ...) rather than crash before cleanup.
        print(f"An unexpected error occurred: {e}")
    finally:
        # Clean up dummy files
        for path in (input_path, output_path):
            if os.path.exists(path):
                os.remove(path)


if __name__ == "__main__":
    main()

view raw JSON →