PyPDFtk

0.5 · active · verified Sat Apr 11

PyPDFtk is a Python wrapper for the powerful command-line tool PDFtk (PDF Toolkit). It provides a Pythonic interface to manipulate PDF documents, enabling operations such as filling forms with data, concatenating multiple PDFs, splitting a single PDF into individual pages, extracting specific page ranges, replacing pages, generating XFDF data, stamping, and adding backgrounds. The library is currently at version 0.5, with its last release in April 2021, and primarily follows a maintenance release cadence.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates basic operations like getting the number of pages and concatenating PDFs using `pypdftk`. It highlights the necessity of the `pdftk` binary and provides comments on how to set `PDFTK_PATH`. A form-filling example is commented out as it requires a specific fillable PDF and proper `pdftk` configuration.

import pypdftk
import os
import tempfile

# NOTE: You must have the 'pdftk' binary installed on your system; pypdftk only wraps it.
# If pdftk is not in your system's PATH, set the PDFTK_PATH environment variable.
# pypdftk reads PDFTK_PATH when it is imported, so set it in your shell, or assign
# os.environ['PDFTK_PATH'] *before* the `import pypdftk` line above takes effect,
# for example '/usr/local/bin/pdftk' on macOS or '/usr/bin/pdftk' on Linux.
# On Windows it might be 'C:/Program Files (x86)/PDFtk/bin/pdftk.exe'.

# Create a dummy PDF file for demonstration (in a real scenario, this would be an existing PDF form)
# This example can't create a fillable PDF, so it will demonstrate other functions.
# We'll use get_num_pages and concat for a runnable example.

# Build a small one-page PDF in memory so the quickstart is self-contained.
# In a real scenario, you'd use the path of an existing PDF instead.
def _build_minimal_pdf():
    """Return the bytes of a minimal, structurally valid one-page PDF.

    The cross-reference offsets, the ``startxref`` pointer and the content
    stream ``/Length`` are computed from the actual bytes instead of being
    hard-coded, so the file does not depend on the reader repairing a broken
    xref table.
    """
    stream = b"BT /F1 12 Tf 100 700 Td (Hello PyPDFtk!) Tj ET"
    objects = [
        b"<</Type/Catalog/Pages 2 0 R>>",
        b"<</Type/Pages/Count 1/Kids[3 0 R]>>",
        # The page declares the /F1 font that the content stream refers to.
        b"<</Type/Page/Parent 2 0 R/MediaBox[0 0 612 792]/Contents 4 0 R"
        b"/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>>>>>>>",
        b"<</Length %d>>\nstream\n%s\nendstream" % (len(stream), stream),
    ]

    pdf = bytearray(b"%PDF-1.4\n")
    offsets = []
    for number, body in enumerate(objects, start=1):
        # Record where each object starts; the xref table needs these offsets.
        offsets.append(len(pdf))
        pdf += b"%d 0 obj\n%s\nendobj\n" % (number, body)

    xref_offset = len(pdf)
    entry_count = len(objects) + 1  # +1 for the mandatory free entry 0
    pdf += b"xref\n0 %d\n" % entry_count
    # Every xref entry must be exactly 20 bytes, hence the space before "\n".
    pdf += b"0000000000 65535 f \n"
    for offset in offsets:
        pdf += b"%010d 00000 n \n" % offset
    pdf += b"trailer\n<</Size %d/Root 1 0 R>>\nstartxref\n%d\n%%%%EOF\n" % (
        entry_count,
        xref_offset,
    )
    return bytes(pdf)


dummy_pdf_content = _build_minimal_pdf()

# Write the same PDF bytes to two separate temporary files. delete=False keeps
# the files on disk after the handles close, so they can be passed by path below.
temp_pdf_paths = []
for _ in range(2):
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as pdf_file:
        pdf_file.write(dummy_pdf_content)
    temp_pdf_paths.append(pdf_file.name)

pdf_path1, pdf_path2 = temp_pdf_paths

# Bound up front so the cleanup below can tell whether concat() ever produced
# a file, without inspecting locals().
output_concat_pdf = None

try:
    # Get number of pages
    num_pages = pypdftk.get_num_pages(pdf_path1)
    print(f"Number of pages in {pdf_path1}: {num_pages}")

    # Concatenate two dummy PDFs; the returned value is printed as the output
    # location and removed again during cleanup.
    output_concat_pdf = pypdftk.concat([pdf_path1, pdf_path2])
    print(f"Concatenated PDFs to: {output_concat_pdf}")

    # Example of filling a form (requires a real fillable PDF and pdftk to be correctly configured)
    # To run this, replace 'path/to/model.pdf' with a path to an actual fillable PDF
    # and ensure PDFTK_PATH is correctly set.
    # datas = {'firstname': 'John', 'lastname': 'Doe'}
    # try:
    #     filled_pdf = pypdftk.fill_form('path/to/model.pdf', datas)
    #     print(f"Filled form saved to: {filled_pdf}")
    # except Exception as e:
    #     print(f"Could not fill form (requires a real fillable PDF and pdftk setup): {e}")

finally:
    # Clean up temporary files. Each path is handled independently so that one
    # file that is already gone does not prevent the others from being removed.
    for leftover_path in (pdf_path1, pdf_path2, output_concat_pdf):
        if not leftover_path:
            continue  # concat() never ran or failed before returning a path
        try:
            os.remove(leftover_path)
        except FileNotFoundError:
            pass  # already removed; nothing left to clean up
    # If you enable the fill_form example, initialise `filled_pdf = None` before
    # the `try` and add it to the tuple above so it is cleaned up as well.

view raw JSON →