{"library":"ocrmypdf","title":"OCRmyPDF","description":"OCRmyPDF is a Python library and application that adds an invisible OCR text layer to scanned PDF files, making them searchable. It utilizes the Tesseract OCR engine and other external tools to process documents and can produce highly optimized, archive-ready (PDF/A) files. The project is actively maintained with frequent updates, typically seeing major version releases annually and minor/patch releases more often.","language":"python","status":"active","last_verified":"2026-05-15","install":{"commands":["pip install ocrmypdf"],"cli":{"name":"ocrmypdf","version":"17.4.2"}},"imports":["from ocrmypdf import ocr","from ocrmypdf import OcrOptions"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import ocrmypdf\nfrom ocrmypdf import OcrOptions\nimport os\n\n# Create dummy input.pdf for demonstration\nwith open('input.pdf', 'wb') as f:\n    f.write(b'%PDF-1.4\\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Count 1/Kids[3 0 R]>>endobj 3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Contents 4 0 R>>endobj 4 0 obj<</Length 11>>stream\\nBT /F1 12 Tf 72 712 Td (Hello World)Tj ET\\nendstream\\nendobj\\nxref\\n0 5\\n0000000000 65535 f\\n0000000009 00000 n\\n0000000055 00000 n\\n0000000109 00000 n\\n0000000171 00000 n\\ntrailer<</Size 5/Root 1 0 R>>startxref\\n200\\n%%EOF')\n\n# The recommended way to call ocrmypdf.ocr() is to construct an OcrOptions object.\n# This provides type hints and validation. (v17.0.0+)\noptions = OcrOptions(\n    input_file='input.pdf',\n    output_file='output_ocr.pdf',\n    deskew=True,\n    languages=['eng'],\n    # Example: use environment variable for Tesseract path if needed for CI/local testing\n    # tesseract_path=os.environ.get('TESSERACT_PATH', None)\n)\n\ntry:\n    ocrmypdf.ocr(options)\n    print(\"OCR processing complete. 
Output saved to output_ocr.pdf\")\nexcept ocrmypdf.exceptions.BadArgsError as e:\n    print(f\"Error with OCRmyPDF arguments: {e}\")\nexcept ocrmypdf.exceptions.InputFileError as e:\n    print(f\"Error with input file: {e}\")\nexcept Exception as e:\n    print(f\"An unexpected error occurred: {e}\")\nfinally:\n    # Clean up dummy files\n    if os.path.exists('input.pdf'):\n        os.remove('input.pdf')\n    if os.path.exists('output_ocr.pdf'):\n        os.remove('output_ocr.pdf')\n","lang":"python","description":"This quickstart demonstrates how to use the modern API introduced in OCRmyPDF v17.0.0, which involves passing an `OcrOptions` object to the `ocrmypdf.ocr()` function. This provides better type hinting and argument validation. It includes basic error handling and uses dummy files for immediate runnable testing. Remember that `ocrmypdf` heavily relies on external system dependencies (like Tesseract and Ghostscript) which must be installed separately.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-15","installed_version":"15.4.4","pypi_latest":"17.4.2","is_stale":true,"summary":{"python_range":"3.9–3.13","success_rate":100,"avg_install_s":8.9,"avg_import_s":1.76,"wheel_type":"wheel"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.39,"mem_mb":22.9,"disk_size":"111.9M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":8,"import_time_s":1.06,"mem_mb":22.9,"disk_size":"103M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine 
(musl)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.14,"mem_mb":29.1,"disk_size":"175.0M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":10.3,"import_time_s":1.95,"mem_mb":29.1,"disk_size":"163M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.23,"mem_mb":28.9,"disk_size":"165.5M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":8.4,"import_time_s":2.21,"mem_mb":28.9,"disk_size":"153M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.07,"mem_mb":29.4,"disk_size":"165.3M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":8.6,"import_time_s":2.01,"mem_mb":29.4,"disk_size":"153M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.37,"mem_mb":26.3,"disk_size":"107.3M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim 
(glibc)","variant":"ocrmypdf","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":9.1,"import_time_s":1.22,"mem_mb":26.3,"disk_size":"104M"}]}}