{"library":"petastorm","title":"Petastorm","description":"Petastorm is a Python library that enables single-node or distributed training of machine learning models directly from datasets stored in Parquet format. It provides data access for popular frameworks like TensorFlow, PyTorch, and Apache Spark. The current stable version is 0.13.1, with releases typically following a feature-driven cadence, often including release candidates before stable versions.","language":"python","status":"active","last_verified":"Sun May 17","install":{"commands":["pip install petastorm","pip install petastorm[tensorflow]","pip install petastorm[pytorch]","pip install petastorm[spark]"],"cli":null},"imports":["from petastorm import make_reader","from petastorm import make_writer","from petastorm.unischema import Unischema","from petastorm.pytorch import DataLoader","from petastorm.spark import SparkDatasetConverter"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import os\nimport shutil\nimport numpy as np\nfrom petastorm import make_reader, make_writer\nfrom petastorm.unischema import Unischema, UnischemaField, ScalarCodec\nfrom petastorm.codecs import CompressedNdarrayCodec\n\n# 1. Define a schema for your data\nMySchema = Unischema(\n    'MySchema',\n    [\n        UnischemaField('id', np.int32, (), ScalarCodec(np.int32), False),\n        UnischemaField('value', np.float64, (), ScalarCodec(np.float64), False),\n        UnischemaField('image', np.uint8, (10, 10, 3), CompressedNdarrayCodec(), False),\n    ]\n)\n\n# 2. Define a dataset path (using a temporary local directory for example)\ndataset_url = 'file:///tmp/petastorm_example_data'\n# Clean up previous data if it exists\nif os.path.exists('/tmp/petastorm_example_data'):\n    shutil.rmtree('/tmp/petastorm_example_data')\n\n# 3. 
Write some dummy data to the Parquet dataset\nprint(f\"Writing dummy data to {dataset_url}...\")\nwith make_writer(dataset_url, MySchema, row_group_size_bytes=2 * 1024 * 1024) as writer:\n    for i in range(10):\n        writer.write(\n            MySchema.make_row(\n                id=i,\n                value=float(i * 10),\n                image=np.random.randint(0, 256, size=(10, 10, 3), dtype=np.uint8)\n            )\n        )\nprint(f\"Successfully wrote 10 rows.\")\n\n# 4. Read data using make_reader\n# reader_pool_type='thread' is often suitable for local development.\n# For production, 'process' might be preferred depending on data access patterns.\nprint(\"\\nReading data from the dataset:\")\nwith make_reader(dataset_url, reader_pool_type='thread', num_epochs=1) as reader:\n    for i, row in enumerate(reader):\n        print(f\"Row {i}: id={row.id}, value={row.value}, image_shape={row.image.shape}\")\n        if i >= 2: # Print only a few rows for brevity\n            break\nprint(\"Finished reading example data.\")\n\n# Clean up the temporary dataset\nshutil.rmtree('/tmp/petastorm_example_data')\n","lang":"python","description":"This quickstart demonstrates how to define a data schema, write sample data to a Parquet dataset using `make_writer`, and then read it back using `make_reader`. The example cleans up the temporary directory after execution. 
For real-world usage, consider configuring `reader_pool_type` and `num_epochs` based on your training requirements.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-17","installed_version":"0.13.1","pypi_latest":"0.13.1","is_stale":false,"summary":{"python_range":"3.9–3.13","success_rate":100,"avg_install_s":21,"avg_import_s":null,"wheel_type":"sdist"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"845.1M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"845.1M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"845.1M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"845.1M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.2,"import_time_s":null,"mem_mb":null,"disk_size":"810M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim 
(glibc)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.2,"import_time_s":null,"mem_mb":null,"disk_size":"810M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.1,"import_time_s":null,"mem_mb":null,"disk_size":"810M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.1,"import_time_s":null,"mem_mb":null,"disk_size":"810M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"867.2M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"867.2M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"867.2M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"867.2M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim 
(glibc)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":40,"import_time_s":null,"mem_mb":null,"disk_size":"831M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":41.6,"import_time_s":null,"mem_mb":null,"disk_size":"831M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":41.9,"import_time_s":null,"mem_mb":null,"disk_size":"831M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":40.9,"import_time_s":null,"mem_mb":null,"disk_size":"831M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"854.9M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"854.9M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"854.9M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine 
(musl)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"854.9M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":41.4,"import_time_s":null,"mem_mb":null,"disk_size":"819M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":41.7,"import_time_s":null,"mem_mb":null,"disk_size":"819M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":41.2,"import_time_s":null,"mem_mb":null,"disk_size":"819M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":40.6,"import_time_s":null,"mem_mb":null,"disk_size":"819M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"853.4M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"853.4M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine 
(musl)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"853.4M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"853.4M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":39.1,"import_time_s":null,"mem_mb":null,"disk_size":"817M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":42.2,"import_time_s":null,"mem_mb":null,"disk_size":"817M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":40.5,"import_time_s":null,"mem_mb":null,"disk_size":"817M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":40.7,"import_time_s":null,"mem_mb":null,"disk_size":"817M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"812.8M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine 
(musl)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"812.8M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"812.8M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":0.1,"import_time_s":null,"mem_mb":null,"disk_size":"812.8M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"petastorm","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.7,"import_time_s":null,"mem_mb":null,"disk_size":"786M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"pytorch","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.5,"import_time_s":null,"mem_mb":null,"disk_size":"786M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.8,"import_time_s":null,"mem_mb":null,"disk_size":"786M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"tensorflow","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":43.7,"import_time_s":null,"mem_mb":null,"disk_size":"786M"}]}}