PyTables (Hierarchical Datasets)

3.11.1 · active · verified Fri Apr 10

PyTables is a Python library for managing hierarchical datasets, designed for efficient handling of extremely large amounts of data. It builds on the HDF5 library and NumPy, providing high-performance I/O for scientific data. The current version is 3.11.1, and the project maintains an active release cadence with regular updates.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to create an HDF5 file, define a table structure using `IsDescription`, create a table, append data, and read data using PyTables. It also shows the importance of using context managers (`with`) for file handling to ensure proper closing.

import tables as tb
import numpy as np
import os

# Define a table description
class MyTableDescription(tb.IsDescription):
    """Row layout for the quickstart table: one string, one int32, one float64 column."""

    # `pos` sets each column's position within the table.
    col1 = tb.StringCol(itemsize=16, pos=1)  # string column, itemsize 16
    col2 = tb.Int32Col(pos=2)
    col3 = tb.Float64Col(pos=3)

def _print_rows(tbl):
    """Print every row of *tbl*, one indented line per row.

    String columns are read back as ``bytes`` under Python 3, so ``col1`` is
    decoded before printing; otherwise the output would show ``b'row_a'``.
    """
    for row in tbl.iterrows():
        print(f"  col1: {row['col1'].decode()}, col2: {row['col2']}, col3: {row['col3']}")


filename = "mytable.h5"

try:
    # Open the HDF5 file in write mode. mode="w" creates a new file and
    # replaces any existing file of the same name, so no manual pre-delete
    # is required.
    with tb.open_file(filename, mode="w", title="Test File") as h5f:
        # Create a group for organization
        group = h5f.create_group(h5f.root, "data")

        # Create a table within the group
        table = h5f.create_table(group, 'table1', MyTableDescription, "My First Table")

        # Append data to the table
        table.append([("row_a", 1, 1.1), ("row_b", 2, 2.2)])
        table.flush()  # Ensure data is written to disk

        print("\nData in table1 after first append:")
        _print_rows(table)

        # Add more data
        table.append([("row_c", 3, 3.3), ("row_d", 4, 4.4)])
        table.flush()

        print("\nAll data in table1 (as NumPy record array):")
        print(table[:])  # Read all data into a NumPy record array

    print(f"\nSuccessfully created and written to {filename}")

    # Re-open the file in read mode to verify
    with tb.open_file(filename, mode="r") as h5f_read:
        read_table = h5f_read.root.data.table1
        print("\nData read from file:")
        _print_rows(read_table)

except Exception as e:
    # Keep the short message, but re-raise so the failure (and its traceback)
    # is not silently swallowed and the script does not exit successfully.
    print(f"An error occurred: {e}")
    raise
finally:
    # Clean up the created file; runs on both success and failure.
    if os.path.exists(filename):
        os.remove(filename)
        print(f"Cleaned up {filename}")

view raw JSON →