PyTables (Hierarchical Datasets)
PyTables is a Python library for managing hierarchical datasets, designed for efficient handling of extremely large amounts of data. It builds on the HDF5 library and NumPy, providing high-performance I/O for scientific data. The current version is 3.11.1, and the project is actively maintained with regular releases.
Warnings
- breaking The `File.rename_node()` and `File.move_node()` methods no longer return the new node. They now return `None`.
- deprecated The `obj` argument for `File.create_table()` is deprecated.
- gotcha The default compression level for `zlib` and `blosc` filters changed from `zlib.Z_DEFAULT_COMPRESSION` to `1`.
- gotcha Always ensure `tables.File` objects are properly closed to prevent data corruption, resource leaks, or incomplete writes.
Install
- `pip install tables`
Imports
- tables
import tables
- open_file
tables.open_file
- IsDescription
tables.IsDescription
Quickstart
import tables as tb
import numpy as np
import os
# Define a table description
class MyTableDescription(tb.IsDescription):
    """Row layout for the quickstart table: one string and two numeric columns."""

    # Each column is given an explicit `pos` so the column order is stated
    # here rather than left to the library's default ordering.
    col1 = tb.StringCol(16, pos=1)  # string column declared with size 16
    col2 = tb.Int32Col(pos=2)
    col3 = tb.Float64Col(pos=3)
filename = "mytable.h5"

# Remove any leftover file from a previous run before starting.
if os.path.exists(filename):
    os.remove(filename)

try:
    # Open the HDF5 file in write mode; the `with` block closes the file
    # when it exits, even if an error is raised part-way through.
    with tb.open_file(filename, mode="w", title="Test File") as h5f:
        # Create a group for organization
        group = h5f.create_group(h5f.root, "data")

        # Create a table within the group
        table = h5f.create_table(group, 'table1', MyTableDescription, "My First Table")

        # Append data to the table; each tuple is one row in column order
        table.append([("row_a", 1, 1.1), ("row_b", 2, 2.2)])
        table.flush()  # Ensure data is written to disk

        print("\nData in table1 after first append:")
        for row in table.iterrows():
            print(f" col1: {row['col1']}, col2: {row['col2']}, col3: {row['col3']}")

        # Add more data
        table.append([("row_c", 3, 3.3), ("row_d", 4, 4.4)])
        table.flush()

        print("\nAll data in table1 (as NumPy record array):")
        print(table[:])  # Read all data into a NumPy record array

    # Reported only after the write handle has been closed.
    print(f"\nSuccessfully created and written to {filename}")

    # Re-open the file in read mode to verify
    with tb.open_file(filename, mode="r") as h5f_read:
        read_table = h5f_read.root.data.table1
        print("\nData read from file:")
        for row in read_table.iterrows():
            print(f" col1: {row['col1']}, col2: {row['col2']}, col3: {row['col3']}")
except Exception as e:
    # Top-level boundary of the demo script: report the failure and fall
    # through to cleanup instead of surfacing a traceback.
    print(f"An error occurred: {e}")
finally:
    # Clean up the created file
    if os.path.exists(filename):
        os.remove(filename)
        print(f"Cleaned up {filename}")