h5py

3.16.0 · active · verified Sat Mar 28

The h5py package provides a Pythonic interface to the HDF5 binary data format, allowing users to store and manipulate large amounts of numerical data efficiently while integrating seamlessly with NumPy arrays. It offers both high-level and low-level access to HDF5 files, datasets, and groups. The current version is 3.16.0, and the project is actively maintained with frequent releases.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to create an HDF5 file, add groups and datasets, store NumPy arrays, attach metadata as attributes, and then read the data and attributes back. It emphasizes using context managers (`with h5py.File(...)`) for proper file handling.

import h5py
import numpy as np
import os

file_path = 'my_data.h5'

# The whole demo runs inside try/finally so the scratch file is removed even
# if one of the write or read steps raises part-way through.
try:
    # --- Write phase --------------------------------------------------------
    # Mode 'w' creates the file, overwriting any existing file of that name.
    # The context manager closes the file when the block exits.
    with h5py.File(file_path, 'w') as f:
        # Groups act like directories inside the file.
        group = f.create_group('my_group')

        # Store a 10x10 array of the integers 0..99 as a dataset in the group.
        data = np.arange(100).reshape(10, 10)
        dset = group.create_dataset('dataset_1', data=data)

        # Attributes attach small pieces of metadata to the dataset.
        dset.attrs['units'] = 'arbitrary'
        dset.attrs['description'] = 'Sample 2D integer array'

        # Datasets can also be created directly at the root of the file.
        f.create_dataset('another_dataset', data=np.random.rand(5))

    print(f"File '{file_path}' created successfully.")

    # --- Read phase ---------------------------------------------------------
    with h5py.File(file_path, 'r') as f:
        # List all top-level objects.
        print(f"\nKeys in file: {list(f.keys())}")

        # Groups and datasets are looked up by name, dict-style.
        group_read = f['my_group']
        print(f"Keys in 'my_group': {list(group_read.keys())}")

        dset_read = group_read['dataset_1']

        # Indexing with an empty tuple reads the entire dataset into memory
        # as a NumPy array.
        read_data = dset_read[()]
        print(f"\nShape of read_data: {read_data.shape}")
        print(f"First 5 elements of read_data: {read_data.flatten()[:5]}")

        # Attributes are read back through the same mapping interface.
        print(f"Units attribute: {dset_read.attrs['units']}")

        # Slicing the dataset reads only the requested region.
        slice_data = dset_read[0:5, 0:5]
        print(f"Slice (0:5, 0:5) of dataset_1:\n{slice_data}")
finally:
    # Clean up the created file. The existence check keeps a failure to
    # create the file from being masked by a FileNotFoundError here.
    if os.path.exists(file_path):
        os.remove(file_path)

view raw JSON →