dirhash: Directory Hashing Utility

0.5.0 · active · verified Thu Apr 09

dirhash is a Python module and CLI tool for computing the hash of file system directories based on their structure and content. It supports all hashing algorithms available in Python's `hashlib` module, offers `.gitignore`-style glob/wildcard path matching for filtering files, and leverages multiprocessing for performance. The library computes hashes according to the Dirhash Standard, aiming for consistent and collision-resistant directory hash generation. It is actively maintained with irregular, feature-driven releases, currently at version 0.5.0.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to compute directory hashes with the `dirhash` function, using different hashing algorithms and the `match` and `ignore` filtering options (both take `.gitignore`-style patterns). It also shows the effect of including empty directories.

import os
import tempfile
import shutil
from dirhash import dirhash

# Build a small throwaway project tree to hash. Everything lives under a
# TemporaryDirectory, so it is removed automatically when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    test_dir = os.path.join(tmpdir, 'my_project')
    for subdir in ('src', 'data'):
        os.makedirs(os.path.join(test_dir, subdir))

    # (path components relative to test_dir, file contents), written in order.
    # NOTE(review): the .gitignore file is only created here; this script
    # passes its filter patterns to dirhash explicitly further down.
    sample_files = (
        (('src', 'main.py'), 'print("Hello, dirhash!")'),
        (('data', 'config.json'), '{"key": "value"}'),
        (('.gitignore',), '*.json'),
    )
    for parts, text in sample_files:
        with open(os.path.join(test_dir, *parts), 'w') as fh:
            fh.write(text)

    # 1) MD5 over the directory with no filters supplied.
    full_md5_hash = dirhash(test_dir, 'md5')
    print(f"MD5 hash of {test_dir}: {full_md5_hash}")

    # 2) SHA1 with an `ignore` pattern that filters out the .json files.
    sha1_hash_no_json = dirhash(test_dir, 'sha1', ignore=['*.json'])
    print(f"SHA1 hash (excluding *.json): {sha1_hash_no_json}")

    # 3) SHA256 with a `match` pattern so that only .py files are selected.
    sha256_hash_only_py = dirhash(test_dir, 'sha256', match=['*.py'])
    print(f"SHA256 hash (only *.py): {sha256_hash_only_py}")

    # 4) Empty directories: add one, then hash once without the `empty_dirs`
    #    argument and once with empty_dirs=True, so the two results can be
    #    compared.
    empty_dir_path = os.path.join(test_dir, 'empty_folder')
    os.makedirs(empty_dir_path)

    hash_without_empty = dirhash(test_dir, 'md5')
    print(f"MD5 hash (without explicit empty dirs): {hash_without_empty}")

    hash_with_empty = dirhash(test_dir, 'md5', empty_dirs=True)
    print(f"MD5 hash (with empty dirs): {hash_with_empty}")

    # No manual cleanup: leaving the `with` block deletes tmpdir.

view raw JSON →