Git Filter Repo

2.47.0 · active · verified Tue Apr 14

git-filter-repo is a powerful and fast tool for rewriting Git repository history, designed as a modern and more efficient replacement for the deprecated `git filter-branch`. It is currently at version 2.47.0 and is actively developed, with releases typically occurring every few weeks to months. It addresses common repository maintenance tasks such as removing sensitive data, extracting subdirectories, and reorganizing history.

Warnings

Install

Imports

Quickstart

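This quickstart demonstrates how to use `git-filter-repo` via `subprocess` to remove a specific file (`secret.txt`) from a repository's entire history. It sets up a temporary Git repository, adds a 'secret' file, commits it, and then runs `git-filter-repo` with path filtering to eradicate the file; the documented form for deleting a path is `git-filter-repo --invert-paths --path secret.txt` (with `--force` added here because the demo repository is not a fresh clone). Note that `--path-rename` only renames paths (`OLD:NEW`) and cannot delete them. `git-filter-repo` modifies history irreversibly, so always back up your repository before using it in production.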

import subprocess
import os
import shutil

# --- Setup: build a throwaway repository to demonstrate on ---
repo_name = "test_repo_filter"
repo_path = os.path.join(os.getcwd(), repo_name)


def _quiet_git(*args):
    """Run ``git <args>`` with its output captured, raising if the command fails."""
    return subprocess.run(["git", *args], check=True, capture_output=True)


# Start from a clean slate if an earlier run left the directory behind.
if os.path.exists(repo_path):
    shutil.rmtree(repo_path)

os.makedirs(repo_path)
os.chdir(repo_path)

_quiet_git("init", "-b", "main")
_quiet_git("config", "user.email", "test@example.com")
_quiet_git("config", "user.name", "Test User")

# Each entry is one commit: (file to write, open mode, text written, commit message).
# The second commit introduces the file that will later be purged from history.
seed_commits = [
    ("file1.txt", "w", "initial content", "Initial commit"),
    ("secret.txt", "w", "super secret info", "Add secret file"),
    ("file1.txt", "a", "\nmore content", "Update file1"),
]
for file_name, open_mode, text, message in seed_commits:
    with open(file_name, open_mode) as handle:
        handle.write(text)
    _quiet_git("add", file_name)
    _quiet_git("commit", "-m", message)

print("Original log (last 3 commits):")
# Not captured: the log is meant to be shown to the user.
subprocess.run(["git", "log", "--oneline", "-3"], check=True)

print("\n--- Running git-filter-repo to remove 'secret.txt' ---")
# IMPORTANT: git-filter-repo *modifies history irreversibly*. Always back up your repository.
# For this demo, we run directly. In a real scenario, consider cloning a backup first.


def _as_text(stream):
    """Return captured process output as text.

    ``CalledProcessError.stdout``/``stderr`` are ``None`` when the failing
    command ran without output capture, ``bytes`` when captured in binary
    mode, and ``str`` when captured with ``text=True``.
    """
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        return stream.decode(errors="replace")
    return stream


try:
    # Ensure a clean working directory, which git-filter-repo often requires.
    subprocess.run(["git", "reset", "--hard"], check=True, capture_output=True)

    # '--path secret.txt' selects the file and '--invert-paths' flips the
    # selection, so everything EXCEPT secret.txt is kept in the rewritten history.
    # ('--path-rename' only renames OLD:NEW and cannot delete a path.)
    # Commits that become empty as a result are pruned by default.
    # '--force' is needed because this demo repository is not a fresh clone.
    filter_repo_cmd = ["git-filter-repo", "--invert-paths", "--path", "secret.txt", "--force"]
    print(f"Executing: {' '.join(filter_repo_cmd)}")
    subprocess.run(filter_repo_cmd, check=True)

    print("\nFiltered log (last 3 commits):")
    subprocess.run(["git", "log", "--oneline", "-3"], check=True)

    # Verify the file is gone and not in history. check=True ensures a failing
    # 'git log' (which also yields empty stdout) is not mistaken for success.
    search_log_cmd = ["git", "log", "--all", "--", "secret.txt"]
    result = subprocess.run(search_log_cmd, check=True, capture_output=True, text=True)
    if not result.stdout:
        print("\n'secret.txt' successfully removed from history.")
    else:
        print("\nERROR: 'secret.txt' still found in history. Output:\n" + result.stdout)

except FileNotFoundError as e:
    # Raised by subprocess when the executable itself is missing from PATH.
    print(f"Required executable not found (is git-filter-repo installed and on PATH?): {e}")

except subprocess.CalledProcessError as e:
    print(f"Error running git-filter-repo: {e}")
    print(f"Stdout: {_as_text(e.stdout)}")
    print(f"Stderr: {_as_text(e.stderr)}")

finally:
    # Clean up the dummy repo. Leave it via the absolute parent of repo_path
    # rather than a relative "../", so cleanup does not depend on the current
    # directory still being the repository.
    os.chdir(os.path.dirname(repo_path))
    if os.path.exists(repo_path):
        shutil.rmtree(repo_path)

view raw JSON →