pyhf - Pure-Python HistFactory

0.7.6 · active · verified Thu Apr 16

pyhf is a pure-Python implementation of the HistFactory statistical model for binned data analysis, widely used in particle physics. It leverages modern tensor libraries like NumPy, TensorFlow, PyTorch, and JAX with automatic differentiation for efficient and scalable statistical inference. The current version is 0.7.6, and it follows a regular release cadence with patch releases addressing fixes and minor improvements, and minor versions introducing new features and sometimes API changes.

Common errors

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to define a simple HistFactory workspace, select a tensor backend, build a pyhf model, and perform a basic maximum likelihood fit to extract the best-fit parameters and their uncertainties. It also shows how to fix the signal-strength parameter at a chosen value for a background-only (hypothesis-test style) fit.

import pyhf
import json

# Define a simple single-channel workspace (adapted from the pyhf documentation).
# A valid pyhf workspace requires the "channels", "observations", "measurements",
# and "version" keys. The "lumi" modifier's "data" field must be None; its
# constraint configuration (auxdata, sigmas, bounds, inits) lives in the
# measurement's "parameters" list instead.
workspace_data = {
    "channels": [
        {
            "name": "singlechannel",
            "samples": [
                {
                    "name": "signal",
                    "data": [12.0],
                    "modifiers": [
                        {"name": "mu", "type": "normfactor", "data": None},
                        {"name": "lumi", "type": "lumi", "data": None},
                    ],
                },
                {
                    "name": "background",
                    "data": [100.0],
                    "modifiers": [
                        {"name": "lumi", "type": "lumi", "data": None},
                        {"name": "bkg_norm", "type": "normfactor", "data": None},
                    ],
                },
            ],
        }
    ],
    "measurements": [
        {
            "name": "measurement",
            "config": {
                "poi": "mu",  # parameter of interest: the signal strength
                "parameters": [
                    {
                        # Constraint configuration for the 'lumi' modifier:
                        # 10% relative luminosity uncertainty around 1.0.
                        "name": "lumi",
                        "auxdata": [1.0],
                        "sigmas": [0.1],
                        "bounds": [[0.5, 1.5]],
                        "inits": [1.0],
                    }
                ],
            },
        }
    ],
    "observations": [{"name": "singlechannel", "data": [120.0]}],
    "version": "1.0.0",
}

# Select a tensor backend ('numpy', 'tensorflow', 'pytorch', or 'jax').
pyhf.set_backend("numpy")

# Build the probability model from the workspace.
workspace = pyhf.Workspace(workspace_data)
model = workspace.model()

# Observed data plus auxiliary (constraint) data, ordered as the model expects.
actual_data = workspace.data(model)
init_pars = model.config.suggested_init()
fixed_pars = model.config.suggested_fixed()
bounds = model.config.suggested_bounds()

# Perform Maximum Likelihood Estimation (MLE).
# return_uncertainties=True makes fit() return a (n_params, 2) array whose
# columns are the best-fit values and their (approximate) uncertainties;
# without it only the best-fit parameter vector is returned.
fit_result = pyhf.infer.mle.fit(
    data=actual_data,
    pdf=model,
    init_pars=init_pars,
    fixed_params=fixed_pars,
    par_bounds=bounds,
    return_uncertainties=True,
)
bestfit, uncertainties = fit_result[:, 0], fit_result[:, 1]

print(f"Fitted parameters: {bestfit}")
print(f"Parameter uncertainties: {uncertainties}")

# Example for hypothesis testing: fix 'mu' (the signal strength) to 0
# for a background-only fit. poi_index is the flat index of the POI
# declared in the measurement config above.
mu_index = model.config.poi_index

bkg_only_init_pars = list(init_pars)  # mutable copy
bkg_only_init_pars[mu_index] = 0.0  # start mu at 0

bkg_only_fixed_params = list(fixed_pars)  # mutable copy
bkg_only_fixed_params[mu_index] = True  # hold mu fixed during the fit

bkg_only_fit_result = pyhf.infer.mle.fit(
    data=actual_data,
    pdf=model,
    init_pars=bkg_only_init_pars,
    fixed_params=bkg_only_fixed_params,
    par_bounds=bounds,
)
print(f"Fitted parameters (mu=0 fixed): {bkg_only_fit_result}")

view raw JSON →