{"id":28064,"library":"psmpy","title":"psmpy - Propensity Score Matching for Python","description":"psmpy provides propensity score matching for observational studies, including functions to compute propensity scores, perform matching (with/without replacement, caliper), and generate graphical plots (balancing, histogram). Version 0.3.16 is the latest release. Release cadence is low; last update was 2024.","status":"active","version":"0.3.16","language":"python","source_language":"en","source_url":"https://github.com/adriennekline/psmpy","tags":["propensity-score-matching","causal-inference","observational-study","statistics"],"install":[{"cmd":"pip install psmpy","lang":"bash","label":"PyPI"}],"dependencies":[{"reason":"Data handling for propensity score computation and matching","package":"pandas","optional":false},{"reason":"Numerical operations","package":"numpy","optional":false},{"reason":"Logistic regression for propensity score estimation","package":"scikit-learn","optional":false},{"reason":"Plotting balance and histogram graphs","package":"matplotlib","optional":false}],"imports":[{"note":"Direct import fails; must import the class explicitly.","wrong":"import psmpy","symbol":"Psmpy","correct":"from psmpy import Psmpy"},{"note":"Older documentation suggested submodule path; top-level import works.","wrong":"from psmpy.psmpy import Psmpy","symbol":"Psmpy","correct":"from psmpy import Psmpy"}],"quickstart":{"code":"import pandas as pd\nfrom psmpy import Psmpy\nfrom sklearn.linear_model import LogisticRegression\n\n# Sample data\ndf = pd.DataFrame({\n    'treatment': [0, 1, 0, 1, 0, 1],\n    'age': [30, 40, 35, 45, 25, 50],\n    'income': [50000, 60000, 55000, 65000, 45000, 70000]\n})\n\n# Initialize Psmpy\npsm = Psmpy(data=df, treatment='treatment', indx='age',\n            exclude_cols=['income'],\n            logistic_model=LogisticRegression(solver='liblinear'))\n\n# Compute propensity scores\npsm.pscore()\n\n# Perform matching\npsm.match(method='nearest', caliper=None, replace=False)\n\n# View matched pairs\nprint(psm.matched_ids)","lang":"python","description":"Basic workflow: initialize with data, compute scores, match, and inspect matches."},"warnings":[{"fix":"Use 'indx' instead of 'idcol' when initializing Psmpy.","message":"In version 0.3.15 and earlier, the 'indx' parameter was called 'idcol'. It was renamed in 0.3.16. Using 'idcol' raises TypeError.","severity":"breaking","affected_versions":">=0.3.16"},{"fix":"Explicitly instantiate LogisticRegression(solver='liblinear').","message":"The 'logistic_model' parameter expects a scikit-learn LogisticRegression instance with solver='liblinear' (or similar) that supports predict_proba. Using default solver may raise warnings or errors on small datasets.","severity":"gotcha","affected_versions":"all"},{"fix":"List all columns that are not predictors in 'exclude_cols'.","message":"The 'exclude_cols' parameter should include any columns not used for propensity score estimation (e.g., outcome variables, IDs). Forgetting to exclude the treatment column can cause errors.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-09T00:00:00.000Z","next_check":"2026-08-07T00:00:00.000Z","problems":[{"fix":"Use 'indx' parameter instead of 'idcol'.","cause":"Parameter 'idcol' renamed to 'indx' in version 0.3.16.","error":"TypeError: __init__() got an unexpected keyword argument 'idcol'"},{"fix":"Use 'from psmpy import Psmpy'.","cause":"Wrong import statement; psmpy is a package, not a module.","error":"AttributeError: module 'psmpy' has no attribute 'Psmpy'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}