{"id":23148,"library":"pybaseball","title":"pybaseball","description":"pybaseball is a Python library for retrieving and analyzing baseball data from sources like Baseball Savant, FanGraphs, and Baseball-Reference. Version 2.2.7 (current) fixes FanGraphs leaderboard URLs and adds new features like PitchingBot/Stuff+ stat enums and strike zone plotting. Release cadence is irregular, with minor patches every few months.","status":"active","version":"2.2.7","language":"python","source_language":"en","source_url":"https://github.com/jldbc/pybaseball","tags":["baseball","statcast","fangraphs","baseball-reference","sports analytics","data scraping"],"install":[{"cmd":"pip install pybaseball","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"Data manipulation and DataFrame returns","package":"pandas","optional":false},{"reason":"HTTP requests to baseball data sources","package":"requests","optional":false},{"reason":"Numerical operations","package":"numpy","optional":false},{"reason":"HTML parsing for web scraping","package":"lxml","optional":false}],"imports":[{"note":"statcast is a top-level function, not a submodule","wrong":"from pybaseball.statcast import statcast","symbol":"statcast","correct":"from pybaseball import statcast"},{"note":"batting_stats is a top-level function; direct submodule imports may break","wrong":"from pybaseball.fangraphs import batting_stats","symbol":"batting_stats","correct":"from pybaseball import batting_stats"},{"note":"player lookup is at top level","wrong":"from pybaseball.lookup import playerid_lookup","symbol":"playerid_lookup","correct":"from pybaseball import playerid_lookup"}],"quickstart":{"code":"from pybaseball import statcast\nimport pandas as pd\n# Disable cache to avoid stale data\nfrom pybaseball import cache\ncache.disable()\ndf = statcast(start_dt='2024-05-01', end_dt='2024-05-02')\nprint(df.head())","lang":"python","description":"Retrieve Statcast data for two days in May 2024. 
Cache is disabled by default, but if enabled, use cache.disable() to ensure fresh data."},"warnings":[{"fix":"Inspect df.columns after fetching and handle missing/renamed columns gracefully.","message":"Statcast data schema changes frequently: column names, data types, and null handling can change without notice. Always check the actual columns after fetching.","severity":"breaking","affected_versions":"all"},{"fix":"Upgrade to pybaseball>=2.2.6.","message":"FanGraphs leaderboard URL changed in v2.2.6; older versions cannot retrieve FanGraphs data.","severity":"breaking","affected_versions":"< 2.2.6"},{"fix":"Use Python 3.8+.","message":"Python 3.6 support dropped in v2.2.5; 3.7 also dropped later.","severity":"deprecated","affected_versions":">= 2.2.5"},{"fix":"Call cache.purge() to clear cached results (or delete the cache files manually); use cache.disable() to bypass the cache entirely.","message":"Caching is disabled by default, but enabling it can cause stale data if not cleared. Manual cache clearing is required.","severity":"gotcha","affected_versions":">= 2.1.0"},{"fix":"Wrap calls in retry logic; check GitHub issues for known outages.","message":"Web scraping can be unreliable: frequent HTTP errors (429, 503) and HTML structure changes may break scraping functions.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Wait and retry, or check the pybaseball GitHub issue tracker for known API issues.","cause":"Statcast API endpoint temporarily down or changed.","error":"ValueError: URL does not return valid JSON"},{"fix":"Use from pybaseball import statcast and upgrade to latest version.","cause":"Importing from submodule instead of top level, or very old version (<2.0.0).","error":"AttributeError: module 'pybaseball' has no attribute 'statcast'"},{"fix":"Add delays between requests (time.sleep(1)). 
Alternatively, use cached data after the first successful fetch.","cause":"Exceeding rate limits on Baseball Savant or FanGraphs.","error":"HTTPError: 429 Client Error: Too Many Requests"},{"fix":"Check df.columns for available columns; the 'events' column may be named differently or absent for certain date ranges.","cause":"Statcast data column missing; schema changed.","error":"KeyError: 'events'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}