{"library":"stable-baselines3","title":"Stable Baselines3","type":"library","description":"Stable Baselines3 (SB3) is a comprehensive Python library offering reliable implementations of reinforcement learning (RL) algorithms in PyTorch. It provides a clean and simple API, adhering to a scikit-learn-like syntax for training, evaluating, and deploying RL agents. SB3 is actively maintained with frequent releases, supporting state-of-the-art model-free RL algorithms like A2C, PPO, SAC, DQN, and TD3.","language":"python","status":"active","last_verified":"2026-05-22","install":{"commands":["pip install stable-baselines3 gymnasium","pip install stable-baselines3[extra] gymnasium"],"cli":null},"imports":["from stable_baselines3 import PPO","from stable_baselines3 import A2C","from stable_baselines3 import SAC","from stable_baselines3 import DQN","from stable_baselines3.common.env_util import make_vec_env","from stable_baselines3.common.evaluation import evaluate_policy"],"auth":{"required":false,"env_vars":[]},"links":{"homepage":"https://stable-baselines3.readthedocs.io","github":"https://github.com/DLR-RM/stable-baselines3","docs":"https://stable-baselines3.readthedocs.io/","changelog":"https://stable-baselines3.readthedocs.io/en/master/misc/changelog.html","pypi":"https://pypi.org/project/stable-baselines3/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null},"quickstart":{"code":"import gymnasium as gym\nfrom stable_baselines3 import A2C\n\n# Create environment\nenv = gym.make(\"CartPole-v1\")\n\n# Instantiate the agent\nmodel = A2C(\"MlpPolicy\", env, verbose=1)\n\n# Train the agent\nmodel.learn(total_timesteps=10000)\n\n# Save the model\nmodel.save(\"a2c_cartpole\")\n\n# Delete model and reload it to demonstrate saving and loading\ndel model\nmodel = A2C.load(\"a2c_cartpole\")\n\n# Evaluate the trained agent\nobs, info = env.reset()\nfor i in range(1000):\n    action, _states = model.predict(obs, deterministic=True)\n    obs, reward, terminated, 
truncated, info = env.step(action)\n    if terminated or truncated:\n        obs, info = env.reset()\nenv.close()\n","lang":"python","description":"This quickstart demonstrates how to create a Gymnasium environment, instantiate an A2C agent, train it for a specified number of timesteps, save and load the trained model, and finally evaluate its performance.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-22","installed_version":"2.8.0","pypi_latest":"2.8.0","is_stale":false,"summary":{"python_range":"3.9–3.13","success_rate":40,"avg_install_s":77.9,"avg_import_s":9.5,"wheel_type":"wheel"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"stable-baselines3","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"extra","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"stable-baselines3","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":86.1,"import_time_s":7.32,"mem_mb":101.2,"disk_size":"4.9G"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"extra","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":93.2,"import_time_s":7.88,"mem_mb":110.9,"disk_size":"5.2G"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine 
(musl)","variant":"stable-baselines3","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"extra","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"stable-baselines3","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":77.7,"import_time_s":10.64,"mem_mb":111.7,"disk_size":"4.9G"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"extra","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":87,"import_time_s":10.51,"mem_mb":121.1,"disk_size":"5.2G"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"stable-baselines3","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"extra","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"stable-baselines3","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":68.5,"import_time_s":10.87,"mem_mb":109.5,"disk_size":"4.9G"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim 
(glibc)","variant":"extra","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":77.3,"import_time_s":10.19,"mem_mb":119.2,"disk_size":"5.2G"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"stable-baselines3","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"extra","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"stable-baselines3","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":64.3,"import_time_s":8.87,"mem_mb":111.4,"disk_size":"4.9G"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"extra","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":69.4,"import_time_s":9.72,"mem_mb":118.9,"disk_size":"5.2G"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"stable-baselines3","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"extra","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim 
(glibc)","variant":"stable-baselines3","exit_code":1,"wheel_type":null,"failure_reason":"timeout","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"extra","exit_code":1,"wheel_type":null,"failure_reason":"timeout","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null}]}}