{"library":"swebench","type":"library","category":null,"description":"The official SWE-bench package (current version 4.1.0) provides a benchmark for evaluating large language models (LLMs) on software engineering tasks. It focuses on automatically testing model-generated code fixes against real-world software bugs and is actively developed with frequent updates, often involving significant changes between major versions.","language":"python","status":"active","version":"4.1.0","tags":["LLM","benchmark","software-engineering","evaluation","AI","NLP"],"last_verified":"Wed May 20","install":[{"cmd":"pip install swebench","imports":["from swebench import get_tasks","from swebench.harness.runner import SWEBenchRunner","from swebench.harness.engine_wrappers import ModelEngine"]}],"homepage":"https://swebench.com","github":"https://github.com/swe-bench/SWE-bench","docs":"https://github.com/swe-bench/SWE-bench","changelog":null,"pypi":"https://pypi.org/project/swebench/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":50,"avg_install_s":20.6,"avg_import_s":null,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/swebench/compatibility"}}