{"library":"lm-eval","type":"library","category":null,"description":"LM Evaluation Harness (lm-eval) is a comprehensive framework for evaluating language models on a wide range of benchmarks and tasks. It supports various model backends (HuggingFace, vLLM, SGLang, etc.) and provides a standardized way to compare model performance. The current version is 0.4.11; the project maintains a rapid release cadence with frequent minor updates and occasional breaking changes.","language":"python","status":"active","version":"0.4.11","tags":["LLM","evaluation","NLP","machine-learning","benchmark"],"last_verified":"Fri May 22","install":[{"cmd":"pip install \"lm-eval[main]\"","imports":["from lm_eval import models","from lm_eval import tasks","from lm_eval import evaluator"]},{"cmd":"pip install lm-eval # Core only\npip install \"lm-eval[hf]\" # Add HuggingFace backend","imports":[]}],"homepage":null,"github":"https://github.com/EleutherAI/lm-evaluation-harness","docs":null,"changelog":null,"pypi":"https://pypi.org/project/lm-eval/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":40,"avg_install_s":34.2,"avg_import_s":0.15,"wheel_type":"sdist"},"url":"https://checklist.day/v1/registry/lm-eval/compatibility"}}