{"library":"pydantic-evals","title":"Pydantic Evals","description":"Pydantic Evals is a framework for defining and executing evaluations of stochastic code, particularly useful for LLM-based applications. It allows users to create datasets, define custom evaluators, and run evaluations to assess model performance and behavior. It is part of the broader Pydantic AI ecosystem, currently at version 1.78.0, with a rapid release cadence reflecting active development.","language":"python","status":"active","last_verified":"Wed May 20","install":{"commands":["pip install pydantic-evals","pip install pydantic-ai[openai]"],"cli":{"name":"pydantic_evals","version":"sh: 1: pydantic_evals: not found"}},"imports":["from pydantic_evals import Dataset","from pydantic_evals import Evaluator","from pydantic_evals import LLMProvider","from pydantic_evals import Case","from pydantic_evals import Evaluation"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import os\nfrom pydantic_evals import Dataset, Evaluation, Evaluator, LLMProvider, Case\nfrom typing import ClassVar\n\n# 1. Define your LLM provider (mocked for a runnable example without API keys)\nclass MockLLM(LLMProvider):\n    name: ClassVar[str] = \"mock_llm\"\n    model_name: ClassVar[str] = \"mock_model\"\n\n    def get_completion(self, prompt: str) -> str:\n        if \"capital of France\" in prompt:\n            return \"The capital of France is Paris.\"\n        elif \"Python programming\" in prompt:\n            return \"Python is named after the British sketch comedy group Monty Python.\"\n        return f\"Mock LLM response to: {prompt[:50]}...\"\n\n# 2. 
Define your evaluation logic\nclass SimpleKeywordEvaluator(Evaluator):\n    def evaluate_case(self, case: Case, actual_output: str, llm: MockLLM) -> Evaluation:\n        # For simplicity, check for specific keywords based on input\n        score = 0.0\n        if \"capital of France\" in case.input and \"Paris\" in actual_output:\n            score = 1.0\n        elif \"Python programming\" in case.input and \"Python\" in actual_output:\n            score = 1.0\n        return Evaluation(score=score, details={\"actual_output\": actual_output})\n\n# 3. Create a Dataset with evaluation cases\ndataset = Dataset(\n    cases=[\n        Case(input=\"What is the capital of France?\"),\n        Case(input=\"Tell me a fun fact about Python programming.\"),\n        Case(input=\"What is 2 + 2?\") # This case is designed to fail the evaluator\n    ]\n)\n\n# 4. Run the evaluation\nif __name__ == \"__main__\":\n    llm = MockLLM()\n    results = dataset.evaluate(\n        llm=llm,\n        evaluators=[SimpleKeywordEvaluator()],\n        batch_size=1,\n        num_workers=1,\n    )\n    print(\"\\nEvaluation Summary:\")\n    for result in results:\n        print(f\"  Input: '{result.case.input}'\")\n        print(f\"  Output: '{result.evaluations[0].details['actual_output']}'\")\n        print(f\"  Score: {result.evaluations[0].score}\")\n        print(f\"  Total Case Score: {result.score}\")","lang":"python","description":"This example demonstrates how to define a mock LLM provider, a simple keyword-based evaluator, create a dataset of test cases, and run an evaluation. 
For real LLM interactions, replace `MockLLM` with an actual `pydantic-ai` LLM client and ensure API keys are set.","tag":null,"tag_description":null,"last_tested":"2026-04-24","results":[{"runtime":"python:3.10-alpine","exit_code":1},{"runtime":"python:3.10-slim","exit_code":1},{"runtime":"python:3.11-alpine","exit_code":1},{"runtime":"python:3.11-slim","exit_code":1},{"runtime":"python:3.12-alpine","exit_code":1},{"runtime":"python:3.12-slim","exit_code":1},{"runtime":"python:3.13-alpine","exit_code":1},{"runtime":"python:3.13-slim","exit_code":1},{"runtime":"python:3.9-alpine","exit_code":1},{"runtime":"python:3.9-slim","exit_code":1}]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-20","installed_version":"0.8.1","pypi_latest":"1.99.0","is_stale":true,"summary":{"python_range":"3.9–3.13","success_rate":70,"avg_install_s":18.8,"avg_import_s":3.38,"wheel_type":"wheel"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.04,"mem_mb":30.5,"disk_size":"58.2M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine 
(musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.21,"mem_mb":30.6,"disk_size":"57.3M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":37.6,"import_time_s":5.53,"mem_mb":64.4,"disk_size":"358M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":6.6,"import_time_s":1.46,"mem_mb":30.5,"disk_size":"59M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.46,"mem_mb":30.6,"disk_size":"58M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine 
(musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.85,"mem_mb":33.5,"disk_size":"64.4M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":3.42,"mem_mb":33.8,"disk_size":"63.4M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":38.2,"import_time_s":7.39,"mem_mb":69.5,"disk_size":"385M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":6,"import_time_s":2.53,"mem_mb":33.5,"disk_size":"65M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.73,"mem_mb":33.8,"disk_size":"64M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine 
(musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.65,"mem_mb":33.2,"disk_size":"55.4M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":3.04,"mem_mb":33.5,"disk_size":"54.4M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":29.3,"import_time_s":7.61,"mem_mb":68.5,"disk_size":"372M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":8.1,"mem_mb":67.6,"disk_size":"365M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":5.3,"import_time_s":2.63,"mem_mb":33.2,"disk_size":"56M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":3.14,"mem_mb":33.5,"disk_size":"55M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine 
(musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.63,"mem_mb":33.6,"disk_size":"55.1M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":3.2,"mem_mb":33.9,"disk_size":"54.1M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":25.8,"import_time_s":7.16,"mem_mb":68.8,"disk_size":"371M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":7.59,"mem_mb":67.9,"disk_size":"364M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":5.3,"import_time_s":2.44,"mem_mb":33.6,"disk_size":"55M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim 
(glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.76,"mem_mb":33.9,"disk_size":"54M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"openai","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":null,"import_time_s":1.71,"mem_mb":28.4,"disk_size":"52.5M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.78,"mem_mb":28.3,"disk_size":"52.4M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":26,"import_time_s":1.61,"mem_mb":30.1,"disk_size":"253M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"openai","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.56,"mem_mb":30.1,"disk_size":"248M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim 
(glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":7.7,"import_time_s":1.47,"mem_mb":28.4,"disk_size":"53M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"pydantic-evals","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.97,"mem_mb":28.3,"disk_size":"53M"}]}}