{"id":173,"library":"ragas","title":"Ragas","description":"RAG evaluation framework — measures faithfulness, answer relevancy, context precision/recall and more. Current version: 0.4.3 (Mar 2026). Still pre-1.0. v0.2 was a major breaking change from v0.1: metrics are now class instances initialized with LLM, evaluate() takes EvaluationDataset not HuggingFace Dataset, answer_relevancy renamed to ResponseRelevancy, fields renamed (question→user_input, answer→response, contexts→retrieved_contexts). Legacy API still works but deprecated — will be removed in v1.0.","status":"active","version":"0.4.3","language":"python","source_language":"en","source_url":"https://github.com/explodinggradients/ragas","tags":["ragas","rag","evaluation","llm","faithfulness","python","testing"],"install":[{"cmd":"pip install ragas","lang":"bash","label":"Python"}],"dependencies":[{"reason":"Required if using LangChain LLM wrapper for metrics. Optional — can also use ragas.llms.llm_factory with openai directly.","package":"langchain-openai","optional":true},{"reason":"Required for LLM-as-judge metrics (Faithfulness, ResponseRelevancy, etc.). Metrics call LLM API during evaluation.","package":"openai","optional":false}],"imports":[{"note":"v0.1 field names: question/answer/contexts. v0.2+ field names: user_input/response/retrieved_contexts. answer_relevancy renamed to ResponseRelevancy. Metrics are now class instances with llm= argument.","wrong":"# v0.1 style — deprecated, removed in v1.0\nfrom datasets import Dataset\nfrom ragas import evaluate\nfrom ragas.metrics import faithfulness, answer_relevancy\n\ndata = {\n    'question': ['When was the first Super Bowl?'],\n    'answer': ['Jan 15, 1967'],\n    'contexts': [['The game was played on January 15, 1967.']]\n}\nds = Dataset.from_dict(data)\nresult = evaluate(ds, metrics=[faithfulness, answer_relevancy])","symbol":"evaluate (v0.2+ style)","correct":"from ragas import EvaluationDataset, SingleTurnSample, evaluate\nfrom ragas.metrics import Faithfulness, ResponseRelevancy\nfrom ragas.llms import LangchainLLMWrapper\nfrom langchain_openai import ChatOpenAI\n\nllm = LangchainLLMWrapper(ChatOpenAI(model='gpt-4o-mini'))\n\nsamples = [\n    SingleTurnSample(\n        user_input='When was the first Super Bowl?',\n        response='The first Super Bowl was held on Jan 15, 1967.',\n        retrieved_contexts=[\n            'The First AFL-NFL World Championship Game was played on January 15, 1967.'\n        ]\n    )\n]\n\ndataset = EvaluationDataset(samples=samples)\n\nresult = evaluate(\n    dataset,\n    metrics=[\n        Faithfulness(llm=llm),\n        ResponseRelevancy(llm=llm)\n    ]\n)\nprint(result)"},{"note":"Import Faithfulness (class) not faithfulness (deprecated singleton). Initialize with llm= argument. Use single_turn_ascore() for single samples.","wrong":"from ragas.metrics import faithfulness  # lowercase — deprecated singleton\nfrom ragas import evaluate\n# Using deprecated singleton instance","symbol":"single metric scoring","correct":"from ragas import SingleTurnSample\nfrom ragas.metrics import Faithfulness\nfrom ragas.llms import LangchainLLMWrapper\nfrom langchain_openai import ChatOpenAI\nimport asyncio\n\nllm = LangchainLLMWrapper(ChatOpenAI(model='gpt-4o-mini'))\nscorer = Faithfulness(llm=llm)\n\nsample = SingleTurnSample(\n    user_input='What year was Python created?',\n    response='Python was created in 1991.',\n    retrieved_contexts=['Python was first released in 1991 by Guido van Rossum.']\n)\n\n# Async score\nscore = asyncio.run(scorer.single_turn_ascore(sample))\nprint(score)"}],"quickstart":{"code":"# pip install ragas langchain-openai\nfrom ragas import EvaluationDataset, SingleTurnSample, evaluate\nfrom ragas.metrics import Faithfulness, ResponseRelevancy, LLMContextRecall\nfrom ragas.llms import LangchainLLMWrapper\nfrom langchain_openai import ChatOpenAI\nimport os\n\nos.environ['OPENAI_API_KEY'] = 'your-key'\n\nllm = LangchainLLMWrapper(ChatOpenAI(model='gpt-4o-mini'))\n\nsamples = [\n    SingleTurnSample(\n        user_input='What is the capital of France?',\n        response='The capital of France is Paris.',\n        retrieved_contexts=['Paris is the capital and most populous city of France.'],\n        reference='Paris'  # ground truth — needed for recall\n    )\n]\n\ndataset = EvaluationDataset(samples=samples)\n\nresult = evaluate(\n    dataset,\n    metrics=[\n        Faithfulness(llm=llm),\n        ResponseRelevancy(llm=llm),\n        LLMContextRecall(llm=llm)\n    ]\n)\nprint(result)\n# {'faithfulness': 1.0, 'response_relevancy': 0.97, 'context_recall': 1.0}","lang":"python","description":"Ragas v0.2+ RAG evaluation with EvaluationDataset and class-based metrics."},"warnings":[{"fix":"SingleTurnSample(user_input=..., response=..., retrieved_contexts=[...])","message":"v0.2 renamed all field names: question→user_input, answer→response, contexts→retrieved_contexts. Using old field names silently produces empty/wrong evaluations.","severity":"breaking","affected_versions":">= 0.2"},{"fix":"from ragas.metrics import ResponseRelevancy; ResponseRelevancy(llm=llm)","message":"answer_relevancy metric renamed to ResponseRelevancy in v0.2. 'from ragas.metrics import answer_relevancy' still works but is deprecated and will be removed in v1.0.","severity":"breaking","affected_versions":">= 0.2"},{"fix":"eval_dataset = EvaluationDataset.from_hf_dataset(hf_dataset) then evaluate(eval_dataset, ...)","message":"evaluate() now takes EvaluationDataset not a HuggingFace Dataset. Passing HuggingFace Dataset directly raises TypeError in v0.2+.","severity":"breaking","affected_versions":">= 0.2"},{"fix":"Faithfulness(llm=llm) not faithfulness. Pass LLM explicitly to each metric.","message":"Metrics must be initialized as class instances with llm= argument. Old pattern of using lowercase singleton (faithfulness, answer_relevancy) deprecated — will be removed in v1.0.","severity":"breaking","affected_versions":">= 0.2"},{"fix":"from ragas.llms import LangchainLLMWrapper; llm = LangchainLLMWrapper(ChatOpenAI(...))","message":"All LLM-judge metrics require an async LLM. Ragas uses async internally — synchronous LLM wrappers will cause errors. Use LangchainLLMWrapper or ragas.llms.llm_factory.","severity":"gotcha","affected_versions":">= 0.2"},{"fix":"Include reference='ground truth answer' in SingleTurnSample for recall metrics.","message":"Context recall (LLMContextRecall) requires a reference (ground truth) field. Running it without reference gives a score of 0 or error.","severity":"gotcha","affected_versions":"all"},{"fix":"export RAGAS_DO_NOT_TRACK=true","message":"Ragas collects anonymized telemetry by default. Set RAGAS_DO_NOT_TRACK=true to opt out.","severity":"gotcha","affected_versions":"all"},{"fix":"Upgrade your Python environment to 3.10 or newer. If staying on Python 3.9 is strictly required, you might be able to resolve this by installing the `eval_type_backport` package (`pip install eval_type_backport`) if the library's usage allows for it.","message":"When using `ragas` (or its dependencies like `instructor`) with Python 3.9, a `TypeError: unsupported operand type(s) for |: 'type' and 'type'` may occur during module import. This is caused by dependencies utilizing the Python 3.10+ type union syntax (`TypeA | TypeB`) without the necessary `from __future__ import annotations` or `eval_type_backport` package in a Python 3.9 environment.","severity":"breaking","affected_versions":"ragas >= 0.2, Python < 3.10"},{"fix":"Ensure build essentials are installed in your Dockerfile (e.g., for Alpine: 'apk add build-base').","message":"Building libraries with C/C++/Cython extensions (like scikit-network) may fail on minimal Docker images like 'alpine' due to missing build essential tools (e.g., g++, make).","severity":"breaking","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-12T09:36:14.325Z","next_check":"2026-06-25T00:00:00.000Z","problems":[{"fix":"Update the keys in your dataset dictionary to the new names before creating the `EvaluationDataset`.\n```python\ndata_samples = {\n    'user_input': ['What is RAG?'],\n    'response': ['RAG is Retrieval-Augmented Generation.'],\n    'retrieved_contexts': [['RAG combines retrieval and generation models.']],\n    'ground_truth': ['RAG is a technique to improve LLM outputs.']\n}\n# Then proceed to create EvaluationDataset\n# dataset = EvaluationDataset(data_samples)\n```","cause":"In ragas v0.2+, the required field names for the evaluation dataset were changed. 'question' became 'user_input', 'answer' became 'response', and 'contexts' became 'retrieved_contexts'.","error":"KeyError: 'question'"},{"fix":"Instantiate the metric class by providing an LLM (often a `RagasLLM` wrapper around a Langchain LLM) during initialization.\n```python\nfrom ragas.metrics import Faithfulness\nfrom ragas.llms import RagasLLM\nfrom langchain_openai import ChatOpenAI\n\nopenai_model = ChatOpenAI(model=\"gpt-3.5-turbo\")\nragas_llm = RagasLLM(llm=openai_model)\n\n# Initialize the metric as a class instance with the LLM\nfaithfulness_metric = Faithfulness(llm=ragas_llm)\n\n# Then pass the instance to evaluate\n# result = evaluate(dataset, metrics=[faithfulness_metric])\n```","cause":"In ragas v0.2+, metrics are no longer simple functions but class instances that must be initialized, typically requiring an LLM argument.","error":"TypeError: Faithfulness.__init__ missing 1 required positional argument: 'llm'"},{"fix":"Use `ResponseRelevancy` instead of `answer_relevancy` and import it as a class.\n```python\nfrom ragas.metrics import ResponseRelevancy\n# Then instantiate it with an LLM as shown above:\n# response_relevancy_metric = ResponseRelevancy(llm=ragas_llm)\n```","cause":"The `answer_relevancy` metric was renamed to `ResponseRelevancy` in ragas v0.2+.","error":"NameError: name 'answer_relevancy' is not defined"},{"fix":"Install `ragas` using pip.\n```bash\npip install ragas\n# If you need specific integrations like OpenAI:\npip install ragas[openai]\n```","cause":"The `ragas` library is not installed in your current Python environment, or the environment is not activated.","error":"ModuleNotFoundError: No module named 'ragas'"}],"ecosystem":"pypi","meta_description":null,"install_score":28,"install_tag":"stale","quickstart_score":0,"quickstart_tag":"stale","pypi_latest":null,"install_checks":{"last_tested":"2026-05-12","tag":"stale","tag_description":"widespread failures or data too old to trust","results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"default","exit_code":1,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":5.82,"mem_mb":94.7,"disk_size":"704M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"default","exit_code":1,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":8,"mem_mb":104.9,"disk_size":"751M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"default","exit_code":1,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":8.54,"mem_mb":102.5,"disk_size":"727M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"default","exit_code":1,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":8.16,"mem_mb":104.9,"disk_size":"725M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"default","exit_code":1,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"default","exit_code":1,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null}]},"quickstart_checks":{"last_tested":"2026-04-23","tag":"stale","tag_description":"widespread failures or data too old to trust","results":[{"runtime":"python:3.10-alpine","exit_code":-1},{"runtime":"python:3.10-slim","exit_code":-1},{"runtime":"python:3.11-alpine","exit_code":-1},{"runtime":"python:3.11-slim","exit_code":-1},{"runtime":"python:3.12-alpine","exit_code":-1},{"runtime":"python:3.12-slim","exit_code":0},{"runtime":"python:3.13-alpine","exit_code":-1},{"runtime":"python:3.13-slim","exit_code":-1},{"runtime":"python:3.9-alpine","exit_code":-1},{"runtime":"python:3.9-slim","exit_code":-1}]}}