{"library":"datasets","type":"library","category":null,"description":"HuggingFace library for loading, processing, and sharing datasets for ML. Provides load_dataset() for one-line access to 100k+ public datasets on the Hub, plus local file loading (CSV, JSON, Parquet, Arrow, audio, image, etc.). Built on Apache Arrow for memory-efficient, zero-copy data access. Package name on PyPI is 'datasets' (not 'huggingface-datasets'). Import name is also 'datasets'. CRITICAL: datasets 4.0 (July 2025) removed dataset loading scripts and trust_remote_code entirely. Many older community datasets relying on .py loading scripts now fail with datasets>=4.","language":"python","status":"active","version":"4.6.0","tags":["datasets","huggingface","load-dataset","arrow","parquet","nlp","data-loading","streaming","preprocessing","rag"],"last_verified":"Tue Jun 09","install":[{"cmd":"pip install datasets","imports":["from datasets import load_dataset","from datasets import Dataset, DatasetDict","from datasets import Audio, Image"]},{"cmd":"pip install datasets[audio]","imports":[]},{"cmd":"pip install datasets[vision]","imports":[]},{"cmd":"pip install datasets[torch]","imports":[]},{"cmd":"pip install 'datasets<4'","imports":[]}],"homepage":"https://huggingface.co/docs/datasets","github":"https://github.com/huggingface/datasets","docs":null,"changelog":null,"pypi":"https://pypi.org/project/datasets/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.9–3.10","success_rate":68,"avg_install_s":39.3,"avg_import_s":5.43,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/datasets/compatibility"}}