{"library":"webdataset","type":"library","category":null,"description":"WebDataset is a high-performance Python-based I/O system for deep learning and data processing (current version 1.0.2). It implements the PyTorch IterableDataset interface, enabling efficient streaming access to datasets stored in POSIX tar archives. It supports sharding for large datasets and is compatible with PyTorch's DataLoader, facilitating scalable and latency-insensitive data pipelines for various data types including images, audio, and video. The library is actively maintained with frequent releases adding new features and bug fixes.","language":"python","status":"active","version":"1.0.2","tags":["deep learning","pytorch","data loading","I/O","sharding","tar archives","machine learning","distributed training","streaming"],"last_verified":"Thu May 21","install":[{"cmd":"pip install webdataset","imports":["import webdataset as wds","dataset = wds.WebDataset(url)"]},{"cmd":"pip install git+https://github.com/webdataset/webdataset.git","imports":[]}],"homepage":null,"github":"https://github.com/webdataset/webdataset","docs":null,"changelog":null,"pypi":"https://pypi.org/project/webdataset/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":50,"avg_install_s":3.8,"avg_import_s":0.54,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/webdataset/compatibility"}}