{"id":24560,"library":"scikit-datasets","title":"scikit-datasets","description":"scikit-datasets provides a collection of ready-to-use datasets compatible with scikit-learn, wrapping common benchmarks like MNIST, CIFAR, and more. Version 0.2.5, updated irregularly.","status":"active","version":"0.2.5","language":"python","source_language":"en","source_url":"https://github.com/jcrudy/scikit-datasets","tags":["datasets","scikit-learn","machine-learning","benchmarks"],"install":[{"cmd":"pip install scikit-datasets","lang":"bash","label":"PyPI"}],"dependencies":[{"reason":"Core dependency for dataset compatibility and return types (Bunch objects).","package":"scikit-learn","optional":false},{"reason":"Used internally for data arrays.","package":"numpy","optional":false},{"reason":"Optional for DataFrame conversion.","package":"pandas","optional":true}],"imports":[{"note":"Module package is 'skdata' not 'scikit_datasets'.","wrong":"from scikit_datasets import load_mnist","symbol":"load_mnist","correct":"from skdata import load_mnist"},{"note":"Functions are directly under skdata, not nested.","wrong":"import skdata.datasets.load_cifar10","symbol":"load_cifar10","correct":"from skdata import load_cifar10"}],"quickstart":{"code":"from skdata import load_mnist\nfrom sklearn.svm import SVC\n\ndata = load_mnist()\nX, y = data.data, data.target\nclf = SVC().fit(X, y)\nprint(clf.score(X, y))","lang":"python","description":"Loads MNIST and fits an SVM classifier."},"warnings":[{"fix":"Change import to 'from skdata import ...'.","message":"In version 0.2.5, the package name changed from 'scikit_datasets' to 'skdata' for imports. Code using 'from scikit_datasets import ...' will break.","severity":"breaking","affected_versions":"<0.2.5 -> 0.2.5"},{"fix":"Use 'as_frame=True' instead of 'as_frames=True'.","message":"The 'as_frames' parameter for returning pandas DataFrames is deprecated in favor of 'as_frame=True', which will become the default in future versions.","severity":"deprecated","affected_versions":"0.2.x"},{"fix":"Ensure internet access for first-time download, or cache the data directory.","message":"Dataset downloads can be large (e.g., MNIST ~15MB, CIFAR-10 ~170MB) and are cached in ~/skdata_data/. Not all datasets are bundled; some require an internet connection on first load.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Use 'from skdata import load_...' or downgrade to version 0.1.x (not recommended).","cause":"The import package was renamed to 'skdata' in version 0.2.5, but older tutorials use 'scikit_datasets'.","error":"ModuleNotFoundError: No module named 'scikit_datasets'"},{"fix":"Use 'from skdata import load_mnist' directly, not 'skdata.datasets.load_mnist'.","cause":"Users try to access datasets via a submodule that does not exist; functions are directly on the skdata module.","error":"AttributeError: module 'skdata' has no attribute 'datasets'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}