{"id":22341,"library":"scikit-surprise","title":"scikit-surprise","description":"scikit-surprise (Surprise) is a Python scikit for building and analyzing recommender systems. Version 1.1.4 is the current release. It supports prediction-based and neighborhood-based collaborative filtering, matrix factorization, and evaluation metrics. Releases are infrequent (1.1.1 in 2020, 1.1.3 in 2022, 1.1.4 in 2024).","status":"active","version":"1.1.4","language":"python","source_language":"en","source_url":"https://github.com/NicolasHug/Surprise","tags":["recommender-system","collaborative-filtering","matrix-factorization","machine-learning"],"install":[{"cmd":"pip install scikit-surprise","lang":"bash","label":"PyPI"},{"cmd":"conda install -c conda-forge scikit-surprise","lang":"bash","label":"Conda"}],"dependencies":[{"reason":"Required for numerical operations","package":"numpy","optional":false},{"reason":"Required for sparse matrix operations","package":"scipy","optional":false},{"reason":"Used for parallel execution","package":"joblib","optional":false}],"imports":[{"note":"Correct import path.","symbol":"Dataset","correct":"from surprise import Dataset"},{"note":"Used to parse ratings from file or dataframe.","symbol":"Reader","correct":"from surprise import Reader"},{"note":"Standard matrix factorization algorithm.","symbol":"SVD","correct":"from surprise import SVD"},{"note":"For RMSE, MAE, etc.","symbol":"accuracy","correct":"from surprise import accuracy"},{"note":"Common mistake: importing from surprise directly (no cross_validate at top level).","wrong":"from surprise import cross_validate","symbol":"cross_validate","correct":"from surprise.model_selection import cross_validate"},{"note":"Common mistake: importing from surprise directly (no GridSearchCV at top level).","wrong":"from surprise import GridSearchCV","symbol":"GridSearchCV","correct":"from surprise.model_selection import GridSearchCV"}],"quickstart":{"code":"from surprise import Dataset, Reader, SVD, accuracy\nfrom 
surprise.model_selection import train_test_split\n\n# Load the built-in movielens dataset\ndata = Dataset.load_builtin('ml-100k')\ntrainset, testset = train_test_split(data, test_size=0.25)\n\nalgo = SVD()\nalgo.fit(trainset)\npredictions = algo.test(testset)\nrmse = accuracy.rmse(predictions)\nprint(f\"RMSE: {rmse}\")","lang":"python","description":"Loads the Movielens 100k dataset, trains SVD, and evaluates RMSE."},"warnings":[{"fix":"Set env var SURPRISE_DATA_FOLDER to a writable directory.","message":"Dataset.load_builtin() downloads data to ~/.surprise_data by default. If the disk is full or permission is denied, it raises an error. Ensure sufficient space or set the SURPRISE_DATA_FOLDER environment variable.","severity":"gotcha","affected_versions":"all"},{"fix":"sim_options = {'name': 'msd', 'user_based': True}","message":"The default similarity measure for KNN-based algorithms such as KNNBasic is 'msd', not 'cosine'. Pass sim_options explicitly if you need a different measure or want to pin the behavior.","severity":"gotcha","affected_versions":"all"},{"fix":"Always set rating_scale=(min_rating, max_rating) in Reader.","message":"When using custom datasets with Reader, the rating_scale must match the actual ratings. Mismatch leads to inaccurate predictions or errors.","severity":"gotcha","affected_versions":"all"},{"fix":"import joblib; joblib.dump(algo, 'model.pkl')","message":"The surprise.dump module is deprecated and may be removed in future versions. Use pickle or joblib directly on algorithm objects.","severity":"deprecated","affected_versions":">=1.1.4"},{"fix":"Use Dataset.load_from_df() with DataFrame columns: user, item, rating.","message":"SVD and other algorithms require the dataset to have user and item IDs as integers or strings. 
If IDs are not consecutive integers, the algorithm still works but may be less efficient.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"pip install scikit-surprise. The import is 'import surprise' or 'from surprise import ...'.","cause":"Package not installed. The PyPI distribution is named 'scikit-surprise', while the import name is 'surprise'.","error":"ModuleNotFoundError: No module named 'surprise'"},{"fix":"reader = Reader(rating_scale=(1, 5))","cause":"Reader not initialized with rating_scale when using custom dataset.","error":"ValueError: `rating_scale` must be a tuple (low, high)."},{"fix":"from surprise.model_selection import cross_validate","cause":"cross_validate is in surprise.model_selection, not top-level surprise.","error":"AttributeError: module 'surprise' has no attribute 'cross_validate'"},{"fix":"Run Dataset.load_builtin('ml-100k') with internet; or set SURPRISE_DATA_FOLDER to an existing directory.","cause":"Built-in dataset not downloaded (network issue or missing folder).","error":"FileNotFoundError: [Errno 2] No such file or directory: '~/.surprise_data/ml-100k/...'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}