{"id":27296,"library":"pytabkit","title":"PyTabKit","description":"PyTabKit provides ML models and a benchmark for tabular data classification and regression. Current version is 1.7.3, supporting Python >=3.9, with PyTorch-based models like RealMLP, xRFM, TabM, and TabICL wrappers, plus scikit-learn-style interfaces, HPO, and ensembling. Release cadence is irregular, with several updates in 2024-2025.","status":"active","version":"1.7.3","language":"python","source_language":"en","source_url":"https://github.com/dholzmueller/pytabkit","tags":["tabular-data","machine-learning","classification","regression","realmlp","pytorch"],"install":[{"cmd":"pip install pytabkit","lang":"bash","label":"Install base package (RealMLP only)"},{"cmd":"pip install pytabkit[extra]","lang":"bash","label":"Install with extra models (GBDT, TabM, CatBoost, etc.)"}],"dependencies":[{"reason":"Core dependency for all neural network models.","package":"torch","optional":false},{"reason":"Used for compatibility (>=1.8).","package":"scikit-learn","optional":false},{"reason":"Data handling.","package":"pandas","optional":false},{"reason":"Numerical operations.","package":"numpy","optional":false}],"imports":[{"note":"Top-level import does not exist; must import from the sklearn submodule.","wrong":"from pytabkit import RealMLP_Tabular_Classifier","symbol":"RealMLP_Tabular_Classifier","correct":"from pytabkit.models.sklearn.realmlp import RealMLP_Tabular_Classifier"},{"note":"Models live under pytabkit.models.sklearn, not directly under models.","wrong":"from pytabkit.models.realmlp import RealMLP_Tabular_Regressor","symbol":"RealMLP_Tabular_Regressor","correct":"from pytabkit.models.sklearn.realmlp import RealMLP_Tabular_Regressor"},{"note":"TabM is in its own submodule.","wrong":"from pytabkit import TabM_Classifier","symbol":"TabM_Classifier","correct":"from pytabkit.models.sklearn.tabm import TabM_Classifier"}],"quickstart":{"code":"from pytabkit.models.sklearn.realmlp import 
RealMLP_Tabular_Classifier\nfrom sklearn.datasets import make_classification\nfrom sklearn.model_selection import train_test_split\n\nX, y = make_classification(n_samples=100, n_features=20, random_state=0)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n\nmodel = RealMLP_Tabular_Classifier(\n    time_limit_s=30,  # limit training time\n    n_refit=1,\n    device='cpu'\n)\nmodel.fit(X_train, y_train)\ny_pred = model.predict(X_test)\nprint(f'Accuracy: {sum(y_pred == y_test) / len(y_test):.3f}')","lang":"python","description":"Quick example: train a RealMLP classifier on synthetic data with a time limit."},"warnings":[{"fix":"Replace 'from pytabkit.models.sklearn.realmlp import RealMLP_Ensemble_Classifier' with 'from pytabkit.models.sklearn.realmlp import RealMLP_Tabular_Classifier' and add 'use_caruana_ensembling=True' to the constructor.","message":"In v1.5.0, RealMLP_Ensemble_Classifier and RealMLP_Ensemble_Regressor were removed. Use RealMLP_Tabular_Classifier with use_caruana_ensembling=True instead.","severity":"breaking","affected_versions":">=1.5.0"},{"fix":"To revert to logit averaging, set 'ens_av_before_softmax=True' in the model constructor.","message":"In v1.6.1, default classification ensembling changed from averaging logits to averaging probabilities. This may change results.","severity":"breaking","affected_versions":">=1.6.1"},{"fix":"Install the extra dependencies: 'pip install pytabkit[extra]'.","message":"When using extra models (GBDT, CatBoost, TabM), ensure you install pytabkit with the 'extra' option: pip install pytabkit[extra]. 
The base install only includes RealMLP.","severity":"gotcha","affected_versions":">=1.4.0"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run 'pip install pytabkit' for base version, or 'pip install pytabkit[extra]' for full functionality.","cause":"Package not installed or installed in wrong environment.","error":"ModuleNotFoundError: No module named 'pytabkit'"},{"fix":"Use 'RealMLP_Tabular_Classifier' with 'use_caruana_ensembling=True'.","cause":"RealMLP_Ensemble_* classes removed in v1.5.0.","error":"ImportError: cannot import name 'RealMLP_Ensemble_Classifier' from 'pytabkit.models.sklearn.realmlp'"},{"fix":"Ensure you pass NumPy arrays or DataFrames as appropriate. Use .values if needed.","cause":"A pandas Series was evaluated in a boolean context (for example in an 'if' condition or with 'and'/'or'). This typically happens when a pandas Series or DataFrame is passed where a NumPy array is expected.","error":"ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}