{"id":24412,"library":"pyTDC","title":"Therapeutics Data Commons (TDC)","description":"Therapeutics Data Commons (TDC) is a unified, open-source framework for machine learning in therapeutics. It provides standardized datasets, benchmarks, and tools for tasks like drug-target interaction, ADMET prediction, and clinical trial outcome prediction. Current version 1.1.15, with frequent updates.","status":"active","version":"1.1.15","language":"python","source_language":"en","source_url":"https://github.com/mims-harvard/TDC","tags":["therapeutics","drug-discovery","machine-learning","datasets","benchmarks"],"install":[{"cmd":"pip install PyTDC","lang":"bash","label":"Install from PyPI"}],"dependencies":[],"imports":[{"note":"Main entry point for accessing datasets.","wrong":"","symbol":"TDC","correct":"from tdc import TDC"},{"note":"Older imports from top-level tdc may not work; use submodule.","wrong":"from tdc import SinglePredDataset","symbol":"SinglePredDataset","correct":"from tdc.single_pred import SinglePredDataset"},{"note":"Utility for loading local files.","wrong":"","symbol":"DataLoader","correct":"from tdc.utils import DataLoader"}],"quickstart":{"code":"from tdc.single_pred import ADME\ndata = ADME(name = 'Caco2_Wang')\nsplit = data.get_split(method = 'random', seed = 42)\ntrain, valid, test = split['train'], split['valid'], split['test']\nprint(train.head())","lang":"python","description":"Load Caco2 permeability dataset and split into train/valid/test."},"warnings":[{"fix":"Upgrade Python to 3.8 or higher.","message":"Python 3.7 support dropped after version 0.9.6. Use Python 3.8+.","severity":"breaking","affected_versions":">=1.0.0"},{"fix":"Set environment variable TDC_CACHE_DIR to a writable directory, or use data.DataLoader for custom files.","message":"Data download may fail behind a corporate firewall or on a slow network. TDC downloads from remote servers.","severity":"gotcha","affected_versions":"all"},{"fix":"Always pass the seed parameter, e.g. get_split(method='random', seed=42).","message":"Do not rely on the 'random' split method producing the same splits across runs without a seed; pass seed explicitly for reproducibility.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run: pip install PyTDC","cause":"pyTDC not installed or installed in wrong environment.","error":"ModuleNotFoundError: No module named 'tdc'"},{"fix":"Use: from tdc.single_pred import ADME","cause":"Attempting to import from top-level tdc incorrectly.","error":"AttributeError: module 'tdc' has no attribute 'single_pred'"},{"fix":"Check internet connection or set TDC_CACHE_DIR to a local path. Alternatively, use a local data file with DataLoader.","cause":"Network issue when downloading datasets.","error":"urllib.error.URLError: <urlopen error [Errno 111] Connection refused>"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}