{"id":22446,"library":"tensorflow-data-validation","title":"TensorFlow Data Validation","description":"TensorFlow Data Validation (TFDV) is a library for exploring and validating machine learning data. It computes descriptive statistics, infers a schema, detects anomalies, and supports data drift/skew detection. Current version is 1.17.0 (requires Python 3.9+), with releases following TensorFlow's cadence.","status":"active","version":"1.17.0","language":"python","source_language":"en","source_url":"https://github.com/tensorflow/data-validation","tags":["data-validation","tensorflow","ml-pipeline","schema-inference","anomaly-detection"],"install":[{"cmd":"pip install tensorflow-data-validation","lang":"bash","label":"Install TFDV"}],"dependencies":[{"reason":"TFDV relies on TF for internal ops and data handling","package":"tensorflow","optional":false},{"reason":"Required for distributed processing (optional in some installs)","package":"apache-beam","optional":true}],"imports":[{"note":"tfdv is not a top-level module; must import tensorflow_data_validation","wrong":"import tfdv","symbol":"tensorflow_data_validation","correct":"import tensorflow_data_validation as tfdv"},{"note":"StatsOptions is used to customize statistics computation; it is exported at the package top level (defined in tensorflow_data_validation.statistics.stats_options)","symbol":"StatsOptions","correct":"from tensorflow_data_validation import StatsOptions"}],"quickstart":{"code":"import tensorflow_data_validation as tfdv\nimport pandas as pd\n\ndata = pd.DataFrame({\n    'feature1': [1, 2, 3, 4, 5],\n    'feature2': ['a', 'b', 'c', 'd', 'e']\n})\n\n# Generate statistics\nstats = tfdv.generate_statistics_from_dataframe(data)\n\n# Infer schema\nschema = tfdv.infer_schema(stats)\nprint(schema)\n\n# Validate new data\ntest_data = pd.DataFrame({\n    'feature1': [1, 2, 6],\n    'feature2': ['x', 'y', 'z']\n})\nanomalies = tfdv.validate_statistics(\n    tfdv.generate_statistics_from_dataframe(test_data),\n    schema\n)\nprint(anomalies)","lang":"python","description":"Compute statistics from a DataFrame, infer schema, and validate new data."},"warnings":[{"fix":"Use `generate_statistics_from_csv` or `generate_statistics_from_dataframe` instead of `generate_statistics`.","message":"TFDV 1.0+ changed the API for `generate_statistics_from_csv` and `generate_statistics_from_dataframe`. The old `tfdv.generate_statistics` is deprecated.","severity":"breaking","affected_versions":"<1.0"},{"fix":"Use `from tensorflow_data_validation.utils.display_util import display_stats`.","message":"`tfdv.visualize_statistics` is deprecated in favor of using `tfdv.utils.display_util.display_stats` for Jupyter visualization.","severity":"deprecated","affected_versions":">=1.10"},{"fix":"Install apache-beam (`pip install apache-beam`) and pass a Beam `PipelineOptions` object to `generate_statistics_from_csv` via its `pipeline_options` argument.","message":"TFDV statistics generation may be slow on large datasets; use Apache Beam for distributed processing.","severity":"gotcha","affected_versions":"all"},{"fix":"Consider using `tfdv.update_schema(schema, statistics)` to relax schema constraints based on full statistics.","message":"Schema inference from small samples may produce overly strict constraints; use `tfdv.update_schema` or manual tuning.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Use `import tensorflow_data_validation as tfdv`.","cause":"Importing the library incorrectly as 'tfdv' instead of 'tensorflow_data_validation'.","error":"ModuleNotFoundError: No module named 'tfdv'"},{"fix":"Use `tfdv.generate_statistics_from_dataframe(dataframe, stats_options=tfdv.StatsOptions(...))`; `StatsOptions` is exported at the package top level (`from tensorflow_data_validation import StatsOptions`).","cause":"Passing `stats_options` as keyword argument incorrectly in older versions or with wrong signature.","error":"TypeError: generate_statistics_from_dataframe() got an unexpected keyword argument 'stats_options'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}