{"library":"sagemaker-data-insights","title":"SageMaker Data Insights","description":"The SageMaker Data Insights library (current version 0.4.0) is an open-source Python library designed by AWS to help users analyze and understand their data for various SageMaker workloads. It provides utilities for extracting insights from datasets used in SageMaker Labeling Jobs and other data processing tasks, helping identify potential data quality issues or patterns. Given its 0.x.x version, it maintains a relatively agile release cadence, with API changes possible between minor versions.","language":"python","status":"active","last_verified":"2026-05-15","install":{"commands":["pip install sagemaker-data-insights"],"cli":null},"imports":["from sagemaker_data_insights.labeling_job.data_insights import LabelingJobDataInsights","from sagemaker_data_insights.tabular_data_insights.data_insights import TabularDataInsights","from sagemaker_data_insights.labeling_job.data_insights_result import LabelingJobDataInsightsResult"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import os\nimport sagemaker\nimport boto3\nfrom sagemaker_data_insights.labeling_job.data_insights import LabelingJobDataInsights\n\n# Initialize SageMaker session using default boto3 credential chain\n# Ensure your AWS credentials are configured (e.g., via AWS CLI, environment variables)\n# The region can be specified if not configured globally.\nregion = os.environ.get('AWS_REGION', 'us-east-1')\nboto_session = boto3.Session(region_name=region)\nsagemaker_session = sagemaker.Session(boto_session=boto_session)\n\n# Define placeholders for your specific S3 input data and IAM role\n# Replace with actual S3 URI to your labeling job manifest file\ninput_s3_uri = os.environ.get(\n    'SAGEMAKER_DATA_INSIGHTS_INPUT_URI',\n    's3://your-bucket-name/path/to/manifest-file/output.manifest'\n)\n# Replace with the ARN of an IAM role with S3 read/write and SageMaker permissions\nrole_arn = os.environ.get(\n    
'SAGEMAKER_EXECUTION_ROLE_ARN',\n    'arn:aws:iam::123456789012:role/YourSageMakerExecutionRole' # Placeholder, replace with actual role\n)\n\nprint(f\"Analyzing data from: {input_s3_uri}\")\nprint(f\"Using SageMaker execution role: {role_arn}\")\n\ntry:\n    # Instantiate the insights calculator for a Labeling Job\n    insights_calculator = LabelingJobDataInsights(\n        sagemaker_session=sagemaker_session,\n        s3_input_uri=input_s3_uri,\n        role_arn=role_arn,\n        number_of_samples=10 # Use a small number of samples for quick demo\n    )\n\n    # Get insights (this will perform data sampling and analysis)\n    # NOTE: This call requires valid S3 URI, role, and data.\n    # It might take some time to run and will likely fail if placeholders are not replaced.\n    print(\"Attempting to get insights (this may take a moment)...\")\n    insights_result = insights_calculator.get_insights()\n\n    print(\"\\n--- Data Insights Summary ---\")\n    print(f\"Total entries analyzed: {insights_result.number_of_samples}\")\n    if insights_result.annotation_label_distribution:\n        print(\"Annotation Label Distribution:\")\n        for label, count in insights_result.annotation_label_distribution.items():\n            print(f\"  - {label}: {count}\")\n    else:\n        print(\"No annotation label distribution found (check input data/sampling).\")\n\nexcept Exception as e:\n    print(f\"\\nAn error occurred during insights calculation: {e}\")\n    print(\"Please ensure your AWS credentials, S3 input URI, and IAM role are correctly configured and point to valid data.\")\n","lang":"python","description":"Initializes a SageMaker session, instantiates `LabelingJobDataInsights` with user-provided S3 URI and IAM role, and demonstrates how to retrieve and print a basic summary of the data insights. 
Requires configured AWS credentials and a valid S3 path to a labeling job manifest file.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-15","installed_version":"0.4.0","pypi_latest":"0.4.0","is_stale":false,"summary":{"python_range":"3.9–3.13","success_rate":50,"avg_install_s":14.8,"avg_import_s":null,"wheel_type":"sdist"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"sagemaker-data-insights","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"sagemaker-data-insights","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":14.2,"import_time_s":null,"mem_mb":null,"disk_size":"345M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"sagemaker-data-insights","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"sagemaker-data-insights","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":13.6,"import_time_s":null,"mem_mb":null,"disk_size":"368M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"sagemaker-data-insights","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim 
(glibc)","variant":"sagemaker-data-insights","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":15.2,"import_time_s":null,"mem_mb":null,"disk_size":"347M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"sagemaker-data-insights","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"sagemaker-data-insights","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":15,"import_time_s":null,"mem_mb":null,"disk_size":"345M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"sagemaker-data-insights","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"sagemaker-data-insights","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":16.1,"import_time_s":null,"mem_mb":null,"disk_size":"360M"}]}}