{"id":22330,"library":"sagemaker-feature-store-pyspark","title":"Amazon SageMaker Feature Store PySpark Bindings","description":"PySpark bindings for Amazon SageMaker Feature Store, enabling large-scale feature engineering and serving with Spark DataFrames. Current version 1.2.0, released monthly.","status":"active","version":"1.2.0","language":"python","source_language":"en","source_url":"https://github.com/aws/sagemaker-feature-store-pyspark","tags":["amazon-sagemaker","feature-store","pyspark","machine-learning","aws"],"install":[{"cmd":"pip install sagemaker-feature-store-pyspark","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"Runtime dependency for Spark operations","package":"pyspark","optional":false},{"reason":"AWS SDK for SageMaker","package":"sagemaker","optional":false}],"imports":[{"note":"Common mistake: using the package name directly instead of the full SDK path","wrong":"from sagemaker_feature_store_pyspark import FeatureStoreManager","symbol":"FeatureStoreManager","correct":"from sagemaker.feature_store.feature_store import FeatureStoreManager"},{"note":"Incorrect subpackage path","wrong":"from pyspark.feature_store import PySparkFeatureStore","symbol":"PySparkFeatureStore","correct":"from sagemaker.feature_store.feature_store import PySparkFeatureStore"}],"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom sagemaker.feature_store.feature_store import FeatureStoreManager\n\nspark = SparkSession.builder.getOrCreate()\nfs = FeatureStoreManager()\n\ndf = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'value'])\nrecord_id = 'id'\nfeature_group_name = 'my-feature-group'\nfs.ingest(df, feature_group_name, record_identifier_name=record_id)","lang":"python","description":"Creates a Spark DataFrame and ingests it into a SageMaker Feature Group using PySpark bindings."},"warnings":[{"fix":"Use SparkSession.builder.config('spark.jars.packages', 'org.apache.hadoop:hadoop-aws:3.3.4').getOrCreate() or provide JARs via --jars.","message":"PySpark session must be configured with the correct Hadoop AWS JARs for S3 access; missing JARs cause silent failures on ingest.","severity":"gotcha","affected_versions":"all"},{"fix":"Ensure the record identifier column has no nulls; use df.na.drop(subset=['id']) before ingest.","message":"Ingesting DataFrames with columns containing null values in the record identifier column will fail with a Spark exception.","severity":"gotcha","affected_versions":">=1.0.0"},{"fix":"Update imports to use the new path. Old code will raise ImportError.","message":"In version 1.0.0, the module was restructured: `from sagemaker_feature_store_pyspark import FeatureStore` changed to `from sagemaker.feature_store.feature_store import FeatureStoreManager`.","severity":"breaking","affected_versions":"<1.0.0 to >=1.0.0"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Use 'from sagemaker.feature_store.feature_store import FeatureStoreManager' instead.","cause":"The package name differs from import path; the correct import uses submodules under 'sagemaker'.","error":"ImportError: No module named 'sagemaker_feature_store_pyspark'"},{"fix":"Add hadoop-aws JAR via spark.jars.packages or --jars.","cause":"Missing Hadoop AWS JARs in Spark classpath.","error":"Py4JJavaError: An error occurred while calling o123.ingest. : org.apache.spark.SparkException: Job aborted due to stage failure"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}