{"id":27273,"library":"pydantic-spark","title":"pydantic-spark","description":"Converts Pydantic models to PySpark schemas. Current version 1.0.1 supports Pydantic v2. Release cadence is irregular. Designed for data engineering pipelines where Pydantic models define data contracts and Spark schemas must be inferred.","status":"active","version":"1.0.1","language":"python","source_language":"en","source_url":"https://github.com/godatadriven/pydantic-spark","tags":["pydantic","spark","schema","pyspark","data-engineering"],"install":[{"cmd":"pip install pydantic-spark","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"Core dependency for model definitions","package":"pydantic","optional":false},{"reason":"Target Spark environment","package":"pyspark","optional":true}],"imports":[{"note":"Correct import path is from pydantic_spark directly","wrong":"from pydantic_spark.core import to_spark_schema","symbol":"to_spark_schema","correct":"from pydantic_spark import to_spark_schema"},{"note":"Secondary utility for generating pandas DataFrames","symbol":"to_pandas_schema","correct":"from pydantic_spark import to_pandas_schema"}],"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom pydantic import BaseModel\nfrom pydantic_spark import to_spark_schema\n\nclass MyModel(BaseModel):\n    name: str\n    age: int\n\nspark = SparkSession.builder.getOrCreate()\nschema = to_spark_schema(MyModel)\ndf = spark.createDataFrame([], schema)\nprint(df.schema)\nspark.stop()","lang":"python","description":"Basic usage: define a Pydantic model and convert to Spark schema."},"warnings":[{"fix":"Update Pydantic to v2 and follow their migration guide (https://docs.pydantic.dev/latest/migration/)","message":"Version 1.0.0 dropped support for Pydantic v1. If upgrading from v0.3.0 or earlier, you must migrate your models to Pydantic v2.","severity":"breaking","affected_versions":"<1.0.0 to >=1.0.0"},{"fix":"Inspect the generated schema and override using pydantic Field(..., schema_extra={...}) or custom serialization.","message":"Complex nested types (e.g., models with Union, Optional, or recursive references) may produce unexpected Spark types. Manual schema adjustments might be needed.","severity":"gotcha","affected_versions":"all"},{"fix":"Remove usage of CoerceType and replace with @field_validator or @model_validator in your Pydantic model.","message":"The 'coerce' feature (CoerceType) from v0.3.0 is deprecated in v1.0.0+. Use Pydantic's built-in validators instead.","severity":"deprecated","affected_versions":">=1.0.0"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Upgrade to latest version: pip install --upgrade pydantic-spark. Use correct import: from pydantic_spark import to_spark_schema","cause":"Installed version <0.3.0 does not have the API; or typo in import path.","error":"ImportError: cannot import name 'to_spark_schema' from 'pydantic_spark'"},{"fix":"Simplify the model to avoid Union/Optional; or provide explicit schema via pyspark's StructType.","cause":"The generated schema is incomplete or wrong for complex Pydantic models (e.g., Union types).","error":"pyspark.sql.utils.AnalysisException: u'Unable to infer schema for type. It must be specified manually.;'"},{"fix":"Use from pydantic_spark import to_spark_schema directly. If using v0.x, use from pydantic_spark.converter import to_spark_schema.","cause":"The function was renamed or moved in version 1.0.0.","error":"AttributeError: module 'pydantic_spark' has no attribute 'to_spark_schema'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}