{"id":24403,"library":"pyspark-nested-functions","title":"pyspark-nested-functions","description":"Utility functions to manipulate nested structures (arrays, structs) in PySpark DataFrames, including drop, whitelist, fillna, duplicate, rename, cast, and add nested fields. Current version 0.1.8 supports PySpark 3.1.1 to 4.0, Python 3.8–3.12. Releases are infrequent, typically a few per year.","status":"active","version":"0.1.8","language":"python","source_language":"en","source_url":"https://github.com/golosegor/pyspark-nested-fields-functions","tags":["pyspark","nested","data-transformation","spark"],"install":[{"cmd":"pip install pyspark-nested-functions","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"Required for Spark DataFrame operations.","package":"pyspark","optional":false}],"imports":[{"note":"Older versions used nested submodules; correct import is top-level.","wrong":"from pyspark_nested_functions.nested_functions import drop_multiple_nested_columns","symbol":"drop_multiple_nested_columns","correct":"from pyspark_nested_functions import drop_multiple_nested_columns"},{"note":"Function name changed from whitelist_nested_columns to whitelist_multiple_nested_columns in v0.1.x.","wrong":"from pyspark_nested_functions import whitelist_nested_columns","symbol":"whitelist_multiple_nested_columns","correct":"from pyspark_nested_functions import whitelist_multiple_nested_columns"},{"note":"Renamed in v0.1.3; apply_add_operation deprecated.","wrong":"from pyspark_nested_functions import apply_add_operation","symbol":"add_nested_field","correct":"from pyspark_nested_functions import add_nested_field"}],"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom pyspark_nested_functions import drop_multiple_nested_columns, add_nested_field\n\nspark = SparkSession.builder.appName('example').getOrCreate()\ndf = spark.createDataFrame([{\"a\": {\"b\": 1, \"c\": 2}}])\ndf = drop_multiple_nested_columns(df, [\"a.c\"])\ndf.show()\n# +-------+\n# 
|      a|\n# +-------+\n# |    {1}|\n# +-------+\n# Note: a.c has been dropped; only a.b remains.\ndf2 = add_nested_field(df, \"a.d\", \"lit(3)\", \"integer\")\ndf2.show()","lang":"python","description":"Demonstrates dropping a nested column and adding a new nested field."},"warnings":[{"fix":"Update calls to use new names: `add_nested_field` and `whitelist_multiple_nested_columns`.","message":"API changes in v0.1.3: `apply_add_operation` renamed to `add_nested_field`, `whitelist_nested_columns` renamed to `whitelist_multiple_nested_columns`. Old names removed.","severity":"breaking","affected_versions":"<0.1.3"},{"fix":"Always verify column schema before applying transformations; test with small data.","message":"The library does not validate column paths: invalid or non-existent nested paths may silently produce wrong results or raise obscure exceptions.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Upgrade to latest version: `pip install --upgrade pyspark-nested-functions`. 
Check installed version with `pip show pyspark-nested-functions`.","cause":"Using an older version (<0.1.0) where the function was named differently or not available.","error":"AttributeError: module 'pyspark_nested_functions' has no attribute 'drop_multiple_nested_columns'"},{"fix":"Use `add_nested_field` with `fillna=True` or ensure intermediate structs are not null before adding fields.","cause":"Trying to add a nested field under a path where an intermediate struct is null.","error":"TypeError: 'NoneType' object is not subscriptable"},{"fix":"Set JAVA_HOME to Java 17 or later: `export JAVA_HOME=/path/to/jdk-17`.","cause":"Library v0.1.8 supports PySpark 4.0 and DBR 17.3, both of which require Java 17 or later.","error":"PySpark 4.0 compatibility: Java 17 required"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}