{"library":"pyspark-nested-functions","title":"pyspark-nested-functions","description":"Utility functions to manipulate nested structures (arrays, structs) in PySpark DataFrames, including drop, whitelist, fillna, duplicate, rename, cast, and add nested fields. Current version 0.1.8 supports PySpark 3.1.1 to 4.0, Python 3.8–3.12. Releases are infrequent, typically a few per year.","language":"python","status":"active","last_verified":"Fri May 01","install":{"commands":["pip install pyspark-nested-functions"],"cli":null},"imports":["from pyspark_nested_functions import drop_multiple_nested_columns","from pyspark_nested_functions import whitelist_multiple_nested_columns","from pyspark_nested_functions import add_nested_field"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom pyspark_nested_functions import drop_multiple_nested_columns, add_nested_field\n\nspark = SparkSession.builder.appName('example').getOrCreate()\ndf = spark.createDataFrame([{\"a\": {\"b\": 1, \"c\": 2}}])\ndf = drop_multiple_nested_columns(df, [\"a.c\"])\ndf.show()\n# Expected output, assuming `a` is a struct column (a.c dropped, only a.b remains):\n# +---+\n# |  a|\n# +---+\n# |{1}|\n# +---+\ndf2 = add_nested_field(df, \"a.d\", \"lit(3)\", \"integer\")\ndf2.show()","lang":"python","description":"Demonstrates dropping a nested column and adding a new nested field.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}