{"library":"pyspark-extension","title":"pyspark-extension","description":"A library providing useful extensions to Apache Spark, including DataFrame diff, column transformation utilities, Parquet metadata reading, Spark Connect support, and dependency installation helpers. The current version is 2.15.0.4.1; the library supports Spark 3.2+ and 4.0. Releases follow an irregular cadence, with multiple releases per year.","language":"python","status":"active","last_verified":"Fri May 01","install":{"commands":["pip install pyspark-extension","pip install pyspark-extension[scala]"],"cli":null},"imports":["from spark_extension import diff","from spark_extension.comparators import default_comparator","from spark_extension.parquet import read_encrypted_parquet"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom spark_extension import diff\n\nspark = SparkSession.builder.appName(\"test\").getOrCreate()\ndf1 = spark.createDataFrame([(1, \"a\"), (2, \"b\")], [\"id\", \"val\"])\ndf2 = spark.createDataFrame([(1, \"a\"), (3, \"c\")], [\"id\", \"val\"])\nresult = diff(df1, df2)\nresult.show()","lang":"python","description":"Basic usage: diff two DataFrames","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}