{"library":"pyspark-regression","title":"PySpark Regression","description":"PySpark Regression is a Python library for regression testing Spark DataFrames, enabling comparison of outputs between different runs or environments. The current version is 4.2.4; releases are irregular, but the project is actively maintained.","language":"python","status":"active","last_verified":"Sat May 09","install":{"commands":["pip install pyspark-regression"],"cli":null},"imports":["from pyspark_regression import RegressionTest"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom pyspark_regression import RegressionTest\n\nspark = SparkSession.builder.appName('test').getOrCreate()\ndf1 = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'val'])\ndf2 = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'val'])\n\ntester = RegressionTest()\nresult = tester.compare(df1, df2, key='id')\nprint(result)  # Should indicate equality","lang":"python","description":"Basic usage: compare two Spark DataFrames with a key column.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}