{"library":"dbldatagen","type":"library","category":null,"description":"dbldatagen (Databricks Labs Data Generator) is an open-source Python library for generating synthetic data at scale within Apache Spark and Databricks environments. It allows users to define complex data schemas with various constraints, distributions, and inter-column relationships to create realistic datasets for testing, benchmarking, and machine learning model development. The library is currently at version 0.4.0.post1 and is under active development with a steady release cadence.","language":"python","status":"active","version":"0.4.0.post1","tags":["pyspark","databricks","synthetic-data","data-generation","testing","benchmarking"],"last_verified":"Sun May 24","install":[{"cmd":"pip install dbldatagen","imports":["from dbldatagen import DataGenerator","import dbldatagen as dg","import dbldatagen as dg\ndf = dg.Datasets(spark, \"basic/user\").get().build()"]},{"cmd":"%pip install dbldatagen","imports":[]}],"homepage":null,"github":"https://github.com/databrickslabs/data-generator","docs":"https://databrickslabs.github.io/dbldatagen/public_docs/index.html","changelog":null,"pypi":"https://pypi.org/project/dbldatagen/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":100,"avg_install_s":1.6,"avg_import_s":null,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/dbldatagen/compatibility"}}