{"library":"repartipy","type":"library","category":null,"description":"repartipy is a Python library for managing PySpark DataFrame partition sizes. Its size estimators (e.g. SizeEstimator) estimate a DataFrame's size and calculate the partition count needed to reach a desired partition size, so the DataFrame can be repartitioned for efficient storage and processing. As of version 0.1.8 it is a relatively stable, narrowly focused utility; updates appear to be driven by PySpark compatibility or feature requests rather than a fixed release cadence.","language":"python","status":"active","version":"0.1.8","tags":["pyspark","dataframe","partitioning","data-engineering","spark"],"install":[{"cmd":"pip install repartipy","imports":["from repartipy import SizeEstimator"]}],"homepage":null,"github":"https://github.com/sakjung/repartipy","docs":null,"changelog":null,"pypi":"https://pypi.org/project/repartipy/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":100,"avg_install_s":1.7,"avg_import_s":null,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/repartipy/compatibility"},"provenance":{"verified_status":"passing","verified_at":"Tue Jun 30","last_verified":"Tue Jun 30","next_check":"Thu Jul 30","install_tag":null}}