{"library":"pyspark-huggingface","type":"library","category":null,"description":"pyspark-huggingface is a Spark Data Source for seamlessly accessing 🤗 Hugging Face Datasets as Spark DataFrames. It enables streaming datasets from the Hub, applying projection and predicate filters, and saving Spark DataFrames back to Hugging Face as Parquet files with fast, deduplicated uploads. It supports authentication via `huggingface-cli login` or tokens. It is compatible with Spark 4 (with auto-import) and backports its functionality to Spark 3.5, 3.4, and 3.3. The current version is 2.1.0 and it is actively maintained.","language":"python","status":"active","version":"2.1.0","tags":["pyspark","spark","huggingface","datasets","data-source","etl","distributed-computing"],"install":[{"cmd":"pip install pyspark-huggingface","imports":["import pyspark_huggingface"]}],"homepage":null,"github":null,"docs":null,"changelog":null,"pypi":"https://pypi.org/project/pyspark-huggingface/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":100,"avg_install_s":16.4,"avg_import_s":null,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/pyspark-huggingface/compatibility"},"provenance":{"verified_status":"import_fail","verified_at":"Fri Jul 03","last_verified":"Fri Jul 03","next_check":"Fri Jul 10","install_tag":null}}