{"id":23834,"library":"h2o-pysparkling-3-1","title":"Sparkling Water (H2O PySparkling 3.1)","description":"Sparkling Water integrates H2O's Fast Scalable Machine Learning with Apache Spark, enabling scalable ML workflows. Current version: 3.46.0.6.post1. Release cadence follows H2O-3 major/minor releases.","status":"active","version":"3.46.0.6.post1","language":"python","source_language":"en","source_url":"https://github.com/h2oai/sparkling-water","tags":["h2o","spark","machine-learning","big-data","pysparkling"],"install":[{"cmd":"pip install h2o-pysparkling-3.1","lang":"bash","label":"PyPI"}],"dependencies":[],"imports":[{"note":"H2OContext is in pysparkling, not h2o","wrong":"from h2o import H2OContext","symbol":"H2OContext","correct":"from pysparkling import H2OContext"},{"note":"HC is a shortcut in pysparkling","wrong":"from h2o import HC","symbol":"HC","correct":"from pysparkling import HC"}],"quickstart":{"code":"from pyspark.sql import SparkSession\nfrom pysparkling import H2OContext\n\nspark = SparkSession.builder.appName('app').getOrCreate()\nsc = spark.sparkContext\n\n# Initialize H2OContext\nh2o_context = H2OContext.getOrCreate(sc)\n\n# Start H2O services\nh2o_context.start()\n\nprint(f'H2O cluster status: {h2o_context.cluster().status()}')","lang":"python","description":"Initialize Spark and H2OContext. Must be run in a Spark environment (pyspark shell or submitted job)."},"warnings":[{"fix":"Match the major.minor version of h2o-pysparkling with your Spark version. For Spark 3.1.x, use h2o-pysparkling-3.1.","message":"PySparkling 3.2+ requires Spark 3.2.x; PySparkling 3.1 requires Spark 3.1.x. Using the wrong Spark version causes runtime errors.","severity":"breaking","affected_versions":"all"},{"fix":"Use H2OContext.getOrCreate(spark.sparkContext) or H2OContext(sc) depending on version. Check the Sparkling Water changelog for the exact changes.","message":"The H2OContext API has changed. 
Older code that constructs H2OContext(sc) directly may fail; prefer H2OContext.getOrCreate(sc), and fall back to H2OContext(sc) only on versions that still support it.","severity":"deprecated","affected_versions":">=3.36"},{"fix":"Set JAVA_HOME to Java 8 or 11 before starting Spark.","message":"PySparkling requires Java 8 or 11. Java 17+ is not supported and will cause cryptic errors.","severity":"gotcha","affected_versions":"all"},{"fix":"Run the code via spark-submit or the pyspark shell.","message":"H2OContext must be initialized inside a Spark context (e.g., in a PySpark shell or Spark job). Running outside Spark (plain Python) fails with 'No SparkContext found'.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Install h2o-pysparkling-3.1 in the same environment where PySpark runs: pip install h2o-pysparkling-3.1","cause":"Package not installed, or installed in the wrong environment (Python vs PySpark venv).","error":"ModuleNotFoundError: No module named 'pysparkling'"},{"fix":"Set Spark config: spark.hadoop.fs.defaultFS=hdfs://namenode:8020 or use local files with file://","cause":"H2O attempts to load data from HDFS, but the Spark configuration is not set.","error":"IllegalArgumentException: requirement failed: Wrong FS: hdfs://... expected file:///"},{"fix":"Set JAVA_HOME to Java 8: export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64","cause":"Java version mismatch: the running JVM is older than Java 8 (class file version 52.0 is Java 8, which this version requires).","error":"py4j.protocol.Py4JJavaError: An error occurred while calling o135.start. : java.lang.UnsupportedClassVersionError: h2o/water/... Unsupported major.minor version 52.0"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}