{"id":21108,"library":"dbnd-spark","title":"DBND Spark","description":"DBND Spark provides integration between Databand's data orchestration framework and Apache Spark. It enables tracking, monitoring, and logging of Spark jobs, including data metrics, lineage, and execution context. The library wraps SparkSession to automatically capture logs and telemetry. Version 1.0.34.1 is the latest stable release, with monthly updates. 'dbnd-spark' is part of the 'dbnd' ecosystem but installed separately. Maintained by Databand (now IBMA).","status":"active","version":"1.0.34.1","language":"python","source_language":"en","source_url":"https://github.com/databand-ai/dbnd","tags":["spark","data-orchestration","machine-learning","databand"],"install":[{"cmd":"pip install dbnd-spark","lang":"bash","label":"Standard install"}],"dependencies":[{"reason":"Core library required for orchestration and tracking runtime.","package":"dbnd","optional":false},{"reason":"Required to interact with Spark.","package":"pyspark","optional":false}],"imports":[{"note":"Direct import avoids full package path; however, using module prefix is also valid.","wrong":"dbnd_spark.DbndSparkConfig","symbol":"DbndSparkConfig","correct":"from dbnd_spark import DbndSparkConfig"}],"quickstart":{"code":"from dbnd import dbnd_config, task\nfrom dbnd_spark import DbndSparkConfig\n\n@task\ndef my_spark_job():\n    from pyspark.sql import SparkSession\n    spark = SparkSession.builder.appName(\"test\").getOrCreate()\n    df = spark.range(10)\n    df.show()\n    spark.stop()\n\nif __name__ == \"__main__\":\n    dbnd_config.set(DbndSparkConfig.webapp_url=os.environ.get('DBND_WEBAPP_URL', ''))\n    my_spark_job.dbnd_run()","lang":"python","description":"Define a Spark job as a DBND task, configure tracking URL (optional), and run via dbnd_run."},"warnings":[{"fix":"Migrate to 'dbnd' package and use 'from dbnd_spark import ...' from within dbnd.","message":"The 'dbnd-spark' package is being deprecated in favor of 'dbnd' unified package. New versions of dbnd include Spark support internally.","severity":"deprecated","affected_versions":">=1.0.34"},{"fix":"Always create SparkSession inside a @task-decorated function.","message":"SparkSession must be created inside a DBND task. Creating it at module level breaks tracking.","severity":"gotcha","affected_versions":"all"},{"fix":"Use underscore style: app_name instead of appName.","message":"Changed from camelCase to snake_case for configuration attributes in v1.0.20.","severity":"breaking","affected_versions":">=1.0.20"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Run 'pip install dbnd-spark' in addition to dbnd.","cause":"dbnd-spark is a separate package from dbnd.","error":"ModuleNotFoundError: No module named 'dbnd_spark'"},{"fix":"Ensure you import dbnd_spark before creating SparkSession, or use DbndSparkSessionBuilder.","cause":"Spark session not wrapped by DBND; import missing or SparkContext not initialized properly.","error":"AttributeError: 'SparkSession' object has no attribute 'dbnd_tracking'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}