{"id":23757,"library":"flytekitplugins-spark","title":"flytekitplugins-spark","description":"Spark 3 plugin for flytekit, enabling PySpark jobs within Flyte workflows. Current version 1.16.19, part of the Flytekit ecosystem, released regularly alongside Flytekit.","status":"active","version":"1.16.19","language":"python","source_language":"en","source_url":"https://github.com/flyteorg/flytekit","tags":["flyte","spark","pyspark","workflow","mlops"],"install":[{"cmd":"pip install flytekitplugins-spark","lang":"bash","label":"Install plugin"}],"dependencies":[{"reason":"Core Flyte SDK required.","package":"flytekit","optional":false},{"reason":"Spark runtime dependency.","package":"pyspark","optional":false}],"imports":[{"note":"Correct import for the Spark task type.","wrong":"","symbol":"Spark","correct":"from flytekitplugins.spark import Spark"},{"note":"PySparkTask is exposed at the top level; nested import was used in older docs.","wrong":"from flytekitplugins.spark.task import PySparkTask","symbol":"PySparkTask","correct":"from flytekitplugins.spark import PySparkTask"}],"quickstart":{"code":"from flytekit import task, workflow\nfrom flytekitplugins.spark import Spark, PySparkTask\n\n@task(task_config=Spark(\n    spark_conf={\n        \"spark.executor.cores\": \"1\",\n        \"spark.executor.instances\": \"1\"\n    }\n))\ndef my_spark_task() -> int:\n    import pyspark\n    spark = pyspark.sql.SparkSession.builder.getOrCreate()\n    df = spark.range(10)\n    return df.count()\n\n@workflow\ndef wf() -> int:\n    return my_spark_task()\n\nif __name__ == \"__main__\":\n    print(wf())","lang":"python","description":"Define a Spark task using task_config=Spark(...), then build a workflow."},"warnings":[{"fix":"Always provide spark_conf dictionary in Spark() task config.","message":"In flytekit>=1.10, Spark task configuration requires explicit spark_conf; default configs may be removed.","severity":"breaking","affected_versions":">=1.10.0"},{"fix":"Always call SparkSession.builder.getOrCreate() inside the task function body.","message":"SparkSession must be obtained inside the task at runtime, not at import time. Using getOrCreate() outside the task scope can cause serialization errors.","severity":"gotcha","affected_versions":"all"},{"fix":"Use 'from flytekitplugins.spark import Spark, PySparkTask'.","message":"The old import path from 'flytekitplugins.spark.task' is deprecated in favor of top-level imports from 'flytekitplugins.spark'.","severity":"deprecated","affected_versions":">=1.0.0"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run 'pip install flytekitplugins-spark'.","cause":"Plugin package not installed.","error":"ModuleNotFoundError: No module named 'flytekitplugins.spark'"},{"fix":"Use 'from flytekitplugins.spark import PySparkTask' (or upgrade flytekitplugins-spark).","cause":"Deprecated import path used; PySparkTask not exposed in older versions or wrong import.","error":"AttributeError: module 'flytekitplugins.spark' has no attribute 'PySparkTask'"},{"fix":"Ensure spark_conf includes 'spark.executor.cores' and 'spark.executor.instances'. Verify cluster is running.","cause":"Spark configuration missing required settings or cluster resources insufficient.","error":"Py4JJavaError: An error occurred while calling o123.showString."}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}