{"id":27582,"library":"airflow-provider-duckdb","title":"Airflow Provider for DuckDB","description":"An Apache Airflow provider that integrates DuckDB, enabling SQL execution, data loading, and transformations within Airflow DAGs. Current version 0.2.0 targets DuckDB 0.3.x+ and Airflow 2.x. Release cadence is irregular.","status":"active","version":"0.2.0","language":"python","source_language":"en","source_url":"https://github.com/duckdb/airflow-provider-duckdb","tags":["airflow","duckdb","provider","ETL","SQL"],"install":[{"cmd":"pip install airflow-provider-duckdb","lang":"bash","label":"PyPI"}],"dependencies":[{"reason":"Required runtime dependency","package":"apache-airflow","optional":false},{"reason":"Core DuckDB library","package":"duckdb","optional":false}],"imports":[{"note":"Incorrect due to provider naming convention; must use dotted path under airflow.providers.","wrong":"from airflow_provider_duckdb.operators import DuckDBOperator","symbol":"DuckDBOperator","correct":"from airflow.providers.duckdb.operators.duckdb import DuckDBOperator"},{"note":"Same dot-path convention; wrong import leads to ModuleNotFoundError.","wrong":"from airflow_provider_duckdb.hooks import DuckDBHook","symbol":"DuckDBHook","correct":"from airflow.providers.duckdb.hooks.duckdb import DuckDBHook"}],"quickstart":{"code":"from datetime import datetime\nfrom airflow import DAG\nfrom airflow.providers.duckdb.operators.duckdb import DuckDBOperator\n\ndefault_args = {'owner': 'airflow', 'start_date': datetime(2023, 1, 1)}\n\nwith DAG('duckdb_example', schedule_interval=None, default_args=default_args, catchup=False) as dag:\n    create_table = DuckDBOperator(\n        task_id='create_table',\n        sql=\"CREATE TABLE IF NOT EXISTS test (id INTEGER, name VARCHAR)\",\n        duckdb_conn_id='duckdb_default'\n    )\n    insert_data = DuckDBOperator(\n        task_id='insert_data',\n        sql=\"INSERT INTO test VALUES (1, 'Alice'), (2, 'Bob')\",\n        
duckdb_conn_id='duckdb_default'\n    )\n    select_data = DuckDBOperator(\n        task_id='select_data',\n        sql=\"SELECT * FROM test\",\n        duckdb_conn_id='duckdb_default',\n        do_xcom_push=True\n    )\n    create_table >> insert_data >> select_data","lang":"python","description":"Minimal DAG creating a table, inserting rows, and selecting data with DuckDBOperator."},"warnings":[{"fix":"Combine statements in one string with semicolons or use multiple tasks.","message":"Operator `sql` parameter expects a single SQL string; multiple statements must be separated by semicolons (DuckDB multi-statement support is limited)","severity":"breaking","affected_versions":"all"},{"fix":"Set environment variable AIRFLOW_CONN_DUCKDB_DEFAULT='duckdb://localhost:5432/mydb' or configure via UI.","message":"Connection ID 'duckdb_default' is not automatically created; you must configure it in Airflow UI or environment. Missing connection leads to cryptic Airflow errors.","severity":"gotcha","affected_versions":"all"},{"fix":"Use dotted provider import path as shown in quickstart above.","message":"Old import path `from airflow_provider_duckdb import ...` is deprecated; always use `from airflow.providers.duckdb import ...`","severity":"deprecated","affected_versions":">=0.2.0"}],"env_vars":null,"last_verified":"2026-05-09T00:00:00.000Z","next_check":"2026-08-07T00:00:00.000Z","problems":[{"fix":"Use `from airflow.providers.duckdb.operators.duckdb import DuckDBOperator`","cause":"Incorrect import path using underscore instead of dotted path.","error":"ModuleNotFoundError: No module named 'airflow_provider_duckdb'"},{"fix":"Add connection via Airflow UI (Admin -> Connections) or set env var: AIRFLOW_CONN_DUCKDB_DEFAULT='duckdb://localhost:5432/mydb'","cause":"DuckDB connection not configured in Airflow.","error":"AirflowException: The conn_id `duckdb_default` isn't defined"},{"fix":"Use `CREATE TABLE IF NOT EXISTS` or wrap in `DROP TABLE IF EXISTS` before 
creation.","cause":"Running the DAG multiple times with a plain `CREATE TABLE` statement, i.e. without `IF NOT EXISTS` or any other idempotency handling.","error":"duckdb.CatalogException: Catalog Error: Table with name test already exists"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}