{"id":23258,"library":"apache-airflow-providers-apache-pig","title":"Apache Airflow Provider for Apache Pig","description":"Apache Airflow provider for Apache Pig. Version 4.8.4 requires Airflow >=2.9.0 and Python >=3.10. This provider allows Airflow to run Pig jobs via the PigOperator. Release cadence follows Airflow's provider release cycle (monthly).","status":"active","version":"4.8.4","language":"python","source_language":"en","source_url":"https://github.com/apache/airflow/tree/main/airflow/providers/apache/pig","tags":["airflow","provider","apache","pig","pig operator"],"install":[{"cmd":"pip install apache-airflow-providers-apache-pig","lang":"bash","label":"PyPI install"}],"dependencies":[{"reason":"Required: Airflow >=2.9.0 is needed for this provider version.","package":"apache-airflow","optional":false}],"imports":[{"note":"Correct import path after provider restructuring (Airflow 2.0+).","symbol":"PigOperator","correct":"from airflow.providers.apache.pig.operators.pig import PigOperator"}],"quickstart":{"code":"from datetime import datetime\nfrom airflow import DAG\nfrom airflow.providers.apache.pig.operators.pig import PigOperator\n\nwith DAG(dag_id='example_pig', start_date=datetime(2025,1,1), schedule='@once', catchup=False) as dag:\n    run_pig = PigOperator(\n        task_id='run_pig',\n        pig='ls /user/hadoop;',\n        pig_opts='-x local',\n        pig_cli_conn_id='pig_default'\n    )\n    run_pig","lang":"python","description":"Basic DAG using PigOperator. Note: 'pig_cli_conn_id' must be set to a connection with your Pig CLI environment."},"warnings":[{"fix":"For inline Pig Latin, use a multiline string. For a file, set pig='/path/to/script.pig'.","message":"The PigOperator expects the 'pig' argument to be a script string or file path.\nIf using a file, set 'pig' to the file path and ensure the file is accessible on all worker nodes.","severity":"gotcha","affected_versions":"all"},{"fix":"Use 'pig_cli_conn_id' instead of 'pig_conn_id'.\nAlso ensure the connection type is 'pig'.","message":"The 'pig_conn_id' parameter was renamed to 'pig_cli_conn_id' in provider version 4.0.0 (Airflow 2.3+). Using the old name will fail.","severity":"deprecated","affected_versions":">=4.0.0"},{"fix":"Install Apache Pig and verify with 'which pig'. Set the executable path in the connection or via 'pig_cli_conn_id'.","message":"The PigOperator runs the pig command via subprocess. Ensure the 'pig' CLI is installed and available in PATH on all worker nodes where tasks are executed.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Create a connection in Airflow UI (Admin -> Connections) with Conn Id: pig_default, Conn Type: Pig, Host: localhost (or appropriate).","cause":"Connection 'pig_default' is not set up in Airflow.","error":"airflow.exceptions.AirflowException: The conn_id `pig_default` isn't defined"},{"fix":"Replace 'pig_conn_id' with 'pig_cli_conn_id'.","cause":"Using deprecated parameter 'pig_conn_id' in provider 4.0.0+.","error":"TypeError: __init__() got an unexpected keyword argument 'pig_conn_id'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}