{"id":27587,"library":"apache-airflow-providers-apache-hdfs","title":"Apache Airflow Provider for Apache HDFS","description":"A provider package for Apache Airflow that integrates with Apache HDFS, providing hooks and operators for HDFS file operations. Current version 4.11.5 requires Python >=3.10. Released on Airflow provider schedule.","status":"active","version":"4.11.5","language":"python","source_language":"en","source_url":"https://github.com/apache/airflow/tree/main/airflow/providers/apache/hdfs","tags":["airflow","provider","hdfs","hadoop","data","etl"],"install":[{"cmd":"pip install apache-airflow-providers-apache-hdfs","lang":"bash","label":"Standard install"}],"dependencies":[{"reason":"Provider requires Airflow core","package":"apache-airflow","optional":false},{"reason":"Python HDFS client library","package":"hdfs","optional":false},{"reason":"Optional integration with Hive","package":"apache-airflow-providers-apache-hive","optional":true}],"imports":[{"note":"Old hook path removed in provider version 2.0.0+","wrong":"from airflow.hooks.hdfs import HDFSHook","symbol":"HDFSHook","correct":"from airflow.providers.apache.hdfs.hooks.hdfs import HDFSHook"},{"note":"All operators available; check docs for exact names","symbol":"HdfsOperators","correct":"from airflow.providers.apache.hdfs.operators.hdfs import HdfsMkdirFileOperator, HdfsPutFileOperator, HdfsGetFileOperator, HdfsDeleteFileOperator, HdfsListDirectoryOperator, HdfsConcatFileOperator, HdfsMoveFileOperator"}],"quickstart":{"code":"from airflow.providers.apache.hdfs.hooks.hdfs import HDFSHook\n\nhook = HDFSHook(conn_id='hdfs_default')\n# List files in a directory\nfiles = hook.list_directory('/tmp')\nprint(files)","lang":"python","description":"Basic usage of HDFSHook to list files"},"warnings":[{"fix":"Use new import paths: `from airflow.providers.apache.hdfs.hooks.hdfs import HDFSHook`","message":"HDFSHook and operators were moved from `airflow.hooks.hdfs` and `airflow.operators.hdfs` to `airflow.providers.apache.hdfs` in version 2.0.0. Old imports will break.","severity":"breaking","affected_versions":"<2.0.0"},{"fix":"Ensure your Airflow connection has extra parameters for kerberos if needed.","message":"The HDFSHook's get_conn() returns a HDFSClient that may require explicit authentication; default uses 'hdfs' connection string without kerberos if not configured.","severity":"gotcha","affected_versions":"all"},{"fix":"Use `FileTransferOperator` from `airflow.providers.apache.hdfs.operators.hdfs` or implement custom logic.","message":"HdfsMkdirFileOperator and HdfsPutFileOperator are deprecated as of provider version 4.0.0 in favor of generic FileTransferOperator.","severity":"deprecated","affected_versions":">=4.0.0"}],"env_vars":null,"last_verified":"2026-05-09T00:00:00.000Z","next_check":"2026-08-07T00:00:00.000Z","problems":[{"fix":"Change to `from airflow.providers.apache.hdfs.hooks.hdfs import HDFSHook`.","cause":"Using old import path before provider split.","error":"ModuleNotFoundError: No module named 'airflow.hooks.hdfs'"},{"fix":"Use `from airflow.providers.apache.hdfs.operators.hdfs import HdfsPutFileOperator`.","cause":"Operator moved to provider package.","error":"ImportError: cannot import name 'HdfsPutFileOperator' from 'airflow.operators.hdfs'"},{"fix":"Create an Airflow connection with conn_id='hdfs_default', type='HDFS', and host/port details.","cause":"No Airflow connection configured for HDFS.","error":"airflow.exceptions.AirflowException: Connection 'hdfs_default' not found"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}