{"id":20921,"library":"amundsen-databuilder","title":"Amundsen Databuilder","description":"Amundsen Databuilder is the data ingestion library for Amundsen, a data discovery and metadata platform. It provides Extractors, Transformers, and Loaders (ETL framework) to pull metadata from various sources (Snowflake, Hive, Postgres, etc.) and push it into a search index (Elasticsearch) and a graph database (Neo4j or Apache Gremlin). Current version is 7.5.1, compatible with Python >=3.8. Releases are frequent, roughly monthly.","status":"active","version":"7.5.1","language":"python","source_language":"en","source_url":"https://github.com/amundsen-io/amundsen/tree/main/databuilder","tags":["data-catalog","metadata","etl","amundsen"],"install":[{"cmd":"pip install amundsen-databuilder","lang":"bash","label":"Standard install"}],"dependencies":[],"imports":[{"note":"Correct class name is SnowflakeMetadataExtractor, not SnowflakeExtractor. Common mistake: many users try to import SnowflakeExtractor which does not exist.","wrong":"from databuilder.extractor.snowflake_extractor import SnowflakeExtractor","symbol":"SnowflakeExtractor","correct":"from databuilder.extractor.snowflake_metadata_extractor import SnowflakeMetadataExtractor"},{"note":"The correct class is FsNeo4jCsvLoader. 
The module is file_system_neo4j_csv_loader.","wrong":"from databuilder.loader.neo4j_csv_loader import Neo4jCsvLoader","symbol":"Neo4jCsvLoader","correct":"from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCsvLoader"},{"note":"","wrong":null,"symbol":"GenericExtractor","correct":"from databuilder.extractor.generic_extractor import GenericExtractor"}],"quickstart":{"code":"import logging\nimport os\n\nfrom databuilder.job.job import DefaultJob\nfrom databuilder.task.task import DefaultTask\nfrom databuilder.extractor.snowflake_metadata_extractor import SnowflakeMetadataExtractor\nfrom databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCsvLoader\nfrom databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher\nfrom databuilder.transformer.base_transformer import NoopTransformer\n\nlogging.basicConfig(level=logging.INFO)\n\n# Configuration for Snowflake extractor\nextractor = SnowflakeMetadataExtractor()\nextractor.init(\n    {\n        SnowflakeMetadataExtractor.ACCOUNT_KEY: os.environ.get('SNOWFLAKE_ACCOUNT', ''),\n        SnowflakeMetadataExtractor.DATABASE_KEY: os.environ.get('SNOWFLAKE_DATABASE', ''),\n        SnowflakeMetadataExtractor.SCHEMA_KEY: os.environ.get('SNOWFLAKE_SCHEMA', ''),\n        SnowflakeMetadataExtractor.USER_KEY: os.environ.get('SNOWFLAKE_USER', ''),\n        SnowflakeMetadataExtractor.PASSWORD_KEY: os.environ.get('SNOWFLAKE_PASSWORD', ''),\n        SnowflakeMetadataExtractor.WAREHOUSE_KEY: os.environ.get('SNOWFLAKE_WAREHOUSE', ''),\n    }\n)\n\nloader = FsNeo4jCsvLoader()\nloader.init({})\n\npublisher = Neo4jCsvPublisher()\npublisher.init(\n    {\n        Neo4jCsvPublisher.NEO4J_END_POINT_KEY: os.environ.get('NEO4J_ENDPOINT', 'bolt://localhost:7687'),\n        Neo4jCsvPublisher.NEO4J_USER: os.environ.get('NEO4J_USER', 'neo4j'),\n        Neo4jCsvPublisher.NEO4J_PASSWORD: os.environ.get('NEO4J_PASSWORD', 'test'),\n        Neo4jCsvPublisher.NEO4J_MAX_CONN_LIFE_TIME_SEC: 1000,\n        
Neo4jCsvPublisher.JOB_PUBLISHER_TASK: 'test',\n    }\n)\n\ntask = DefaultTask(\n    extractor=extractor,\n    loader=loader,\n    transformer=NoopTransformer(),\n)\n\njob = DefaultJob(\n    task=task,\n    publisher=publisher,\n)\n\njob.launch()","lang":"python","description":"Minimal job that extracts metadata from Snowflake and loads it into Neo4j via CSV. Ensure SNOWFLAKE_* and NEO4J_* environment variables are set."},"warnings":[{"fix":"Update imports to new names. Check release notes for full list of renamed classes.","message":"Breaking change in version 7.0.0: renamed many extractors and loaders. SnowflakeExtractor became SnowflakeMetadataExtractor. Also removed old deprecated modules like databuilder.extractor.hive_table_metadata_extractor.","severity":"breaking","affected_versions":">=7.0.0"},{"fix":"Pass loader.init({'base_directory': '/tmp/amundsen'}) or similar.","message":"The FsNeo4jCsvLoader now requires a file system directory; previously it wrote to temp. Must call init() with proper config or it may fail.","severity":"breaking","affected_versions":">=7.0.0"},{"fix":"Use 'from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCsvLoader'.","message":"Common mistake: using the wrong Neo4j CSV loader class name. 
Users often import 'Neo4jCsvLoader' or 'CsvLoader', but the correct class is 'FsNeo4jCsvLoader'.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Use 'from databuilder.extractor.snowflake_metadata_extractor import SnowflakeMetadataExtractor'.","cause":"Old import path; renamed in 7.0.0.","error":"ModuleNotFoundError: No module named 'databuilder.extractor.snowflake_extractor'"},{"fix":"Use publisher.init() with a dictionary of config keys, e.g., publisher.init({Neo4jCsvPublisher.NEO4J_END_POINT_KEY: 'bolt://...'}).","cause":"Neo4j publisher config keys changed; they no longer accept direct keyword args.","error":"TypeError: __init__() got an unexpected keyword argument 'neo4j_endpoint'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}