{"id":27857,"library":"hail","title":"Hail","description":"Hail is an open-source, general-purpose, Python-based data analysis tool with additional data types and methods for working with genomic data. Current version 0.2.138, with monthly releases. Requires Python >=3.10.","status":"active","version":"0.2.138","language":"python","source_language":"en","source_url":"https://github.com/hail-is/hail","tags":["genomics","bioinformatics","spark","data-analysis","gwas"],"install":[{"cmd":"pip install hail","lang":"bash","label":"Install Hail"}],"dependencies":[{"reason":"Hail uses Apache Spark for distributed computing.","package":"pyspark","optional":false},{"reason":"Required for array operations.","package":"numpy","optional":false},{"reason":"Required for DataFrame integration.","package":"pandas","optional":false}],"imports":[{"note":"Hail's API is exposed via the `hl` module; wildcard imports can cause namespace pollution.","wrong":"from hail import *","symbol":"hl","correct":"import hail as hl"},{"note":"Must call init after import, before any Hail operations.","wrong":"hail.init()","symbol":"init","correct":"hl.init()"}],"quickstart":{"code":"import hail as hl\nhl.init()\nmt = hl.balding_nichols_model(n_populations=3, n_samples=100, n_variants=10)\nmt.show()","lang":"python","description":"Initializes Hail, simulates a simple genetic dataset, and displays it."},"warnings":[{"fix":"Upgrade to Python 3.10 or later.","message":"Hail removed support for Python 3.9 and below. Requires Python >=3.10 as of version 0.2.130.","severity":"breaking","affected_versions":">=0.2.130"},{"fix":"Replace `hl.import_vcf` with `hl.import_vcf_bgen` or the new VDS API.","message":"The VCF import method `hl.import_vcf` is deprecated; use `hl.import_vcf_bgen` or `hail.VariantDataset.from_vcf` instead.","severity":"deprecated","affected_versions":">=0.2.120"},{"fix":"Use `mt = mt.persist()` or `hl.write(mt, 'output.ht')` to trigger execution.","message":"Hail uses lazy evaluation; mutations to MatrixTable require writing to disk or calling `.persist()` to enforce computation.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-09T00:00:00.000Z","next_check":"2026-08-07T00:00:00.000Z","problems":[{"fix":"Run `pip install hail` in the correct Python environment (Python >=3.10).","cause":"Hail not installed or installed in wrong environment.","error":"ModuleNotFoundError: No module named 'hail'"},{"fix":"Add `hl.init()` after `import hail as hl`.","cause":"Forgot to call `hl.init()` after import.","error":"ValueError: Hail is not initialized. Call hl.init() before using Hail methods."},{"fix":"Configure memory via `hl.init(spark_conf={'spark.executor.memory': '16g', 'spark.driver.memory': '16g'})`.","cause":"Default Spark memory settings are insufficient for large datasets.","error":"java.lang.OutOfMemoryError: Java heap space"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}