{"id":23931,"library":"kerchunk","title":"Kerchunk","description":"Kerchunk is a Python library for creating and manipulating chunked reference descriptions for cloud-optimized data access. It enables efficient reading of scientific data (e.g., NetCDF/HDF5) from remote storage without downloading entire files. The current version is 0.2.10, supporting Python >=3.11, with a stable but evolving API.","status":"active","version":"0.2.10","language":"python","source_language":"en","source_url":"https://github.com/fsspec/kerchunk","tags":["cloud-storage","zarr","netcdf","hdf5","fsspec","data-access"],"install":[{"cmd":"pip install kerchunk","lang":"bash","label":"Latest from PyPI"}],"dependencies":[{"reason":"Required for file system abstractions and reference file system implementation.","package":"fsspec","optional":false},{"reason":"Optional for integration with xarray for opening reference datasets.","package":"xarray","optional":true}],"imports":[{"note":"MultiZarrToZarr is in the combine submodule, not top-level.","wrong":"from kerchunk import MultiZarrToZarr","symbol":"MultiZarrToZarr","correct":"from kerchunk.combine import MultiZarrToZarr"},{"note":"For HDF5/NetCDF files, use KerchunkGroup or single_file_to_reference; kerchunk has no top-level open_dataset.","wrong":"from kerchunk import open_dataset","symbol":"open_dataset","correct":"from kerchunk.hdf import KerchunkGroup"}],"quickstart":{"code":"import fsspec\nimport xarray as xr\nfrom kerchunk.hdf import single_file_to_reference\nfrom kerchunk.combine import MultiZarrToZarr\n\n# Generate reference for a single file\nurl = 's3://example-bucket/file.nc'  # or local path\nfs = fsspec.filesystem('s3', anon=True)\nwith fs.open(url) as f:\n    h5chunks = single_file_to_reference(f, url)\n\n# Save reference as JSON\nimport json\nwith open('ref.json', 'w') as f:\n    json.dump(h5chunks, f)\n\n# Open with xarray\nmapper = fsspec.get_mapper('reference://', fo='ref.json')\nds = xr.open_dataset(mapper, 
engine='zarr')\nprint(ds)","lang":"python","description":"Creates a single-file reference and opens it with xarray via Zarr engine."},"warnings":[{"fix":"Upgrade Python to 3.11 or later.","message":"Kerchunk 0.2.0 dropped Python 3.8 support. Use Python >=3.11 as of 0.2.10.","severity":"breaking","affected_versions":">=0.2.0"},{"fix":"Generate single-file references first using single_file_to_reference, then pass to MultiZarrToZarr.","message":"For multi-file concatenation, MultiZarrToZarr expects a list of reference dicts, not file paths. Passing file paths will raise cryptic errors.","severity":"gotcha","affected_versions":"all"},{"fix":"Use MultiZarrToZarr with appropriate options.","message":"The 'kerchunk.combine' module's 'concat' function is deprecated in favor of MultiZarrToZarr.","severity":"deprecated","affected_versions":"<0.2.0"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Use fsspec.get_mapper('reference://', fo='ref.json') or wrap the dict in a ReferenceFileSystem. See quickstart.","cause":"Attempting to open a single-file reference dict directly with fsspec without using the correct mapper.","error":"KeyError: 'refs'"},{"fix":"Use `from kerchunk.combine import MultiZarrToZarr` and ensure kerchunk>=0.2.0.","cause":"Outdated kerchunk version (<0.2.0) or wrong import path. The combine module exists but is not imported by default from top-level.","error":"AttributeError: module 'kerchunk' has no attribute 'combine'"},{"fix":"Always specify `engine='zarr'` when opening reference datasets in xarray.","cause":"Opening a reference file with xarray without specifying engine='zarr' or using a non-Zarr engine.","error":"ValueError: unrecognized chunk manager: none"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}