{"id":23715,"library":"fastcdc","title":"FastCDC","description":"FastCDC is a pure Python implementation of the Fast Content-Defined Chunking (CDC) algorithm, providing variable-size chunking based on content similarity. Current version is 1.7.0, supporting Python 3.7+.","status":"active","version":"1.7.0","language":"python","source_language":"en","source_url":"https://github.com/iscc/fastcdc-py","tags":["chunking","content-defined-chunking","deduplication","cdc"],"install":[{"cmd":"pip install fastcdc","lang":"bash","label":"Install from PyPI"}],"dependencies":[],"imports":[{"note":"Direct import of FastCDC class; using 'import fastcdc' requires 'fastcdc.FastCDC'.","wrong":"import fastcdc","symbol":"FastCDC","correct":"from fastcdc import FastCDC"}],"quickstart":{"code":"from fastcdc import FastCDC\n\n# Create a FastCDC instance with desired average chunk size\ncdc = FastCDC(avg_size=4096)  # 4KB average chunk size\n\n# Chunk a bytes object\ndata = b\"\"\"Some large binary data repeated many times to demonstrate chunking.\"\"\" * 100\nchunks = list(cdc.chunk(data))\n\nprint(f\"Number of chunks: {len(chunks)}\")\nfor offset, length, chunk_hash in chunks[:3]:\n    print(f\"Offset: {offset}, Length: {length}, Hash: {chunk_hash.hex()}\")","lang":"python","description":"Demonstrates basic usage: instantiate FastCDC with average chunk size, then chunk bytes."},"warnings":[{"fix":"Use data[offset:offset+length] to get chunk bytes after receiving offset and length.","message":"The chunk() method returns an iterator of tuples (offset, length, hash). The chunk data must be fetched separately from the original bytes via data[offset:offset+length].","severity":"gotcha","affected_versions":"all"},{"fix":"For stronger hashing, pass a custom hash function to FastCDC or re-hash the chunk bytes with SHA-256.","message":"The hash object is a bytes object of length 8 (64-bit) by default. This may not be suitable for deduplication at scale; consider using a custom hash function or additional hashing.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run 'pip install fastcdc'.","cause":"fastcdc is not installed.","error":"ModuleNotFoundError: No module named 'fastcdc'"},{"fix":"Use 'cdc.chunk(data)' instead of 'cdc(data)'.","cause":"Trying to call the class instance directly instead of using .chunk() method.","error":"TypeError: 'FastCDC' object is not callable"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}