{"library":"datasketch","title":"datasketch","type":"library","description":"datasketch is a Python library that provides probabilistic data structures for efficient similarity search and approximate nearest neighbor (ANN) computations on very large datasets. It currently stands at version 1.9.0 and maintains an active release cadence, with updates addressing features, fixes, and dependency management.","language":"python","status":"active","last_verified":"Wed May 20","install":{"commands":["pip install datasketch","pip install datasketch[redis]","pip install datasketch[hnsw]"],"cli":null},"imports":["from datasketch import MinHash","from datasketch import MinHashLSH","from datasketch import MinHashLSHForest","from datasketch import WeightedMinHashGenerator","from datasketch import bBitMinHash","from datasketch import MinHashLSHDeletionSession"],"auth":{"required":false,"env_vars":[]},"links":{"homepage":"https://ekzhu.github.io/datasketch","github":"https://github.com/ekzhu/datasketch","docs":"https://ekzhu.github.io/datasketch","changelog":null,"pypi":"https://pypi.org/project/datasketch/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null},"quickstart":{"code":"from datasketch import MinHash, MinHashLSH\n\n# Create MinHash objects for two sets\ns1 = {\"minhash\", \"is\", \"a\", \"probabilistic\", \"data\", \"structure\", \"for\", \"estimating\", \"similarity\", \"between\", \"sets\"}\ns2 = {\"minhash\", \"is\", \"a\", \"data\", \"structure\", \"for\", \"estimating\", \"similarity\", \"between\", \"documents\"}\ns3 = {\"today\", \"is\", \"a\", \"beautiful\", \"day\"}\n\nm1 = MinHash(num_perm=128)\nm2 = MinHash(num_perm=128)\nm3 = MinHash(num_perm=128)\n\nfor d in s1:\n    m1.update(d.encode('utf8'))\nfor d in s2:\n    m2.update(d.encode('utf8'))\nfor d in s3:\n    m3.update(d.encode('utf8'))\n\n# Create an LSH index with a threshold\nlsh = MinHashLSH(threshold=0.5, num_perm=128)\nlsh.insert(\"m1\", m1)\nlsh.insert(\"m2\", m2)\nlsh.insert(\"m3\", m3)\n\n# Query the LSH for candidates similar to m2\nprint(f\"Candidate keys for m2: {lsh.query(m2)}\")","lang":"python","description":"This quickstart demonstrates the core functionality of datasketch: creating MinHash objects from sets and using MinHashLSH to find approximate nearest neighbors. The example initializes three MinHash objects from different text sets, inserts them into an LSH index, and then queries the index to find items similar to `m2`.","tag":null,"tag_description":null,"last_tested":"2026-04-24","results":[{"runtime":"python:3.10-alpine","exit_code":0},{"runtime":"python:3.10-slim","exit_code":0},{"runtime":"python:3.11-alpine","exit_code":0},{"runtime":"python:3.11-slim","exit_code":0},{"runtime":"python:3.12-alpine","exit_code":0},{"runtime":"python:3.12-slim","exit_code":0},{"runtime":"python:3.13-alpine","exit_code":0},{"runtime":"python:3.13-slim","exit_code":0},{"runtime":"python:3.9-alpine","exit_code":0},{"runtime":"python:3.9-slim","exit_code":0}]},"compatibility":{"tag":"verified","tag_description":"installs cleanly on critical runtimes, fast import, recently tested","last_tested":"2026-05-20","installed_version":"1.10.0","pypi_latest":"1.10.0","is_stale":false,"summary":{"python_range":"3.10–3.9","success_rate":100,"avg_install_s":7.3,"avg_import_s":1.83,"wheel_type":"wheel"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.36,"mem_mb":29.1,"disk_size":"231.4M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.32,"mem_mb":27.4,"disk_size":"231.3M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.31,"mem_mb":29.1,"disk_size":"231.4M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.21,"mem_mb":27.4,"disk_size":"231.3M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.5,"mem_mb":32.6,"disk_size":"235.2M"},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.34,"mem_mb":32.4,"disk_size":"235.2M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":6.9,"import_time_s":1.03,"mem_mb":29.1,"disk_size":"222M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":3.68,"mem_mb":27.4,"disk_size":"222M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.2,"import_time_s":1.04,"mem_mb":29.1,"disk_size":"222M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.93,"mem_mb":27.4,"disk_size":"222M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.2,"import_time_s":1.15,"mem_mb":32.6,"disk_size":"226M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.03,"mem_mb":32.4,"disk_size":"226M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.08,"mem_mb":35.1,"disk_size":"246.6M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.18,"mem_mb":33.2,"disk_size":"246.6M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.05,"mem_mb":35.1,"disk_size":"246.6M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.13,"mem_mb":33.2,"disk_size":"246.6M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.2,"mem_mb":39.2,"disk_size":"251.2M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.5,"mem_mb":39,"disk_size":"251.1M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":6.9,"import_time_s":1.88,"mem_mb":35.1,"disk_size":"236M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":3.13,"mem_mb":33.2,"disk_size":"236M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":6.9,"import_time_s":1.86,"mem_mb":35.1,"disk_size":"236M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.79,"mem_mb":33.2,"disk_size":"236M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.3,"import_time_s":2.04,"mem_mb":39.2,"disk_size":"240M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.19,"mem_mb":39,"disk_size":"240M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.23,"mem_mb":34.5,"disk_size":"232.6M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.12,"mem_mb":32.4,"disk_size":"232.6M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.22,"mem_mb":34.5,"disk_size":"232.6M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.07,"mem_mb":32.4,"disk_size":"232.6M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.42,"mem_mb":38.4,"disk_size":"237.0M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.53,"mem_mb":38.2,"disk_size":"237.0M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.1,"import_time_s":2.24,"mem_mb":34.5,"disk_size":"222M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.06,"mem_mb":32.4,"disk_size":"222M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.3,"import_time_s":2.33,"mem_mb":34.5,"disk_size":"222M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.15,"mem_mb":32.4,"disk_size":"222M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.5,"import_time_s":2.42,"mem_mb":38.4,"disk_size":"226M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.58,"mem_mb":38.2,"disk_size":"226M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.94,"mem_mb":34.6,"disk_size":"231.4M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.04,"mem_mb":32.2,"disk_size":"231.3M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.96,"mem_mb":34.6,"disk_size":"231.4M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.91,"mem_mb":32.2,"disk_size":"231.3M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":2.12,"mem_mb":39.5,"disk_size":"235.8M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.33,"mem_mb":39.3,"disk_size":"235.6M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.2,"import_time_s":2.08,"mem_mb":34.6,"disk_size":"220M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.17,"mem_mb":32.2,"disk_size":"220M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.2,"import_time_s":1.98,"mem_mb":34.6,"disk_size":"220M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.97,"mem_mb":32.2,"disk_size":"220M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.5,"import_time_s":2.15,"mem_mb":39.5,"disk_size":"225M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.6,"mem_mb":39.3,"disk_size":"225M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.12,"mem_mb":26.4,"disk_size":"233.6M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.02,"mem_mb":24.7,"disk_size":"233.6M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.08,"mem_mb":26.4,"disk_size":"233.6M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.97,"mem_mb":24.7,"disk_size":"233.6M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":null,"import_time_s":1.23,"mem_mb":29.6,"disk_size":"236.8M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.08,"mem_mb":29.5,"disk_size":"236.8M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":8,"import_time_s":1.06,"mem_mb":26.4,"disk_size":"230M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"datasketch","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":2.4,"mem_mb":24.7,"disk_size":"230M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":7.8,"import_time_s":1.02,"mem_mb":26.4,"disk_size":"230M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"hnsw","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.9,"mem_mb":24.7,"disk_size":"230M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":8.2,"import_time_s":1.18,"mem_mb":29.6,"disk_size":"233M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"redis","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":1.09,"mem_mb":29.5,"disk_size":"233M"}]}}