{"library":"splink","title":"Splink","type":"library","description":"Splink is a Python package for fast, accurate, and scalable probabilistic record linkage (entity resolution). It enables users to deduplicate and link records from datasets that lack unique identifiers, leveraging unsupervised learning based on the Fellegi-Sunter model. Splink supports various SQL backends like DuckDB, Apache Spark, and AWS Athena, allowing it to scale to datasets of 100 million records or more, and provides a suite of interactive visualizations for model understanding and diagnostics.","language":"python","status":"active","last_verified":"Sat May 16","install":{"commands":["pip install splink","pip install 'splink[spark]'","pip install 'splink[athena]'","pip install 'splink[postgres]'"],"cli":null},"imports":["from splink import Linker","from splink import SettingsCreator","from splink import block_on","from splink import DuckDBAPI","from splink import splink_datasets","import splink.comparison_library as cl"],"auth":{"required":false,"env_vars":[]},"links":{"homepage":"https://moj-analytical-services.github.io/splink/","github":"https://github.com/moj-analytical-services/splink","docs":null,"changelog":null,"pypi":"https://pypi.org/project/splink/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null},"quickstart":{"code":"import splink.comparison_library as cl\nfrom splink import DuckDBAPI, Linker, SettingsCreator, block_on, splink_datasets\n\ndb_api = DuckDBAPI()\ndf = splink_datasets.fake_1000\n\nsettings = SettingsCreator(\n    link_type=\"dedupe_only\",\n    comparisons=[\n        cl.NameComparison(\"first_name\"),\n        cl.JaroAtThresholds(\"surname\"),\n        cl.DateOfBirthComparison(\"dob\", input_is_string=True),\n        cl.ExactMatch(\"city\").configure(term_frequency_adjustments=True),\n        cl.EmailComparison(\"email\"),\n    ],\n    blocking_rules_to_generate_predictions=[\n        block_on(\"first_name\", \"dob\"),\n        block_on(\"surname\"),\n    ]\n)\n\nlinker = Linker(df, settings, db_api)\n\nlinker.training.estimate_probability_two_random_records_match(\n    [block_on(\"first_name\", \"surname\")], recall=0.7\n)\nlinker.training.estimate_u_using_random_sampling(max_pairs=1e6)\nlinker.training.estimate_parameters_using_expectation_maximisation(\n    block_on(\"first_name\", \"surname\")\n)\n\n# To get the results, e.g., predictions_df = linker.inference.predict()","lang":"python","description":"This quickstart demonstrates how to set up a basic Splink deduplication model using DuckDB. It covers defining comparison libraries and blocking rules, estimating parameters for record linkage, and preparing for prediction. It uses a built-in `fake_1000` dataset for convenience.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-16","installed_version":"4.0.16","pypi_latest":"4.0.16","is_stale":false,"summary":{"python_range":"3.10–3.9","success_rate":50,"avg_install_s":20.2,"avg_import_s":1.48,"wheel_type":"wheel"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"athena","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"postgres","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"spark","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"splink","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"athena","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":16.3,"import_time_s":1.23,"mem_mb":41.5,"disk_size":"449M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"postgres","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":13.7,"import_time_s":1,"mem_mb":38.5,"disk_size":"297M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"clean","install_time_s":41.6,"import_time_s":1.13,"mem_mb":38.5,"disk_size":"751M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"splink","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":11.4,"import_time_s":1,"mem_mb":38.5,"disk_size":"263M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"athena","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"postgres","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"spark","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"splink","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"athena","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":14.8,"import_time_s":1.85,"mem_mb":48,"disk_size":"470M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"postgres","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":12.5,"import_time_s":1.71,"mem_mb":44.8,"disk_size":"317M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"clean","install_time_s":38.1,"import_time_s":1.66,"mem_mb":44.8,"disk_size":"771M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"splink","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":10.7,"import_time_s":1.65,"mem_mb":44.8,"disk_size":"280M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"athena","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"postgres","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"spark","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"splink","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"athena","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":15,"import_time_s":1.85,"mem_mb":46.9,"disk_size":"462M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"postgres","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":11.7,"import_time_s":1.69,"mem_mb":43.7,"disk_size":"301M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"clean","install_time_s":39.4,"import_time_s":1.66,"mem_mb":43.7,"disk_size":"753M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"splink","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":10.4,"import_time_s":1.59,"mem_mb":43.7,"disk_size":"264M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"athena","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"postgres","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"spark","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"splink","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"athena","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":15,"import_time_s":1.88,"mem_mb":48,"disk_size":"461M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"postgres","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":12.5,"import_time_s":1.46,"mem_mb":44.8,"disk_size":"299M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"clean","install_time_s":37.9,"import_time_s":1.6,"mem_mb":44.8,"disk_size":"751M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"splink","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"clean","install_time_s":10.9,"import_time_s":1.51,"mem_mb":44.8,"disk_size":"263M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"athena","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"postgres","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"spark","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"splink","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"athena","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":18.9,"import_time_s":1.47,"mem_mb":42.2,"disk_size":"444M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"postgres","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":15.9,"import_time_s":1.19,"mem_mb":39.4,"disk_size":"304M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"spark","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"noisy","install_time_s":43.3,"import_time_s":1.24,"mem_mb":39.4,"disk_size":"737M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"splink","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":13.3,"import_time_s":1.3,"mem_mb":39.4,"disk_size":"271M"}]}}