{"library":"urlcanon","type":"library","category":null,"description":"urlcanon is a URL canonicalization and normalization library for Python and Java, currently at version 0.3.1. It provides a URL parser that preserves input bytes, a predefined set of canonicalization rules aiming to match browser parsing behavior, and an alternative URL serialization format called SSURT. The library is stable and in production use, though API and output stability are not yet guaranteed, and feature sets differ between its Python and Java implementations. It does not follow a strict release cadence and is updated as needed.","language":"python","status":"active","version":"0.3.1","tags":["url canonicalization","url normalization","web scraping","web archiving","url parsing"],"last_verified":"Mon May 25","install":[{"cmd":"pip install urlcanon","imports":["import urlcanon\nparsed_url = urlcanon.parse_url(input_url)","import urlcanon\nurlcanon.whatwg(parsed_url)","from urlcanon.parse import ParsedUrl","from urlcanon import MatchRule"]}],"homepage":null,"github":"https://github.com/iipc/urlcanon","docs":null,"changelog":null,"pypi":"https://pypi.org/project/urlcanon/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":100,"avg_install_s":2.5,"avg_import_s":0.06,"wheel_type":"sdist"},"url":"https://checklist.day/v1/registry/urlcanon/compatibility"}}