{"library":"warc3-wet-clueweb09","type":"library","category":null,"description":"A Python library designed to efficiently parse and work with ARC and WARC files, specifically tailored with fixes and optimizations for ClueWeb09 WET (Web Extracted Text) files. It provides an interface to iterate over records within these compressed archives. The current version is 0.2.5, indicating a pre-1.0 status with potential for future API changes, and it's maintained on an as-needed basis.","language":"python","status":"active","version":"0.2.5","tags":["warc","wet","clueweb09","web archives","parsing","data extraction"],"last_verified":"Mon May 25","install":[{"cmd":"pip install warc3-wet-clueweb09","imports":["from warc3_wet_clueweb09 import Warc3Record"]}],"homepage":null,"github":"https://github.com/seanmacavaney/warc3-clueweb","docs":null,"changelog":null,"pypi":"https://pypi.org/project/warc3-wet-clueweb09/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.9–3.10","success_rate":100,"avg_install_s":2.4,"avg_import_s":null,"wheel_type":"sdist"},"url":"https://checklist.day/v1/registry/warc3-wet-clueweb09/compatibility"}}