{"library":"justext","type":"library","category":null,"description":"justext is a heuristic-based boilerplate removal tool for HTML documents. It extracts the main content from web pages, discarding navigation, advertisements, and other extraneous elements. The current version is 3.0.2; new releases mainly deliver bug fixes and compatibility updates.","language":"python","status":"active","version":"3.0.2","tags":["web scraping","html parsing","boilerplate removal","nlp","text extraction"],"last_verified":"Wed May 20","install":[{"cmd":"pip install justext","imports":["import justext"]}],"homepage":null,"github":"https://github.com/miso-belica/jusText","docs":null,"changelog":null,"pypi":"https://pypi.org/project/justext/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":80,"avg_install_s":2.1,"avg_import_s":0.26,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/justext/compatibility"}}