{"library":"html-text","type":"library","category":null,"description":"html-text is a Python library designed to extract clean, readable plain text from HTML content. It goes beyond simple text extraction by removing invisible non-text content like inline styles, JavaScript, and comments. The library intelligently normalizes whitespace and can optionally add newlines after block-level elements (e.g., headers, paragraphs) to produce text that more closely resembles browser rendering, making it suitable for text classification or further natural language processing. The current version is 0.7.1, and the library is actively maintained.","language":"python","status":"active","version":"0.7.1","tags":["html","text extraction","web scraping","cleaning","lxml","nlp"],"last_verified":"Thu May 21","install":[{"cmd":"pip install html-text","imports":["from html_text import extract_text","from html_text import parse_html","from html_text import cleaner","from html_text import etree_to_text","from html_text import cleaned_selector"]}],"homepage":null,"github":"https://github.com/zytedata/html-text","docs":null,"changelog":null,"pypi":"https://pypi.org/project/html-text/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":100,"avg_install_s":2.2,"avg_import_s":0.11,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/html-text/compatibility"}}