{"library":"trafilatura","type":"library","category":null,"description":"Trafilatura is a Python package and command-line tool designed for gathering text and metadata from the web. It specializes in crawling, scraping, and extracting main content from web pages, supporting various output formats like CSV, JSON, HTML, Markdown, TXT, and XML. The library is actively maintained with frequent releases, offering robust extraction, navigation, and deduplication features.","language":"python","status":"active","version":"2.0.0","tags":["web scraping","text extraction","web crawling","metadata","NLP"],"last_verified":"Wed May 20","install":[{"cmd":"pip install trafilatura","imports":["from trafilatura import fetch_url","from trafilatura import extract","from trafilatura import bare_extraction","from trafilatura.settings import Document"]}],"homepage":"https://trafilatura.readthedocs.io","github":"https://github.com/adbar/trafilatura","docs":null,"changelog":null,"pypi":"https://pypi.org/project/trafilatura/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":80,"avg_install_s":4.6,"avg_import_s":2.6,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/trafilatura/compatibility"}}