{"slug":"manchittlab/thecrawler","name":"TheCrawler","description":"Web scraper exposing 5 MCP tools â crawl, markdown extraction, search-and-crawl, sitemap parsing, and LLM JSON-schema structured extraction. AGPL-3.0.","category":"web-scraping","tags":[],"official":false,"stars":0,"transport":null,"install":null,"tools":[{"name":"crawl","description":"Crawl URLs and return rich page data including title, description, language, canonical URL, robots directives, full text, boilerplate-stripped markdown, links, images, meta tags, OG/Twitter Card, JSON-LD, microdata, commerce data, forms, analytics-detected, emails, phones, social links, hreflang, pagination, redirect chain, response headers, timing, and structured error information."},{"name":"extract","description":"Crawl URLs and perform LLM-powered structured extraction using a JSON schema or natural language prompt, returning parsed typed data per URL."},{"name":"diagnose","description":"Crawl URLs and score source readiness for a built-in extraction contract without an LLM call, returning per-URL verdict, readiness score, blockers, warnings, and a recommended next step."},{"name":"extract-contract","description":"Crawl URLs and perform LLM-powered extraction using a built-in contract schema (e.g., real-estate-listing, product-page), appending contract validation results including validation.valid and missingRequiredFields."}],"env_vars":["THECRAWLER_LLM_API_KEY","THECRAWLER_API_KEY"],"auth_type":"none","github":"https://github.com/manchittlab/TheCrawler","homepage":"","server_url":"","status":"active","source":"mcpservers.org","updated_at":"Thu May 28"}