{"slug":"nameetp/pdfmux","name":"pdfmux","description":"Smart PDF-to-Markdown router that picks the best extractor per page, audits output quality, and re-extracts failures automatically. Confidence scoring, BYOK LLM support, RAG chunking.","category":"file-system","tags":[],"official":false,"stars":65,"transport":"stdio","install":[{"cmd":"npx -y pdfmux-mcp","imports":[]}],"tools":[{"name":"pdfmux convert","description":"Extract PDF to Markdown, JSON, or chunks with per-page confidence scoring, auto-routing each page to the best backend."},{"name":"pdfmux stream","description":"Stream pages as NDJSON as they finish, useful for long documents."},{"name":"pdfmux watch","description":"Watch a directory for new PDFs and auto-convert them."},{"name":"pdfmux estimate","description":"Predict cost before running extraction on a PDF."},{"name":"pdfmux diff","description":"Diff two extractions side-by-side."},{"name":"pdfmux doctor","description":"Pre-flight a directory to check which extras are needed for the batch."},{"name":"batch_extract","description":"Batch extract PDFs, yielding (path, result) tuples as each completes."},{"name":"extract_text","description":"Extract PDF to a Markdown string."},{"name":"extract_json","description":"Extract PDF to a locked schema dict."},{"name":"chunk","description":"Extract PDF into RAG-ready chunks with token limits."}],"env_vars":["GEMINI_API_KEY","ANTHROPIC_API_KEY"],"auth_type":"none","github":"https://github.com/NameetP/pdfmux","homepage":"","server_url":"","status":"active","source":"mcpservers.org","updated_at":"Thu May 28"}