{"id":21456,"library":"img2table","title":"img2table","description":"img2table is a table identification and extraction library for PDFs and images, based on OpenCV image processing. Current version: 1.4.2. Supports Python 3.9-3.13. Released on PyPI with moderate cadence.","status":"active","version":"1.4.2","language":"python","source_language":"en","source_url":"https://github.com/xavctn/img2table","tags":["table extraction","OCR","PDF","image processing","OpenCV"],"install":[{"cmd":"pip install img2table","lang":"bash","label":"Install base library"}],"dependencies":[],"imports":[{"note":"Wrong casing; class is PaddleOCR (capital O, C, R).","wrong":"from img2table.ocr import PaddleOcr","symbol":"OCR","correct":"from img2table.ocr import PaddleOCR, TesseractOCR"},{"note":"Image and PDF are classes inside img2table.document, not top-level.","wrong":"from img2table import Image","symbol":"Document","correct":"from img2table.document import Image, PDF"}],"quickstart":{"code":"from img2table.document import Image\nfrom img2table.ocr import PaddleOCR\n\n# PaddleOCR runs locally and takes no API key\nocr = PaddleOCR(lang='en')\n\nimg = Image(src='table.png')\ntables = img.extract_tables(ocr=ocr)\nprint(tables)","lang":"python","description":"Extract tables from an image using PaddleOCR."},"warnings":[{"fix":"No action needed if using Document classes; only affects direct use of PDF library internals.","message":"In v1.4.0, the PDF backend was migrated from PyMuPDF/fitz to pypdfium2 for license compliance. Existing code expecting fitz will break.","severity":"breaking","affected_versions":">=1.4.0"},{"fix":"Switch to PaddleOCR or SuryaOCR via pip install paddleocr or pip install surya-ocr (separate).","message":"The old TesseractOCR class used Tesseract 4.x; future versions may remove support. Migrating to PaddleOCR or SuryaOCR is recommended.","severity":"deprecated","affected_versions":"<=1.4.2"},{"fix":"Create one OCR object and pass it to multiple extract_tables calls.","message":"OCR initialization is heavy; avoid recreating the OCR instance for each image in a loop. Reuse the same OCR object for multiple documents.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"pip install paddleocr","cause":"PaddleOCR is an extra dependency, not installed by default with img2table.","error":"ModuleNotFoundError: No module named 'paddleocr'"},{"fix":"Use: from img2table.ocr import PaddleOCR","cause":"Using wrong casing; correct class name is PaddleOCR (capital O, C, R).","error":"ImportError: cannot import name 'PaddleOcr' from 'img2table.ocr'"},{"fix":"Use: from img2table.document import Image","cause":"Incorrect import; the Image class is not in the top-level package.","error":"ImportError: cannot import name 'Image' from 'img2table'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}