{"id":24222,"library":"pdfminer2","title":"pdfminer2","description":"A fork of PDFMiner for Python 3. Provides tools for extracting text, images, and metadata from PDF files. Version 20151206 is the last release; the project is largely superseded by pdfminer.six.","status":"deprecated","version":"20151206","language":"python","source_language":"en","source_url":"https://github.com/metachris/pdfminer","tags":["pdf","text extraction","python3","fork"],"install":[{"cmd":"pip install pdfminer2","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"PDF encryption support","package":"pycryptodome","optional":true}],"imports":[{"note":"PDFParser is in pdfparser module, not pdfinterp.","wrong":"from pdfminer.pdfinterp import PDFParser","symbol":"PDFParser","correct":"from pdfminer.pdfparser import PDFParser"},{"note":"Module name is 'pdfinterp', not 'pdfinterpreter'.","wrong":"from pdfminer.pdfinterpreter import PDFPageInterpreter","symbol":"PDFPageInterpreter","correct":"from pdfminer.pdfinterp import PDFPageInterpreter"}],"quickstart":{"code":"from pdfminer.pdfparser import PDFParser\nfrom pdfminer.pdfdocument import PDFDocument\nfrom pdfminer.pdfpage import PDFPage\nfrom pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter\nfrom pdfminer.converter import TextConverter\nfrom pdfminer.layout import LAParams\nfrom io import StringIO\n\n# Open PDF file\nwith open('sample.pdf', 'rb') as fh:\n    parser = PDFParser(fh)\n    doc = PDFDocument(parser)\n    rsrcmgr = PDFResourceManager()\n    retstr = StringIO()\n    laparams = LAParams()\n    device = TextConverter(rsrcmgr, retstr, laparams=laparams)\n    interpreter = PDFPageInterpreter(rsrcmgr, device)\n    for page in PDFPage.create_pages(doc):\n        interpreter.process_page(page)\n    text = retstr.getvalue()\n    device.close()\n    retstr.close()\n    print(text)","lang":"python","description":"Extract text from a PDF file using pdfminer2."},"warnings":[{"fix":"Replace pdfminer2 with 
pdfminer.six (pip install pdfminer.six); the high-level API in pdfminer.high_level (e.g. extract_text) can replace the manual parser/interpreter setup.","message":"pdfminer2 is deprecated. Use pdfminer.six for active maintenance and Python 3 support.","severity":"deprecated","affected_versions":"all"},{"fix":"If migrating from pdfminer2 to pdfminer.six, use 'from pdfminer.high_level import extract_text' for simpler extraction.","message":"Import paths differ between pdfminer2 and newer forks like pdfminer.six. Code that works on one may break on the other.","severity":"breaking","affected_versions":"all"},{"fix":"Pass the password to the constructor, e.g. PDFDocument(parser, password=...), and wrap that call in try-except (catching PDFPasswordIncorrect from pdfminer.pdfdocument) or provide the correct password.","message":"PDFDocument takes the password as a constructor argument and has no separate doc.initialize() step; for an encrypted PDF, a wrong password makes the constructor raise PDFPasswordIncorrect.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Remove the call to .initialize(). Instantiate PDFDocument(parser) directly.","cause":"The PDFDocument class does not have an 'initialize' method; the constructor handles initialization directly.","error":"AttributeError: 'PDFDocument' object has no attribute 'initialize'"},{"fix":"Install with 'pip install pdfminer2' and use correct imports as shown in quickstart.","cause":"pdfminer2 is not installed or the import path is wrong.","error":"ModuleNotFoundError: No module named 'pdfminer'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}