{"id":23824,"library":"grobid-client-python","title":"GROBID client Python","description":"Simple Python client for GROBID REST services. Current version: 0.1.4. Release cadence: irregular, with several fixes in 2024-2025.","status":"active","version":"0.1.4","language":"python","source_language":"en","source_url":"https://github.com/kermitt2/grobid_client_python","tags":["grobid","pdf","text-mining","nlp","scientific-papers"],"install":[{"cmd":"pip install grobid-client-python","lang":"bash","label":"Install from PyPI"}],"dependencies":[],"imports":[{"note":"GrobidClient is in grobid_client module, not top-level package","wrong":"from grobid_client import GrobidClient","symbol":"GrobidClient","correct":"from grobid_client.grobid_client import GrobidClient"},{"note":"Importing grobid_client alone does not expose GrobidClient; must import explicitly","wrong":"import grobid_client","symbol":"GrobidClient","correct":"from grobid_client.grobid_client import GrobidClient"}],"quickstart":{"code":"from grobid_client.grobid_client import GrobidClient\n\nclient = GrobidClient(config_path=None, grobid_server='http://localhost:8070')\nclient.process(\"processFulltextDocument\", \"input.pdf\", output=\"output/\")\nprint(\"Done\")","lang":"python","description":"Initialize client with default config and process a PDF."},"warnings":[{"fix":"Ensure GROBID server is running at the configured grobid_server URL (default: http://localhost:8070).","message":"The client expects a running GROBID server at the specified URL. Without it, all API calls will raise ConnectionError.","severity":"gotcha","affected_versions":"all"},{"fix":"Use generateIDs=True and specify output format parameters like output_format='tei' if needed.","message":"Version 0.1.0 changed the default output format to JSON and Markdown. The process() method now returns dicts and writes files differently. Old code expecting raw TEI XML may break.","severity":"breaking","affected_versions":">=0.1.0"},{"fix":"Adjust batch_size parameter in client initialization (e.g., GrobidClient(batch_size=100) for larger throughput, but test for stability).","message":"The batch size default changed to 10 in v0.0.17 to avoid unexpected behaviors. Large batches may cause server timeouts.","severity":"gotcha","affected_versions":">=0.0.17"},{"fix":"Consider using threading or asyncio wrappers if concurrent processing is needed.","message":"The client uses synchronous requests. Processing many PDFs can block the calling thread for a long time.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Install with 'pip install grobid-client-python', then import correctly: from grobid_client.grobid_client import GrobidClient","cause":"The package is installed as 'grobid-client-python', but the import path uses underscore. Mistaking package name for import name.","error":"ModuleNotFoundError: No module named 'grobid_client'"},{"fix":"Use: from grobid_client.grobid_client import GrobidClient","cause":"Trying to import GrobidClient from the top-level grobid_client package without specifying the submodule.","error":"AttributeError: module 'grobid_client' has no attribute 'GrobidClient'"},{"fix":"Start GROBID server or provide a different grobid_server URL pointing to a running instance.","cause":"No GROBID server running at the default URL.","error":"ConnectionError: HTTPConnectionPool(host='localhost', port=8070): Max retries exceeded"},{"fix":"Ensure input exists and is a valid PDF or TEI XML file, or include a directory containing such files.","cause":"Passing a non-existent path or invalid file type (not PDF or XML).","error":"ValueError: The 'input' parameter must be a file path or a directory."}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}