{"id":24089,"library":"mwxml","title":"mwxml","description":"A set of utilities for processing MediaWiki XML dump data. Currently at version 0.3.8, with irregular releases.","status":"active","version":"0.3.8","language":"python","source_language":"en","source_url":"https://github.com/mediawiki-utilities/python-mwxml","tags":["mediawiki","xml","dump","parsing"],"install":[{"cmd":"pip install mwxml","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"Used for parsing wiki text within dump processing.","package":"mwparserfromhell","optional":true}],"imports":[{"note":"Dump is accessed via mwxml.Dump after importing the module.","wrong":"","symbol":"Dump","correct":"import mwxml"},{"note":"Page is only available as mwxml.Page. Some users mistakenly try mwxml.dump.Page.","wrong":"","symbol":"Page","correct":"from mwxml import Page"}],"quickstart":{"code":"import mwxml\n\ndump = mwxml.Dump.from_file(open('example.xml', 'rb'))\nfor page in dump.pages:\n    print(page.title)\n    for revision in page.revisions:\n        print(revision.text)","lang":"python","description":"Opens a MediaWiki XML dump file and iterates over pages and revisions."},"warnings":[{"fix":"Always open the dump file with open('file.xml', 'rb').","message":"Dump.from_file expects a file opened in binary mode ('rb'), not text mode.","severity":"gotcha","affected_versions":"0.3.x"},{"fix":"Use: if revision.text is not None: process(revision.text)","message":"Revision.text may be None if the revision has been deleted or suppressed. Always check for None before processing.","severity":"gotcha","affected_versions":"all"},{"fix":"Use mwxml.Dump.from_file() instead of mwxml.Dump().","message":"The mwxml.Dump constructor is deprecated in favor of Dump.from_file.","severity":"deprecated","affected_versions":"0.3.0+"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Ensure mwxml is installed (pip install mwxml) and import with import mwxml. Then use mwxml.Dump.","cause":"Common installation issue where the package is not installed correctly or version mismatch.","error":"AttributeError: module 'mwxml' has no attribute 'Dump'"},{"fix":"Open the file with open('dump.xml', 'rb') (binary mode).","cause":"Opening the dump file in text mode instead of binary mode.","error":"OSError: [Errno 22] Invalid argument"},{"fix":"Use binary mode: open('dump.xml', 'rb').","cause":"Passing a file opened in text mode to Dump.from_file.","error":"TypeError: cannot use a string pattern on a bytes-like object"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}