{"id":24074,"library":"mrjob","title":"mrjob - Python MapReduce Framework","description":"mrjob is a Python library that allows you to write MapReduce jobs and run them on Hadoop, Amazon EMR, or your local machine. Version 0.7.4 is the latest release; development appears slow with no recent releases.","status":"maintenance","version":"0.7.4","language":"python","source_language":"en","source_url":"https://github.com/Yelp/mrjob","tags":["mapreduce","hadoop","emr","big-data"],"install":[{"cmd":"pip install mrjob","lang":"bash","label":"latest from PyPI"}],"dependencies":[],"imports":[{"note":"Standard import for writing MapReduce jobs.","wrong":"","symbol":"MRJob","correct":"from mrjob.job import MRJob"},{"note":"Used to define multi-step jobs.","wrong":"","symbol":"MRStep","correct":"from mrjob.step import MRStep"},{"note":"Runner for Hadoop clusters.","wrong":"","symbol":"HadoopJobRunner","correct":"from mrjob.hadoop import HadoopJobRunner"},{"note":"Runner for Amazon EMR.","wrong":"","symbol":"EMRJobRunner","correct":"from mrjob.emr import EMRJobRunner"}],"quickstart":{"code":"from mrjob.job import MRJob\nimport re\n\nWORD_RE = re.compile(r\"[\\w']+\")\n\nclass MRWordFreqCount(MRJob):\n    def mapper(self, _, line):\n        for word in WORD_RE.findall(line):\n            yield (word.lower(), 1)\n\n    def combiner(self, word, counts):\n        yield (word, sum(counts))\n\n    def reducer(self, word, counts):\n        yield (word, sum(counts))\n\nif __name__ == '__main__':\n    MRWordFreqCount.run()","lang":"python","description":"Word count MapReduce job using mrjob. Run locally with: python word_count.py input.txt"},"warnings":[{"fix":"Install pyyaml: `pip install pyyaml`","message":"mrjob 0.7.4 requires `pyyaml` for configuration files (`.mrjob.conf`). Missing it will cause import errors.","severity":"gotcha","affected_versions":">=0.7.0"},{"fix":"Use Python 3.6+.","message":"Support for Python 2 was dropped after version 0.5.0. Using Python 3 is required for 0.7.x.","severity":"deprecated","affected_versions":">=0.6.0"},{"fix":"Install boto3: `pip install boto3`. Set environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.","message":"Running on Amazon EMR requires `boto3` and proper AWS credentials. Jobs may fail silently if credentials are not configured.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run: `pip install pyyaml`","cause":"mrjob uses PyYAML for configuration but does not declare it as a dependency in some versions.","error":"ModuleNotFoundError: No module named 'yaml'"},{"fix":"Use: `from mrjob.job import MRJob`","cause":"The module structure changed; `from mrjob import MRJob` no longer works.","error":"ImportError: No module named mrjob.job"},{"fix":"Specify runner via `-r` flag, e.g., `python job.py -r hadoop input.txt`. For local, use `-r local`.","cause":"mrjob could not find a suitable runner. Often happens when running without specifying a runner or missing dependencies for the chosen runner.","error":"mrjob.errors.MrJobError: No runners found (tried ...)"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}