{"library":"mrjob","title":"mrjob - Python MapReduce Framework","description":"mrjob is a Python library that allows you to write MapReduce jobs and run them on Hadoop, Amazon EMR, or your local machine. Version 0.7.4 is the latest release; the project is in maintenance mode and has had no recent releases.","language":"python","status":"maintenance","last_verified":"Fri May 01","install":{"commands":["pip install mrjob"],"cli":{"name":"mrjob","version":null}},"imports":["from mrjob.job import MRJob","from mrjob.step import MRStep","from mrjob.hadoop import HadoopJobRunner","from mrjob.emr import EMRJobRunner"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"from mrjob.job import MRJob\nimport re\n\nWORD_RE = re.compile(r\"[\\w']+\")\n\nclass MRWordFreqCount(MRJob):\n    def mapper(self, _, line):\n        for word in WORD_RE.findall(line):\n            yield (word.lower(), 1)\n\n    def combiner(self, word, counts):\n        yield (word, sum(counts))\n\n    def reducer(self, word, counts):\n        yield (word, sum(counts))\n\nif __name__ == '__main__':\n    MRWordFreqCount.run()","lang":"python","description":"Word count MapReduce job using mrjob. Run locally with: python word_count.py input.txt","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}