import sys
def mapper(lines):
    """Turn raw text into word-count records, one per word.

    *lines* is a string that may span several newline-separated lines.
    Every whitespace-separated word yields one record of the form
    word<TAB>1<NEWLINE>, in the order the words appear.

    Returns the concatenation of all records as a single string; the
    result is empty when *lines* contains no words.
    """
    # str.split() with no argument splits on any run of whitespace
    # (newlines included) and ignores leading/trailing whitespace, so no
    # explicit strip() or per-line split is required.  The records are
    # joined once instead of being appended to a growing string.
    return ''.join(word + '\t1\n' for word in lines.split())
def reducer(lines):
    """Sum the per-word counts found in *lines* and print the totals.

    *lines* is a newline-separated string of word<TAB>count records.
    Blank lines, records without a tab separator and records whose count
    is not an integer are skipped.

    Output goes to standard output: first the raw list of (word, total)
    pairs, then one word<TAB>total line per word in ascending word
    order.  Returns None.
    """
    wordCount = {}
    # Every record is processed; blank entries (such as the empty string
    # left after a trailing newline) are filtered out here, because
    # slicing entries off the end of the list also discards real records.
    for line in lines.split("\n"):
        if not line.strip():
            continue
        try:
            word, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            # Missing tab separator or non-integer count: ignore the record.
            continue
        wordCount[word] = wordCount.get(word, 0) + count
    # Raw dump of the totals; list() keeps it a plain list on Python 2 and 3.
    print(list(wordCount.items()))
    for word, count in sorted(wordCount.items(), key=lambda item: item[0]):
        print("%s\t%s" % (word, count))
# Demo run: map a sample sentence to per-word records, then reduce them and
# print the resulting counts.
demo_records = mapper("foo foo test show my mapp reduce quux labs\n")
reducer(demo_records)
# This code can easily be adapted into MapReduce form to run on Hadoop; see, for example,
# http://blog.youkuaiyun.com/yaoyepeng/article/details/5929457