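# 第5题:有一个目录,放了你一个月的日记,都是 txt,为了避免分词的问题,假设内容都是英文,请统计出你认为每篇日记最重要的词。 (Problem 5: a directory holds one month of your diary entries, all .txt files; assuming the content is English to avoid word-segmentation issues, report the word you consider most important in each entry.)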
#!/usr/bin/env python3
# -*- coding : utf-8 -*-
import os
import re
from collections import Counter
# Common filler words that must never be reported as a file's most
# important word.
ignore_words = [
    'I',
    'am',
    'is',
    'be',
    'that',
    'the',
    'and',
    'maybe',
    'it',
    'a',
    'not',
]
def getWordNum(filesource):
    """Count how often each word occurs in a text file.

    Args:
        filesource: Path of the text file to read.

    Returns:
        A ``collections.Counter`` mapping every run of word characters
        (letters, digits, underscore) found in the file to its number of
        occurrences. Matching is case-sensitive.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding; undecodable bytes become U+FFFD, which is
    # not a word character and therefore only acts as a separator.
    with open(filesource, encoding='utf-8', errors='replace') as f:
        # Raw string: a backslash-w in a plain literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        return Counter(re.findall(r'\w+', f.read()))
def getImportantWord(dirpath):
    """Print the most frequent non-ignored word of each file in a directory.

    Every regular file directly inside ``dirpath`` is counted with
    ``getWordNum``; words listed in ``ignore_words`` are discarded
    (case-insensitively) and the most frequent remaining word is printed.
    Ties go to the word that appears first in the file. A file with no
    remaining words gets a "no important word" line instead.

    Args:
        dirpath: Directory containing the text files to analyse.

    Returns:
        None. Results are written to standard output, one line per file.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    # Compare case-insensitively so that e.g. 'The' at the start of a
    # sentence is ignored just like 'the'.
    ignored = {word.lower() for word in ignore_words}
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for name in sorted(os.listdir(dirpath)):
        filepath = os.path.join(dirpath, name)
        # Sub-directories and other non-regular entries cannot be opened as
        # text files, so skip them instead of crashing.
        if not os.path.isfile(filepath):
            continue
        # Drop ignored words entirely. Merely zeroing their count would leave
        # them in the counter, so a file made up only of ignored words would
        # report one of them as its most important word.
        candidates = Counter({
            word: count
            for word, count in getWordNum(filepath).items()
            if word.lower() not in ignored
        })
        if not candidates:
            print('No important word found in %s.' % filepath)
            continue
        # most_common(1) avoids sorting the whole vocabulary.
        top_word = candidates.most_common(1)[0][0]
        print('The most important word in %s is %s.' % (filepath, top_word))
if __name__ == '__main__':
    # Prompt for the directory, then report the top word of each file in it.
    getImportantWord(input('Please input dirpath: '))