import nltk
from nltk.book import *
# Frequency distribution over every token of text2 (text2 and FreqDist are
# presumably supplied by the `from nltk.book import *` star-import).
freq = FreqDist(text2)

# Names to tabulate; shared by the filter and by the table's row order.
protagonists = ['Elinor', 'Marianne', 'Edward', 'Willoughby']

# One (condition, sample) pair per occurrence of a name in text2: the
# condition is the name and the sample is that name's total count in text2.
cfd = nltk.ConditionalFreqDist(
    (word, freq[word])
    for word in text2
    if word in protagonists
)
cfd.tabulate(conditions=protagonists)
text5.collocations() # Find the relatively frequent bigrams (collocations) in text5
# Exercise 15: Review the discussion of conditionals in Section 1.4. Find all
# words in the chat corpus (text5) that start with the letter b, and show them
# in alphabetical order.
# set() collapses repeated tokens so each word is listed once; sorted() gives
# the alphabetical order the exercise asks for.
sorted(set(w for w in text5 if w.startswith('b')))
# Every token of text5 that is exactly four characters long.
word = [token for token in text5 if len(token) == 4]

# Count how often each of those four-character tokens occurs.
fdist = FreqDist(word)

# Bare expression: displays the distribution when run in an interactive session.
fdist
# Print, one per line, every token of text6 for which str.isupper() is true.
for w in text6:
    if w.isupper():
        # Parenthesised single argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(w)
# a. Ending in ize
# b. Containing the letter z
# c. Containing the sequence of letters pt
# d. All lowercase except for the first letter (i.e. titlecase)
# Words of text6 that satisfy all four conditions at once: end in 'ize',
# contain 'z', contain the sequence 'pt', and are lowercase after the first
# character. Substring tests use the `in` operator because str has no
# `.contains` method.
[w for w in text6
 if w.endswith('ize') and 'z' in w and 'pt' in w and w[1:].islower()]
def percent(word, text):
    """Return how often *word* occurs in *text*, as a percentage of its tokens.

    Args:
        word: The token to look for; compared to each item of *text* with ``==``.
        text: A sized iterable of tokens (for example a list of strings).

    Returns:
        ``100 * occurrences / len(text)`` as a float, or ``0.0`` when *text*
        is empty.
    """
    total = len(text)
    if total == 0:
        # An empty text has no tokens, so avoid dividing by zero.
        return 0.0
    occurrences = sum(1 for token in text if token == word)
    # The 100.0 literal forces true division without needing
    # `from __future__ import division`, which is only legal at the top of a module.
    return 100.0 * occurrences / total