#!/usr/bin/python -tt
"""Read in the file specified on the command line.
Do a simple split() on whitespace to obtain all the words in the file.
Rather than read the file line by line, it's easier to read
it into one giant string and split it once.
Build a "mimic" dict that maps each word that appears in the file
to a list of all the words that immediately follow that word in the file.
The list of words can be in any order and should include
duplicates. So for example the key "and" might have the list
["then", "best", "then", "after", ...] listing
all the words which came after "and" in the text.
We'll say that the empty string is what comes before
the first word in the file.
With the mimic dict, it's fairly easy to emit random
text that mimics the original. Print a word, then look
up what words might come next and pick one at random as
the next word.
Use the empty string as the first word to prime things.
If we ever get stuck with a word that is not in the dict,
go back to the empty string to keep things moving.
Note: the standard python module 'random' includes a
random.choice(list) method which picks a random element
from a non-empty list.
For fun, feed your program to itself as input.
Could work on getting it to put in linebreaks around 70
columns, so the output looks better.
"""
import random
import sys
def mimic_dict(filename):
    """Returns mimic dict mapping each word to list of words which follow it.

    The file is read in one go and split on whitespace. Every word is
    recorded under the word that immediately precedes it; the first word
    of the file is recorded under the empty string ''. Follower lists keep
    duplicates and preserve the order of appearance.

    Args:
        filename: path of the text file to read.

    Returns:
        A dict mapping each preceding word (or '' for the start of the
        file) to the list of words that follow it. An empty or
        whitespace-only file yields an empty dict.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as f:
        words = f.read().split()

    followers = {}
    # Track the previous word instead of calling words.index(word):
    # index() always returns the FIRST occurrence, so repeated words
    # would otherwise be paired with the wrong follower.
    previous = ''
    for word in words:
        followers.setdefault(previous, []).append(word)
        previous = word
    return followers
def print_mimic(mimic_dict, word):
    """Given mimic dict and start word, prints 200 random words.

    Starting from `word`, each step emits the current word and then picks
    the next one at random from the current word's follower list. When
    the current word has no followers (it is not a key, or its list is
    empty), the followers of the empty string '' are used instead.

    Args:
        mimic_dict: dict mapping a word to the list of words following it.
        word: the start word; '' primes the walk with the first word.

    Output is written to stdout as a single space-separated line. Empty
    words (such as the '' primer) take one of the 200 steps but are not
    written. If there is nothing to fall back on either, the walk stops
    early instead of raising.
    """
    emitted = []
    for _ in range(200):
        emitted.append(word)
        # .get() rather than [] so a word without followers does not
        # raise KeyError before we can fall back.
        candidates = mimic_dict.get(word)
        if not candidates:
            candidates = mimic_dict.get('')
        if not candidates:
            # Nothing to continue with (e.g. an empty dict).
            break
        word = random.choice(candidates)
    # sys.stdout.write works the same on Python 2 and Python 3.
    sys.stdout.write(' '.join(w for w in emitted if w) + '\n')
# Provided main(), calls mimic_dict() and print_mimic()
def main():
    """Command-line entry point: build the mimic dict and print mimic text.

    Expects exactly one command-line argument, the file to read. With any
    other argument count it prints a usage line and exits with status 1.
    """
    if len(sys.argv) != 2:
        # sys.stdout.write works the same on Python 2 and Python 3.
        sys.stdout.write('usage: ./mimic.py file-to-read\n')
        sys.exit(1)

    # Named so it does not shadow the builtin `dict`.
    word_followers = mimic_dict(sys.argv[1])
    print_mimic(word_followers, '')


if __name__ == '__main__':
    main()
"""Read in the file specified on the command line.
Do a simple split() on whitespace to obtain all the words in the file.
Rather than read the file line by line, it's easier to read
it into one giant string and split it once.
Build a "mimic" dict that maps each word that appears in the file
to a list of all the words that immediately follow that word in the file.
The list of words can be in any order and should include
duplicates. So for example the key "and" might have the list
["then", "best", "then", "after", ...] listing
all the words which came after "and" in the text.
We'll say that the empty string is what comes before
the first word in the file.
With the mimic dict, it's fairly easy to emit random
text that mimics the original. Print a word, then look
up what words might come next and pick one at random as
the next word.
Use the empty string as the first word to prime things.
If we ever get stuck with a word that is not in the dict,
go back to the empty string to keep things moving.
Note: the standard python module 'random' includes a
random.choice(list) method which picks a random element
from a non-empty list.
For fun, feed your program to itself as input.
Could work on getting it to put in linebreaks around 70
columns, so the output looks better.
"""
import random
import sys
def mimic_dict(filename):
    """Returns mimic dict mapping each word to list of words which follow it.

    The file is read in one go and split on whitespace. Every word is
    recorded under the word that immediately precedes it; the first word
    of the file is recorded under the empty string ''. Follower lists keep
    duplicates and preserve the order of appearance.

    Args:
        filename: path of the text file to read.

    Returns:
        A dict mapping each preceding word (or '' for the start of the
        file) to the list of words that follow it. An empty or
        whitespace-only file yields an empty dict.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as f:
        words = f.read().split()

    followers = {}
    # Track the previous word instead of calling words.index(word):
    # index() always returns the FIRST occurrence, so repeated words
    # would otherwise be paired with the wrong follower.
    previous = ''
    for word in words:
        followers.setdefault(previous, []).append(word)
        previous = word
    return followers
def print_mimic(mimic_dict, word):
    """Given mimic dict and start word, prints 200 random words.

    Starting from `word`, each step emits the current word and then picks
    the next one at random from the current word's follower list. When
    the current word has no followers (it is not a key, or its list is
    empty), the followers of the empty string '' are used instead.

    Args:
        mimic_dict: dict mapping a word to the list of words following it.
        word: the start word; '' primes the walk with the first word.

    Output is written to stdout as a single space-separated line. Empty
    words (such as the '' primer) take one of the 200 steps but are not
    written. If there is nothing to fall back on either, the walk stops
    early instead of raising.
    """
    emitted = []
    for _ in range(200):
        emitted.append(word)
        # .get() rather than [] so a word without followers does not
        # raise KeyError before we can fall back.
        candidates = mimic_dict.get(word)
        if not candidates:
            candidates = mimic_dict.get('')
        if not candidates:
            # Nothing to continue with (e.g. an empty dict).
            break
        word = random.choice(candidates)
    # sys.stdout.write works the same on Python 2 and Python 3.
    sys.stdout.write(' '.join(w for w in emitted if w) + '\n')
# Provided main(), calls mimic_dict() and print_mimic()
def main():
    """Command-line entry point: build the mimic dict and print mimic text.

    Expects exactly one command-line argument, the file to read. With any
    other argument count it prints a usage line and exits with status 1.
    """
    if len(sys.argv) != 2:
        # sys.stdout.write works the same on Python 2 and Python 3.
        sys.stdout.write('usage: ./mimic.py file-to-read\n')
        sys.exit(1)

    # Named so it does not shadow the builtin `dict`.
    word_followers = mimic_dict(sys.argv[1])
    print_mimic(word_followers, '')


if __name__ == '__main__':
    main()
本文介绍了一个使用Python编写的文本模仿生成器。该程序通过读取指定文件中的文本,建立一个模仿字典来映射每个出现的单词到其后可能跟随的所有单词列表,然后根据这个字典生成200个随机但风格类似原文的单词。文中提供了两种构造模仿字典的方法,并附带了完整的代码实现。
972

被折叠的 条评论
为什么被折叠?



