# python钓鱼评论爬取 并存入txt (Python: scrape fishing-site comments and save them to a txt file)
import re
import requests
def direct(secname, timeout=10):
    """Download a page and return the listing-page URLs found in it.

    Args:
        secname: URL of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of every ``http://www.diaoyu.com/diaochang/changsha/list-*-4-1-1.html``
        URL found in the page text, in document order. Duplicates are kept.
    """
    html = requests.get(secname, timeout=timeout).text
    # Dots are escaped so they only match a literal '.', and the variable
    # part may not cross quotes, whitespace or angle brackets; a greedy '.*'
    # would glue two links on the same line into one bogus match.
    rule = r"http://www\.diaoyu\.com/diaochang/changsha/list-[^\s\"'<>]*?-4-1-1\.html"
    return re.findall(rule, html)
def direct2(secname, timeout=10):
    """Download a listing page and return the unique detail-page URLs in it.

    Args:
        secname: URL of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of the distinct
        ``http://www.diaoyu.com/diaochang/changsha/<digits>.html`` URLs found
        in the page text, in first-seen order.
    """
    html = requests.get(secname, timeout=timeout).text
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence. Dots are escaped so they only match a literal '.'.
    rule = r"http://www\.diaoyu\.com/diaochang/changsha/\d+\.html"
    # dict.fromkeys de-duplicates while keeping document order, so the result
    # is reproducible between runs (a set's iteration order is not).
    return list(dict.fromkeys(re.findall(rule, html)))
def direct3(secname, timeout=10):
    """Download a detail page and return the unique score/comment snippets.

    Each returned string is the raw HTML fragment matched by the pattern:
    ``</span>``, one digit followed by ``分``, a newline, ``</span>``, a
    newline, and a ``<p>...</p>`` element on a single line.

    Args:
        secname: URL of the detail page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of the distinct matched fragments, in first-seen order.
    """
    html = requests.get(secname, timeout=timeout).text
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence. The lazy '.*?' stops at the first '</p>' so a line holding
    # several paragraphs is not swallowed into a single match.
    rule = r"</span>\d分\n</span>\n<p>.*?</p>"
    # dict.fromkeys de-duplicates while keeping document order, so the result
    # is reproducible between runs (a set's iteration order is not).
    return list(dict.fromkeys(re.findall(rule, html)))
def main(start_url="http://www.diaoyu.com/diaochang/changsha/list-0-4-1-1.html",
         outfiles='E:\\comments.txt'):
    """Crawl listing pages, collect comment snippets and write them to a file.

    Starting from ``start_url``, follows the listing URLs returned by
    ``direct``, then the detail URLs returned by ``direct2``, and gathers the
    fragments returned by ``direct3``. Each fragment is written as one line:
    score, tab, comment text, tab, newline.

    Args:
        start_url: Page from which the listing URLs are extracted.
        outfiles: Path of the UTF-8 text file to (over)write.
    """
    gg = direct(start_url)
    print(gg)

    allcomments = []
    # The first two matches are skipped, exactly as the original index loop
    # did. NOTE(review): presumably they are not real listing pages - confirm.
    for listing_url in gg[2:]:
        print(listing_url)
        for detail_url in direct2(listing_url):
            allcomments.append(direct3(detail_url))

    # 'with' guarantees the file is flushed and closed even if a write fails.
    with open(outfiles, 'w', encoding='utf-8') as output:
        for page_comments in allcomments:
            for fragment in page_comments:
                # Fixed offsets into the fragment matched by direct3:
                # [7:9] is the digit plus '分' after the leading '</span>',
                # [21:-4] is the text between '<p>' and '</p>'.
                score = str(fragment[7:9])
                text = str(fragment[21:-4])
                print(score)
                output.write(score + "\t" + text + "\t" + "\n")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()