import urllib
import time
# Crawl one article-list page of a Sina blog and save every linked article.
#
# Step 1 scans the list page for `<a title=... href="....html">` anchors and
# collects up to MAX_ARTICLES article addresses.  Step 2 downloads each
# collected address into the likaifu/ directory, pausing between requests.
#
# NOTE(review): urllib.urlopen is the Python 2 API.  On Python 3 the
# equivalent is urllib.request.urlopen, whose read() returns bytes, so the
# str-based find() calls below would also need adapting before porting.
LIST_PAGE = 'http://blog.sina.com.cn/s/articlelist_1197161814_0_1.html'
MAX_ARTICLES = 50
PAUSE_SECONDS = 15


def _fetch(address):
    """Return the raw body served at *address*, always closing the response."""
    response = urllib.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


con = _fetch(LIST_PAGE)

# Only links that were actually found are stored; a pre-filled 50-slot list
# would leave empty strings behind that the download step then tries to open.
url = []
cursor = 0
while len(url) < MAX_ARTICLES:
    # The same anchor marker is used for every search, including the first.
    title = con.find(r'<a title=', cursor)
    if title == -1:
        break
    href = con.find(r'href=', title)
    if href == -1:
        break
    html = con.find(r'.html', href)
    if html == -1:
        break
    # +6 skips the six characters of `href="`; +5 keeps the `.html` suffix.
    link = con[href + 6:html + 5]
    url.append(link)
    print(link)
    # Resume scanning after the link that was just consumed.
    cursor = html
print('find end!')

for number, article_url in enumerate(url, 1):
    content = _fetch(article_url)
    print('downloading......' + str(number))
    # The file is named after the last 26 characters of the address
    # (presumably the `blog_<id>.html` part -- verify if the URL scheme changes).
    # open() does not create directories, so likaifu/ must already exist.
    # Binary mode stores the response bytes unchanged on every platform.
    with open(r'likaifu/' + article_url[-26:], 'wb') as out:
        out.write(content)
    # Pause between requests so the blog server is not hit in a tight loop.
    time.sleep(PAUSE_SECONDS)
print('download title')
# A simple Python web crawler program (downloads blog articles)
# Latest recommended article published on 2024-12-23 21:24:08