# -*- coding: utf-8 -*-
"""Download a single news page and save its body to a local file.

The output file is created in the current working directory and is named
``<host>_<last path segment of the URL>``.
"""
import io
import sys

import requests

if sys.version_info[0] == 2:
    # Python 2 only: switch the implicit str/unicode conversion codec from
    # ASCII to UTF-8. This was originally added to work around
    #   UnicodeEncodeError: 'ascii' codec can't encode character u'\uff08'
    # The write at the bottom of the script now encodes explicitly, so this
    # is kept purely for backward compatibility. ``reload`` is not a builtin
    # on Python 3, hence the version guard.
    reload(sys)
    sys.setdefaultencoding('utf8')

url = 'https://news.sina.com.cn/c/2019-01-01/doc-ihqhqcis2070961.shtml'
# A timeout keeps the script from blocking forever on an unresponsive server.
news = requests.get(url, timeout=30)

# Last path segment of the URL (everything after the final '/').
filename = url[url.rfind('/') + 1:]
# Host part of the URL: the text between '//' and the next '/'.
start_pos = url.find('//') + 2
end_pos = url.find('/', start_pos)
domain = url[start_pos:end_pos]
filename = domain + '_' + filename

# io.open behaves the same on Python 2 and 3: it accepts unicode text and
# encodes it with the codec named here. The ``with`` block guarantees the
# file is closed even if the write fails.
with io.open(filename, 'w', encoding='utf-8') as f:
    f.write(news.text)
Python 爬虫之第一课,抽取,存储xml文件
最新推荐文章于 2024-06-30 02:06:50 发布
1506

被折叠的 条评论
为什么被折叠?



