一、利用Requests爬取图片
def save_Img(img_Url, filename, base_path):
    """Download the image at ``img_Url`` and save it as ``<filename>.jpg``.

    Args:
        img_Url: URL of the image to download.
        filename: Target file name, without the ``.jpg`` extension.
        base_path: Directory to write into. It is joined to the file name
            with a backslash, i.e. a Windows-style path is built.

    Any exception raised by ``requests.get`` (for example when the 5 second
    timeout is exceeded) or by opening/writing the file propagates to the
    caller.
    """
    filename_abs = base_path + '\\' + filename + '.jpg'
    # Fetch before touching the disk: if the request fails, no empty or
    # truncated .jpg is left behind and no file handle is leaked.
    img = requests.get(
        img_Url,
        # A User-Agent is picked at random from headers.my_headers per call.
        headers={'User-Agent': random.choice(headers.my_headers)},
        timeout=5,
    )
    # NOTE(review): the HTTP status is not checked, so the body of an error
    # response would be saved as the image - confirm this is acceptable.
    with open(filename_abs, 'wb') as out:
        out.write(img.content)
二、利用Requests爬取网页数据
def get_img_profile_chmuseum_Requests(url, base_path, urlTitle=None):
    """Fetch ``url`` and save the picture and text extracted from the page.

    The page is downloaded with ``requests``, parsed with ``lxml.html`` and
    handed to ``get_contents_from_lxml``, which returns a title, a picture
    URL and the text. The picture is stored via ``save_Img`` and the text via
    ``save_Profile``, both named after the returned title.

    Args:
        url: Address of the page to fetch.
        base_path: Directory the ``.jpg`` and ``.txt`` files are written to.
        urlTitle: Passed through unchanged to ``get_contents_from_lxml``.
    """
    html = requests.get(
        url,
        # A User-Agent is picked at random from headers.my_headers per call.
        headers={'User-Agent': random.choice(headers.my_headers)},
        timeout=5,
    )
    tree = lxml.html.fromstring(html.content)
    title_fixed, pic_path, texts = get_contents_from_lxml(tree, urlTitle)
    save_Img(pic_path, title_fixed, base_path)
    save_Profile(texts, title_fixed, base_path)


def save_Profile(contents, filename, base_path):
    """Write ``contents`` to ``<filename>.txt`` as UTF-8 text.

    Args:
        contents: Text to store. A byte string is decoded as UTF-8 first so
            the same bytes end up on disk.
        filename: Target file name, without the ``.txt`` extension.
        base_path: Directory to write into. It is joined to the file name
            with a backslash, i.e. a Windows-style path is built.
    """
    # Function-scope import: keeps this block self-contained.
    import io

    filename_abs = base_path + '\\' + filename + '.txt'
    if isinstance(contents, bytes):
        contents = contents.decode('utf-8')
    # io.open with an explicit encoding behaves the same on Python 2 and 3,
    # and the with-block closes the file even if the write raises.
    with io.open(filename_abs, 'w+', encoding='utf-8') as out:
        print('-----------Write to file------------')
        out.write(contents)

1420

被折叠的 条评论
为什么被折叠?



