__author__ = 'user'
#encoding:utf-8
import urllib.request as request
import urllib.parse as parse
import string
# Startup banner: runs whenever this module is executed or imported,
# before any page is downloaded.
print("starting--------------------------")
def baidu_tieba(url, begin_page, end_page, save_prefix='e:/test'):
    """Download a range of numbered pages and save each one as an HTML file.

    For every ``i`` from ``begin_page`` to ``end_page`` (both inclusive) the
    resource at ``url + str(i)`` is fetched and its raw bytes are written to
    ``save_prefix + str(i).zfill(5) + '.html'``.

    Args:
        url: URL prefix; the page number is appended to it verbatim.
        begin_page: First page number to download.
        end_page: Last page number to download (inclusive).
        save_prefix: Path prefix of the output files. The default reproduces
            the previously hard-coded location.

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        OSError: If an output file cannot be written.
    """
    for i in range(begin_page, end_page + 1):
        # Zero-pad the page number so the saved files sort in page order.
        sName = save_prefix + str(i).zfill(5) + '.html'
        print("正在下载第"+str(i)+"个页面,并保存为"+sName)
        # Use the response as a context manager so the connection is released
        # even when read() fails.
        with request.urlopen(url + str(i)) as response:
            m = response.read()
        # The with-block closes the file; no explicit close() is needed.
        with open(sName, 'wb') as page_file:
            page_file.write(m)
if __name__=="__main__":
    # Script entry point: download the pages numbered begin_page..end_page
    # (inclusive); each number is appended directly to the URL prefix.
    url="http://tieba.baidu.com/p/"
    begin_page=1
    end_page=3
    baidu_tieba(url,begin_page,end_page)
__author__ = 'user'
import urllib.request as request
import urllib.parse as parse
import string
import re
import os
import urllib.error as error

print("Starting_________________")

# Compiled once at import time instead of on every loop iteration.
# Captures the value of each <img src="..."> attribute in a page.
_IMG_SRC_PATTERN = re.compile('<img src=\"(.+?)\"')
# Accepts only plain-http URLs that end in a literal ".png"; the dot is
# escaped so that names such as "logo_png" are no longer treated as PNGs.
_PNG_URL_PATTERN = re.compile(r'^http://.*\.png$')


def baidu_tieba(url, begin_page, end_page, save_dir='e:/test/'):
    """Download numbered pages together with the PNG images they reference.

    For every ``i`` from ``begin_page`` to ``end_page`` (both inclusive):

    * the resource at ``url + str(i)`` is fetched;
    * the directory ``<save_dir>/<i>`` is created if it does not exist;
    * every ``<img src="...">`` value that is an ``http://`` URL ending in
      ``.png`` is downloaded into that directory as ``<count>.png``, where
      ``count`` starts at 1 and keeps increasing across all pages;
    * the raw page bytes are written to ``<save_dir>/<i zero-padded to 5>.html``.

    An image whose download raises ``URLError`` is reported with
    ``Download failed`` and skipped; it does not consume an image number.

    Args:
        url: URL prefix; the page number is appended to it verbatim.
        begin_page: First page number to download.
        end_page: Last page number to download (inclusive).
        save_dir: Directory that receives the pages and the per-page image
            folders. The default reproduces the previously hard-coded path.

    Raises:
        urllib.error.URLError: If a page (not an image) cannot be fetched.
        OSError: If a directory or file cannot be created or written.
    """
    count = 1  # running image number, shared by all pages
    for i in range(begin_page, end_page + 1):
        sName = os.path.join(save_dir, str(i).zfill(5) + '.html')
        print('正在下载第'+str(i)+'个页面,并保存为'+sName)
        # Context manager releases the connection even if read() fails.
        with request.urlopen(url + str(i)) as response:
            m = response.read()

        new_path = os.path.join(save_dir, str(i))
        os.makedirs(new_path, exist_ok=True)

        # Undecodable bytes are dropped; the text is only used for scanning.
        page_data = m.decode('utf-8', 'ignore')
        for image in _IMG_SRC_PATTERN.findall(page_data):
            if not _PNG_URL_PATTERN.match(image):
                continue
            try:
                # Only the network fetch is guarded, so file-system errors
                # still surface to the caller.
                with request.urlopen(image) as image_response:
                    image_data = image_response.read()
            except error.URLError:
                print('Download failed')
                continue
            image_path = new_path + '/' + str(count) + '.png'
            count += 1
            print(image_path)
            with open(image_path, 'wb') as image_file:
                image_file.write(image_data)

        # The page itself is saved after its images, as before.
        with open(sName, 'wb') as page_file:
            page_file.write(m)


if __name__ == "__main__":
    url = "http://tieba.baidu.com/p/"
    begin_page = 1
    end_page = 3
    baidu_tieba(url, begin_page, end_page)
__author__ = 'user'
import urllib
import urllib.request as request
from bs4 import BeautifulSoup


def taobao(url):
    """Fetch a page and print the text of every ``<h3>`` element in it.

    The body is decoded with the charset declared in the HTTP
    ``Content-Type`` header. When the header declares none, or names a codec
    Python does not know, ``gbk`` is used. Undecodable bytes are dropped.

    Args:
        url: Address of the page to fetch.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    with request.urlopen(url) as response:
        html = response.read()
        charset = response.headers.get_content_charset() or 'gbk'
    try:
        text = html.decode(charset, 'ignore')
    except LookupError:
        # The server announced a codec name Python does not recognise.
        text = html.decode('gbk', 'ignore')
    # Pass already-decoded text so BeautifulSoup does not re-detect the
    # encoding from a <meta> tag that no longer matches the data.
    soup = BeautifulSoup(text, "html.parser")
    for heading in soup.find_all("h3"):
        # .string is None for headings that contain more than one child node.
        print(heading.string)


if __name__ == '__main__':
    print("starts-----------------")
    url = "http://stackoverflow.com/questions/28745153/importing-bs4-in-python-3-5"
    taobao(url)
ImportError: cannot import name 'HTMLParseError'
BeautifulSoup怎么加入Python
解决bs4在Python 3.5下出现“ImportError: cannot import name 'HTMLParseError'”错误
升级到Python3.5之后,我在使用BeautifulSoup4的时候出现了ImportError: cannot import name 'HTMLParseError'的错误。在网上搜索了很久资料之后找到了解决方法:原因是BeautifulSoup在4.4.0之前的版本不支持Python3.5,所以需要把BeautifulSoup升级到4.4.0或更高的版本,就可以使用了。我看到网上有几个回答都是建议改用Python3.4,我觉得这种方法容易误导人。
这里提供两种升级新版BeautifulSoup的方法:
1、使用pip升级
在管理员权限下使用命令行输入命令“pip install --upgrade beautifulsoup4”
或者:
2、使用源码重新安装
卸载原先的BS4=》在http://www.crummy.com/software/BeautifulSoup/bs4/download/下找到你需要的版本并下载=》解压文件=》找到根目录下的setup.py=》以管理员权限打开命令行,输入“python setup.py install”