re lxml BeautifulSoup 速度对比

最新推荐文章于 2025-02-20 16:12:27 发布

原创 · 最新推荐文章于 2025-02-20 16:12:27 发布 · 259 阅读

CC 4.0 BY-SA版权

Python

"""Compare how fast re, lxml, BeautifulSoup and PyQuery extract a page title.

The page is downloaded once and every parser is timed on that same payload,
so the reported numbers reflect parsing cost rather than network latency.
"""

import re
import sys
import time
from typing import Callable, Tuple

URL = "http://cuiqingcai.com/2621.html"
ITERATIONS = 1000
REQUEST_TIMEOUT_SECONDS = 30

# Compiled once at import time; re.S lets "." match newlines so a title that
# is wrapped across several lines is still captured.
TITLE_PATTERN = re.compile("<title>(.*?)</title>", re.S)


def title_with_re(html_text: str) -> str:
    """Return the text between the first <title>...</title> pair.

    Args:
        html_text: Decoded HTML document.

    Raises:
        ValueError: If ``html_text`` contains no <title>...</title> pair.
    """
    match = TITLE_PATTERN.search(html_text)
    if match is None:
        raise ValueError("no <title> element found")
    return match.group(1)


def title_with_lxml(html_bytes: bytes) -> str:
    """Return the first //title text node found by lxml's HTML parser."""
    # Imported here so this module can be loaded without lxml installed.
    from lxml import etree

    return etree.HTML(html_bytes).xpath("//title/text()")[0]


def title_with_bs4(html_bytes: bytes) -> str:
    """Return the title text using BeautifulSoup with the lxml backend."""
    # Imported here so this module can be loaded without bs4 installed.
    from bs4 import BeautifulSoup

    return BeautifulSoup(html_bytes, "lxml").title.get_text()


def title_with_pyquery(html_bytes: bytes) -> str:
    """Return the title text using a PyQuery ``title`` selector."""
    # Imported here so this module can be loaded without pyquery installed.
    from pyquery import PyQuery

    return PyQuery(html_bytes)("title").text()


def time_extractor(
    extract: Callable, payload, iterations: int = ITERATIONS
) -> Tuple[str, float]:
    """Time ``iterations`` calls of ``extract(payload)``.

    Args:
        extract: Callable taking the HTML payload and returning the title.
        payload: HTML document in whatever form ``extract`` expects.
        iterations: Number of timed calls.

    Returns:
        A ``(title, seconds)`` tuple: the title produced by an untimed
        warm-up call and the elapsed time of the timed calls.
    """
    # The warm-up call keeps one-time costs (the lazy imports above) out of
    # the measurement and provides the title to report.
    title = extract(payload)
    start = time.perf_counter()
    for _ in range(iterations):
        extract(payload)
    return title, time.perf_counter() - start


def main(url: str = URL, iterations: int = ITERATIONS) -> None:
    """Download ``url`` once, time every extractor on it and print results.

    Prints the title each extractor found, followed by one summary line
    holding the elapsed seconds per extractor.
    """
    import requests

    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly instead of benchmarking an HTTP error page.
    response.raise_for_status()

    # The regex works on decoded text; the tree parsers receive raw bytes.
    contenders = [
        ("re :", title_with_re, response.text),
        ("lxml :", title_with_lxml, response.content),
        ("bs4 lxml:", title_with_bs4, response.content),
        ("PyQuery", title_with_pyquery, response.content),
    ]

    report = []
    for label, extract, payload in contenders:
        title, seconds = time_extractor(extract, payload, iterations)
        print(title)
        report.append(label + str(seconds))
    print(*report)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default URL.
    main(sys.argv[1] if len(sys.argv) > 1 else URL)