re lxml BeautifulSoup 速度对比

Python
<code><pre>对比 <span class="wp_keywordlink_affiliate"><a href="https://www.168seo.cn/tag/re" title="View all posts in re" target="_blank">re</a></span> <span class="wp_keywordlink_affiliate"><a href="https://www.168seo.cn/tag/lxml" title="View all posts in lxml" target="_blank">lxml</a></span> <span class="wp_keywordlink_affiliate"><a href="https://www.168seo.cn/tag/beautifulsoup" title="View all posts in BeautifulSoup" target="_blank">BeautifulSoup</a></span>的解析速度
#coding:utf-8
import requests, re, sys,time
from bs4 import BeautifulSoup as bs4
# reload(sys)
# sys.setdefaultencoding("utf-8")
from lxml import etree
from pyquery import PyQuery as pq

t1 = time.time()
for i in range(1000):
    html = requests.get("http://cuiqingcai.com/2621.html").text
    pa = re.compile("<title>(.*?)</title>",re.S)
    print(re.search(pa,html).group(1),i)

t2= time.time()
t3 = t2-t1
time.sleep(30)
t4 = time.time()
for i in range(1000):
    html = requests.get("http://cuiqingcai.com/2621.html").content
    html = etree.HTML(html)
    print(html.xpath("//title/text()")[0],i)

t5= time.time()
t6 = t5-t4
t7= time.time()

for i in range(1000):
    html = requests.get("http://cuiqingcai.com/2621.html").content
    soup = bs4(html,'lxml')
    print(soup.title.get_text(),i)

t8= time.time()
t9= t8-t7
#

t10 = time.time()

for i in range(1000):
    html = requests.get("http://cuiqingcai.com/2621.html").content
    doc = pq(html)
    print(doc('title').text(),i)
t11 = time.time()
t12 = t11-t10

print("re :"+str(t3),"lxml :"+str(t6),"bs4 lxml:"+str(t9),"PyQuery"+str(t12))</pre></code>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
<code>
对比 re lxml BeautifulSoup的解析速度
# coding: utf-8
"""Benchmark <title> extraction with re, lxml, BeautifulSoup and PyQuery.

The page is downloaded once and every parser is timed against that same
payload, so the reported figures reflect parsing cost instead of being
dominated by the latency of thousands of HTTP requests.
"""
import re
import sys
import time

import requests
from bs4 import BeautifulSoup as bs4
from lxml import etree
from pyquery import PyQuery as pq

# Legacy default-encoding workaround, intentionally left disabled:
# reload(sys)
# sys.setdefaultencoding("utf-8")

URL = "http://cuiqingcai.com/2621.html"
ROUNDS = 1000
REQUEST_TIMEOUT = 30  # seconds; keeps the script from hanging on a dead host

# Compiled once at import time instead of on every loop iteration.
TITLE_PATTERN = re.compile("<title>(.*?)</title>", re.S)


def _title_with_re(markup):
    """Return the text between the first <title> tags via a regex search."""
    return TITLE_PATTERN.search(markup).group(1)


def _title_with_lxml(markup):
    """Return the first //title text node found by lxml's HTML parser."""
    return etree.HTML(markup).xpath("//title/text()")[0]


def _title_with_bs4(markup):
    """Return the title text using BeautifulSoup with the 'lxml' backend."""
    return bs4(markup, 'lxml').title.get_text()


def _title_with_pyquery(markup):
    """Return the title text using a PyQuery selector."""
    return pq(markup)('title').text()


def _time_parser(extract_title, markup, rounds=ROUNDS):
    """Call extract_title(markup) `rounds` times and return elapsed seconds.

    Each extracted title is printed together with its iteration index, as
    the original script did; that printing cost is included in the timing
    for every parser alike.
    """
    started = time.perf_counter()  # monotonic, unlike time.time()
    for i in range(rounds):
        print(extract_title(markup), i)
    return time.perf_counter() - started


def main():
    """Fetch the page once, time each parser on it, and print the totals."""
    response = requests.get(URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly rather than benchmark an HTTP error page.
    response.raise_for_status()

    # The regex variant works on decoded text; the parsers receive raw bytes,
    # matching the inputs used by the original loops.
    text = response.text
    raw = response.content

    t3 = _time_parser(_title_with_re, text)
    t6 = _time_parser(_title_with_lxml, raw)
    t9 = _time_parser(_title_with_bs4, raw)
    t12 = _time_parser(_title_with_pyquery, raw)

    print("re :" + str(t3), "lxml :" + str(t6), "bs4 lxml:" + str(t9), "PyQuery" + str(t12))


if __name__ == "__main__":
    main()
 
</code>



  • zeropython 微信公众号 5868037 QQ号 5868037@qq.com QQ邮箱
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值