Python 3.4 起标准库新增了 asyncio 模块, 可以帮我们检测 IO(只能是网络 IO), 实现应用程序级别的任务切换.
import asyncio
async def task(task_id,senconds):
    """Print a start marker, pause for ``senconds`` seconds, print an end marker.

    Written as a native coroutine: the ``@asyncio.coroutine`` decorator and
    ``yield from`` style were removed from asyncio in Python 3.11.

    Args:
        task_id: Label interpolated into the two printed messages.
        senconds: Delay in seconds handed to ``asyncio.sleep`` (the parameter
            keeps its original spelling because callers pass it by keyword).

    Returns:
        None.
    """
    print('%s is start' %task_id)
    # asyncio.sleep is a timer, not network I/O: awaiting it suspends only
    # this coroutine, so the event loop is free to run the other tasks.
    await asyncio.sleep(senconds)
    print('%s is end' %task_id)
# Two sleepers with different delays; they overlap, so the total wall time is
# roughly the longest delay rather than the sum.
tasks=[task(task_id=1,senconds=3),task(task_id=2,senconds=4)]
# Use a fresh loop instead of asyncio.get_event_loop(): the implicit
# "current loop" lookup is deprecated outside a running loop, and it would
# hand back an already-closed loop if an earlier snippet closed it.
loop=asyncio.new_event_loop()
try:
    # Bind each coroutine to this loop explicitly so gather() does not have
    # to guess which loop to use.
    pending=[loop.create_task(coro) for coro in tasks]
    loop.run_until_complete(asyncio.gather(*pending))
finally:
    # Always release the loop's resources, even if a task raised.
    loop.close()
基本使用
但是asyncio只能发送tcp级别的请求,不能直接发送http协议的请求,因此当我们需要发送http请求时,需要自定义http报头
#我们爬取一个网页的过程,以https://www.python.org/doc/为例,将关键步骤列举如下
#步骤一:向www.python.org这台主机发送tcp三次握手,是IO阻塞操作
#步骤二:封装http协议的报头
#步骤三:发送http协议的请求包,是IO阻塞操作
#步骤四:接收http协议的响应包,是IO阻塞操作
import asyncio
async def get_page(host,port=80,url='/'):
    """Fetch ``url`` from ``host`` over a raw TCP stream with a hand-built HTTP/1.0 GET.

    asyncio streams only move bytes over TCP, so the HTTP request line and
    headers are assembled here by hand.

    Args:
        host: Server name to connect to; also sent as the ``Host`` header.
        port: TCP port to connect to.
        url: Request path. A missing leading ``/`` is added, because a request
            line such as ``GET doc HTTP/1.0`` is malformed.

    Returns:
        Always ``1``; the raw response bytes are printed, not returned.
    """
    if not url.startswith('/'):
        url = '/' + url

    # Step 1 (awaits I/O): open the TCP connection.
    reader, writer = await asyncio.open_connection(host, port)
    try:
        # Step 2: build the HTTP request header block ourselves.
        request = 'GET %s HTTP/1.0\r\nHost: %s\r\n\r\n' % (url, host)

        # Step 3 (awaits I/O): send the request and flush the write buffer.
        writer.write(request.encode('utf-8'))
        await writer.drain()

        # Step 4 (awaits I/O): read until EOF; this relies on the server
        # closing the connection after replying to the HTTP/1.0 request.
        text = await reader.read()

        print(host, url, text)
    finally:
        # Close the socket even when the write or read fails.
        writer.close()
    print('-===>')
    return 1
# Three independent fetches that run concurrently on one event loop.
tasks=[
    get_page(host='www.python.org',url='/doc'),
    # The path needs its leading slash to form a valid request line.
    get_page(host='www.cnblogs.com',url='/linhaifeng'),
    get_page(host='www.openstack.org'),
]
# Use a fresh loop instead of asyncio.get_event_loop(): the implicit
# "current loop" lookup is deprecated outside a running loop, and it would
# hand back an already-closed loop if an earlier snippet closed it.
loop=asyncio.new_event_loop()
try:
    # Bind each coroutine to this loop explicitly so gather() does not have
    # to guess which loop to use.
    pending=[loop.create_task(coro) for coro in tasks]
    results=loop.run_until_complete(asyncio.gather(*pending))
finally:
    # Always release the loop's resources, even if a fetch raised.
    loop.close()
print('=====>',results) #[1, 1, 1]
asyncio+自定义http协议报头
aiohttp模块可以帮助我们封装http报头, 然后结合asyncio实现检测IO并切换任务
import aiohttp
import asyncio
async def get_page(url):
    """Download ``url`` with aiohttp and print the response body.

    Uses ``async with`` on a ``ClientSession`` and on the response, which is
    what current aiohttp requires; the old ``yield from aiohttp.request(...)``
    form no longer works. The context managers also release the connection on
    every path, including errors.

    Args:
        url: Absolute URL to request with GET.

    Returns:
        Always ``1``; the body bytes are printed, not returned.
    """
    print('GET:%s' %url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # Read the whole body while the response is still open.
            data = await response.read()
    print(url,data)
    return 1
# Three independent downloads that run concurrently on one event loop.
tasks=[
    get_page('https://www.python.org/doc'),
    get_page('https://www.cnblogs.com/linhaifeng'),
    get_page('https://www.openstack.org'),
]
# Use a fresh loop instead of asyncio.get_event_loop(): the implicit
# "current loop" lookup is deprecated outside a running loop, and it would
# hand back an already-closed loop if an earlier snippet closed it.
loop=asyncio.new_event_loop()
try:
    # Bind each coroutine to this loop explicitly so gather() does not have
    # to guess which loop to use.
    pending=[loop.create_task(coro) for coro in tasks]
    results=loop.run_until_complete(asyncio.gather(*pending))
finally:
    # Always release the loop's resources, even if a download raised.
    loop.close()
print('=====>',results) #[1, 1, 1]
asyncio+aiohttp
还可以通过loop.run_in_executor将requests.get这类阻塞函数交给线程池执行, 这样它就不会阻塞事件循环, asyncio也就能在等待期间切换到其他任务了.
import requests
import asyncio
async def get_page(func,*args):
    """Run the blocking callable ``func(*args)`` in the default executor and report on its result.

    The original assigned the loop to a misspelled local (``loog``) and then
    used ``loop``, so it only worked when a module-level ``loop`` happened to
    exist. The running loop is now looked up and used directly.

    Args:
        func: Blocking callable, e.g. ``requests.get``. Its return value must
            expose ``url`` and ``text`` attributes.
        *args: Positional arguments for ``func``; ``args[0]`` is printed as
            the target being fetched.

    Returns:
        Always ``1``; the URL and body length are printed, not returned.
    """
    print('GET:%s' %args[0])
    running_loop = asyncio.get_running_loop()
    # None selects the loop's default executor, so the blocking call runs off
    # the event loop and other coroutines keep making progress.
    response = await running_loop.run_in_executor(None, func, *args)
    print(response.url,len(response.text))
    return 1
# Three blocking requests.get calls, each handed to get_page so they can
# overlap instead of running back to back.
tasks=[
    get_page(requests.get,'https://www.python.org/doc'),
    get_page(requests.get,'https://www.cnblogs.com/linhaifeng'),
    get_page(requests.get,'https://www.openstack.org'),
]
# Use a fresh loop instead of asyncio.get_event_loop(): the implicit
# "current loop" lookup is deprecated outside a running loop, and it would
# hand back an already-closed loop if an earlier snippet closed it.
loop=asyncio.new_event_loop()
try:
    # Bind each coroutine to this loop explicitly so gather() does not have
    # to guess which loop to use.
    pending=[loop.create_task(coro) for coro in tasks]
    results=loop.run_until_complete(asyncio.gather(*pending))
finally:
    # Always release the loop's resources, even if a request raised.
    loop.close()
print('=====>',results) #[1, 1, 1]
asyncio+requests模块的方法