Catalog
异步非阻塞
from gevent import monkey
monkey.patch_all()
import requests, gevent
# 待访问的URL
def get_urls(pages=100):
    """Build the list of JD search-result URLs to fetch.

    The search keyword is fixed inside the URL template; only the ``page``
    query parameter varies, taking the odd values 1, 3, 5, ...

    Args:
        pages: How many URLs to generate. The default of 100 produces
            ``page=1`` through ``page=199``, the same list as the
            previously hard-coded ``range(1, 200, 2)``.

    Returns:
        A list of ``pages`` URL strings (empty when ``pages`` <= 0).
    """
    jd_url = 'https://search.jd.com/Search?keyword=%E7%88%AC%E8%99%AB&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E7%88%AC%E8%99%AB&page={}&click=0'
    return [jd_url.format(i) for i in range(1, 2 * pages, 2)]
# 网页请求
def request(url, timeout=10):
    """Fetch one URL and print its status code and final URL.

    Args:
        url: The address to request.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests.get`` may wait indefinitely on a stalled
            connection, which would hang the whole timing run.
    """
    ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    header = {'User-Agent': ua}
    try:
        response = requests.get(url, headers=header, timeout=timeout)
    except requests.RequestException as exc:
        # Report the failure in one line and return, so a single bad URL
        # does not end this worker with an unhandled exception.
        print('request failed:', url, exc)
        return
    print(response.status_code, response.url)
# 异步非阻塞
def grequest():
    """Fetch every URL from get_urls() on a gevent pool of size 16.

    One greenlet running request() is spawned per URL; the function returns
    once all of them have finished.
    """
    from gevent.pool import Pool

    workers = Pool(16)
    jobs = []
    for target in get_urls():
        jobs.append(workers.spawn(request, target))
    gevent.joinall(jobs)
# 时间测试
if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed time; time() follows the wall clock and can jump if
    # the system clock is adjusted during the run.
    from time import perf_counter

    start = perf_counter()
    grequest()
    # Elapsed seconds for the whole crawl.
    print(perf_counter() - start)
多线程+异步非阻塞
from gevent import monkey
monkey.patch_all()
import requests, gevent
# 待访问的URL
def get_urls(pages=100):
    """Build the list of JD search-result URLs to fetch.

    The search keyword is fixed inside the URL template; only the ``page``
    query parameter varies, taking the odd values 1, 3, 5, ...

    Args:
        pages: How many URLs to generate. The default of 100 produces
            ``page=1`` through ``page=199``, the same list as the
            previously hard-coded ``range(1, 200, 2)``.

    Returns:
        A list of ``pages`` URL strings (empty when ``pages`` <= 0).
    """
    jd_url = 'https://search.jd.com/Search?keyword=%E7%88%AC%E8%99%AB&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E7%88%AC%E8%99%AB&page={}&click=0'
    return [jd_url.format(i) for i in range(1, 2 * pages, 2)]
# 网页请求
def request(url, timeout=10):
    """Fetch one URL and print its status code and final URL.

    Args:
        url: The address to request.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests.get`` may wait indefinitely on a stalled
            connection, which would hang the whole timing run.
    """
    ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    header = {'User-Agent': ua}
    try:
        response = requests.get(url, headers=header, timeout=timeout)
    except requests.RequestException as exc:
        # Report the failure in one line and return, so a single bad URL
        # does not end this worker with an unhandled exception.
        print('request failed:', url, exc)
        return
    print(response.status_code, response.url)
# 异步非阻塞
def grequest(urls):
    """Fetch the given URLs on a gevent pool of size 4.

    Args:
        urls: Iterable of URL strings; one greenlet running request() is
            spawned per entry. Returns once all of them have finished.
    """
    from gevent.pool import Pool

    workers = Pool(4)
    jobs = []
    for target in urls:
        jobs.append(workers.spawn(request, target))
    gevent.joinall(jobs)
# 多线程
def concurrent(n=4):
    """Split the URL list into ``n`` batches and run grequest() on each.

    Each batch is submitted to a ThreadPoolExecutor with ``n`` workers; the
    function returns after every batch has completed.

    NOTE(review): this script calls ``monkey.patch_all()`` before anything
    else, and gevent's default patching includes ``threading`` -- so these
    executor workers are presumably cooperative greenlets rather than real
    OS threads. Confirm before reading the timing as a "multi-thread" result.

    Args:
        n: Number of workers and number of batches.
    """
    from concurrent.futures import ThreadPoolExecutor

    url_ls = get_urls()
    # Stride slicing deals the URLs round-robin into exactly n batches whose
    # sizes differ by at most one, so every worker gets an even share.
    batches = [url_ls[i::n] for i in range(n)]
    # The with-block waits for all submitted work and always shuts the pool
    # down, even if submit() raises part-way through.
    with ThreadPoolExecutor(n) as pool:
        futures = [pool.submit(grequest, batch) for batch in batches if batch]
    # A future keeps its exception until asked; surface failures instead of
    # letting them vanish silently.
    for future in futures:
        error = future.exception()
        if error is not None:
            print('worker failed:', repr(error))
# 时间测试
if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed time; time() follows the wall clock and can jump if
    # the system clock is adjusted during the run.
    from time import perf_counter

    start = perf_counter()
    concurrent()
    # Elapsed seconds for the whole crawl.
    print(perf_counter() - start)
多进程
import requests
# 待访问的URL
def get_urls(pages=100):
    """Build the list of JD search-result URLs to fetch.

    The search keyword is fixed inside the URL template; only the ``page``
    query parameter varies, taking the odd values 1, 3, 5, ...

    Args:
        pages: How many URLs to generate. The default of 100 produces
            ``page=1`` through ``page=199``, the same list as the
            previously hard-coded ``range(1, 200, 2)``.

    Returns:
        A list of ``pages`` URL strings (empty when ``pages`` <= 0).
    """
    jd_url = 'https://search.jd.com/Search?keyword=%E7%88%AC%E8%99%AB&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E7%88%AC%E8%99%AB&page={}&click=0'
    return [jd_url.format(i) for i in range(1, 2 * pages, 2)]
# 网页请求
def request(urls, timeout=10):
    """Fetch each URL in turn and print its status code and final URL.

    Args:
        urls: Iterable of URL strings, requested sequentially.
        timeout: Seconds to wait on the server per request. Without a
            timeout ``requests.get`` may wait indefinitely on a stalled
            connection, which would hang the whole timing run.
    """
    ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    header = {'User-Agent': ua}
    for url in urls:
        try:
            response = requests.get(url, headers=header, timeout=timeout)
        except requests.RequestException as exc:
            # Keep going: one failed URL must not abandon the rest of the
            # batch (the exception would otherwise end this call early).
            print('request failed:', url, exc)
            continue
        print(response.status_code, response.url)
# 多进程
def concurrent(n=16):
    """Split the URL list into ``n`` batches and fetch each in its own process.

    Each batch is submitted to a ProcessPoolExecutor with ``n`` workers,
    where request() downloads the batch's URLs one after another. The
    function returns after every batch has completed.

    Args:
        n: Number of worker processes and number of batches.
    """
    from concurrent.futures import ProcessPoolExecutor

    url_ls = get_urls()
    # Stride slicing deals the URLs round-robin into exactly n batches whose
    # sizes differ by at most one. The earlier fixed-step slicing produced
    # only 15 batches for 100 URLs and n=16, leaving one worker idle.
    batches = [url_ls[i::n] for i in range(n)]
    # The with-block waits for all submitted work and always shuts the pool
    # down, even if submit() raises part-way through.
    with ProcessPoolExecutor(n) as pool:
        futures = [pool.submit(request, batch) for batch in batches if batch]
    # A future keeps its exception until asked; surface failures instead of
    # letting them vanish silently.
    for future in futures:
        error = future.exception()
        if error is not None:
            print('worker failed:', repr(error))
# 时间测试
if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed time; time() follows the wall clock and can jump if
    # the system clock is adjusted during the run.
    from time import perf_counter

    start = perf_counter()
    concurrent()
    # Elapsed seconds for the whole crawl.
    print(perf_counter() - start)
测试结果
| 方法 | 时间(秒) |
| --- | --- |
| 异步非阻塞 | 3.4 |
| 多线程+异步非阻塞 | 3.5 |
| 多进程 | 4.8 |