Comparing the concurrency efficiency of multiprocessing, multithreading, coroutines, multiprocessing + coroutines, and multithreading + coroutines
# -*- coding: utf-8 -*-
# Version 1: multiprocessing
from multiprocessing import Process, Queue
import requests
import time

def crawl_process(queue, i):
    while not queue.empty():
        try:
            # a short timeout avoids blocking forever if another worker drains the queue first
            url = queue.get(timeout=1)
            r = requests.get(url, timeout=3)
            print("[process %s]" % i, url, r.status_code)
        except Exception as e:
            print(e)
            return

if __name__ == '__main__':
    queue = Queue()
    urls = []
    with open(r"d:\yunxing2.txt") as fp:
        for url in fp:
            urls.append(url.strip())
    print("%s urls in total" % len(urls))
    for url in urls:
        queue.put(url)
    start = time.time()
    print("********************** timing started **********************")
    p_list = []
    for i in range(1, 5):
        p = Process(target=crawl_process, args=(queue, i))  # 4 worker processes share one queue
        p_list.append(p)
        p.start()
        print(p)
    for p in p_list:
        p.join()
        print(p)
    end = time.time()
    print("********************** timing stopped **********************")
    print("total time:", end - start)
# -*- coding: utf-8 -*-
# Version 2: multithreading
from threading import Thread
from queue import Queue
import requests
import time

def crawl_thread(queue, i):
    while not queue.empty():
        try:
            # a short timeout avoids blocking forever if another worker drains the queue first
            url = queue.get(timeout=1)
            r = requests.get(url, timeout=3)
            print("[thread %s]" % i, url, r.status_code)
        except Exception as e:
            print(e)
            return

if __name__ == '__main__':
    queue = Queue()
    urls = []
    with open(r"d:\yunxing2.txt") as fp:
        for url in fp:
            urls.append(url.strip())
    print("%s urls in total" % len(urls))
    for url in urls:
        queue.put(url)
    start = time.time()
    print("********************** timing started **********************")
    t_list = []
    for i in range(1, 5):
        t = Thread(target=crawl_thread, args=(queue, i))  # 4 worker threads share one queue
        t_list.append(t)
        t.start()
        print(t)
    for t in t_list:
        t.join()
        print(t)
    end = time.time()
    print("********************** timing stopped **********************")
    print("total time:", end - start)
# -*- coding: utf-8 -*-
# Version 3: coroutines (gevent)
from gevent import monkey
monkey.patch_all()        # patch blocking I/O before importing requests
import gevent
from tornado.queues import Queue
import requests
import time

def crawl(urls, i):
    # each greenlet crawls its own batch sequentially; gevent switches greenlets during network waits
    while urls:
        url = urls.pop()
        try:
            r = requests.get(url, timeout=3)
            print("[coroutine %s]" % i, url, r.status_code)
        except Exception as e:
            print(e)

def crawl_gevent(queue):
    url_list = []
    tasks = []
    i = 0
    while not queue.empty():
        url = queue.get_nowait()      # tornado's get() returns a Future; get_nowait() returns the item itself
        url_list.append(url)
        if len(url_list) == 5:        # spawn one greenlet per batch of 5 urls
            i += 1
            tasks.append(gevent.spawn(crawl, url_list, i))
            url_list = []
    if url_list:                      # don't drop the last batch when the total isn't a multiple of 5
        i += 1
        tasks.append(gevent.spawn(crawl, url_list, i))
    gevent.joinall(tasks)

if __name__ == '__main__':
    queue = Queue()
    urls = []
    with open(r"d:\yunxing2.txt") as fp:
        for url in fp:
            urls.append(url.strip())
    print("%s urls in total" % len(urls))
    for url in urls:
        queue.put(url)
    start = time.time()
    print("********************** timing started **********************")
    crawl_gevent(queue)
    end = time.time()
    print("********************** timing stopped **********************")
    print("total time:", end - start)
# -*- coding: utf-8 -*-
# Version 4: multiprocessing + coroutines (gevent)
from gevent import monkey
monkey.patch_all()        # patch blocking I/O before importing requests
import gevent
from multiprocessing import Process, Queue
import requests
import time

def crawl(url, i):
    try:
        r = requests.get(url, timeout=3)
        print("[process+coroutine %s]" % i, url, r.status_code)
    except Exception as e:
        print(e)

def task_gevent(queue, i):
    url_list = []
    while not queue.empty():
        try:
            # a shared multiprocessing.Queue, so the worker processes split the urls between them
            url = queue.get(timeout=1)
        except Exception:
            break
        url_list.append(url)
        if len(url_list) == 5:            # crawl each batch of 5 urls with 5 greenlets
            gevent.joinall([gevent.spawn(crawl, url, i) for url in url_list])
            url_list = []
    if url_list:                          # crawl the final, smaller batch
        gevent.joinall([gevent.spawn(crawl, url, i) for url in url_list])

if __name__ == '__main__':
    queue = Queue()
    urls = []
    with open(r"d:\yunxing2.txt") as fp:
        for url in fp:
            urls.append(url.strip())
    print("%s urls in total" % len(urls))
    for url in urls:
        queue.put(url)
    start = time.time()
    print("********************** timing started **********************")
    p_list = []
    for i in range(1, 3):
        p = Process(target=task_gevent, args=(queue, i))  # 2 processes, each running gevent greenlets
        p.start()
        p_list.append(p)
        print(p)
    for p in p_list:
        p.join()
        print(p)
    end = time.time()
    print("********************** timing stopped **********************")
    print("total time:", end - start)
# -*- coding: utf-8 -*-
# Version 5: multithreading + coroutines (gevent)
from gevent import monkey
monkey.patch_all()        # note: this also patches threading, so the "threads" below actually run as greenlets
import gevent
from threading import Thread
from queue import Queue
import requests
import time

def crawl(url, i):
    try:
        r = requests.get(url, timeout=3)
        print("[thread+coroutine %s]" % i, url, r.status_code)
    except Exception as e:
        print(e)

def task_gevent(queue, i):
    url_list = []
    while not queue.empty():
        try:
            # a short timeout avoids blocking forever if another worker drains the queue first
            url = queue.get(timeout=1)
        except Exception:
            break
        url_list.append(url)
        if len(url_list) == 5:            # crawl each batch of 5 urls with 5 greenlets
            gevent.joinall([gevent.spawn(crawl, url, i) for url in url_list])
            url_list = []
    if url_list:                          # crawl the final, smaller batch
        gevent.joinall([gevent.spawn(crawl, url, i) for url in url_list])

if __name__ == '__main__':
    queue = Queue()
    urls = []
    with open(r"d:\yunxing2.txt") as fp:
        for url in fp:
            urls.append(url.strip())
    print("%s urls in total" % len(urls))
    for url in urls:
        queue.put(url)
    start = time.time()
    print("********************** timing started **********************")
    t_list = []
    for i in range(1, 5):
        t = Thread(target=task_gevent, args=(queue, i))  # 4 threads, each running gevent greenlets
        t.start()
        t_list.append(t)
        print(t)
    for t in t_list:
        t.join()
        print(t)
    end = time.time()
    print("********************** timing stopped **********************")
    print("total time:", end - start)