# Use threading plus a queue.Queue to send GET requests and print each status code.
import threading
from queue import Empty, Queue

import requests
class JianDan(threading.Thread):
    """Worker thread that drains a shared queue of URLs and fetches each one.

    Several workers may share the same queue; each keeps pulling URLs until
    none are left and then exits.
    """

    def __init__(self, queue):
        """Store the shared URL queue.

        Args:
            queue: A ``queue.Queue`` holding the URL strings to fetch.
        """
        super().__init__()
        self.queue = queue

    def run(self):
        """Fetch URLs from the queue until no more are available."""
        while True:
            # Checking empty() and then calling get_nowait() is racy when
            # several workers share one queue: another worker can take the
            # last item in between. Attempt the get and treat Empty as the
            # signal that the work is done.
            try:
                url = self.queue.get_nowait()
            except Empty:
                break
            self.spider(url)
            # Progress output: items still waiting, and whether the queue
            # is drained.
            print(self.queue.qsize())
            print(self.queue.empty())

    def spider(self, url):
        """Send a GET request to *url* and print its status code and body size.

        Network failures are reported on stdout instead of being raised, so
        one bad URL does not terminate the worker thread.

        Args:
            url: The address to request.
        """
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
        try:
            # Without a timeout a stalled server would block this worker
            # forever.
            r = requests.get(url=url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print('request failed:', url, exc)
            return
        print(r.status_code, len(r.content))


def main():
    """Queue ten article URLs and fetch them with a pool of worker threads."""
    q = Queue(200)
    for i in range(10):
        q.put('http://www.bj.chinanews.com/news/2021/0228/8103' + str(i) + '.html')

    thread_count = 30
    threads = [JianDan(q) for _ in range(thread_count)]

    # Start every worker before joining any of them; joining right after
    # each start would make the requests run one at a time.
    for t in threads:
        t.start()
    for t in threads:
        t.join()
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
执行结果:
