# -*- coding:utf-8 -*-
from lxml import etree
import requests
from Queue import Queue
import threading
import time
import json
class thread_crawl(threading.Thread):
'''
Crawler thread class.
'''
def __init__(self,threadID,q):
threading.Thread.__init__(self)
self.threadID = threadID
self.q = q
def run(self):
    """Thread body: print a start banner, run the spider, print an exit banner.

    Overrides ``threading.Thread.run``, so it executes in the new thread
    once ``start()`` is called.

    The thread ID is interpolated with ``%s`` instead of being concatenated
    with ``+``, so a non-string ID (for example an ``int``) is printed
    rather than raising ``TypeError``. Output for string IDs is unchanged.
    """
    # Wrap the ID in a 1-tuple so a tuple-valued ID is formatted as a whole.
    print("starting%s" % (self.threadID,))
    self.qiushi_spider()
    print("exiting%s" % (self.threadID,))
def qiushi_spider(self):
while True:
if self.q.empty():
break
else:
page = self.q.get()
print("qiushi_spider=",self.threadID,",page=",str(page))
url = 'http://www.qiushibaike.com/8hr/page/' + str(page) + '/'
headers = {
'User-Agent': 'Mozilla/5.0 (
【python】多线程爬虫实例
最新推荐文章于 2023-08-15 00:42:53 发布