#encoding:utf8
import requests
import os
from lxml import etree
from multiprocessing import Pool
class Dt:
    """Download the images of articles listed on doutula.com.

    List pages are fetched in a process pool; for every article linked from
    a list page the images are saved under ``img/<article title>/``.
    """

    # Seconds to wait for a server response before giving up, so that a
    # stalled connection cannot block a worker process forever.
    TIMEOUT = 30

    def __init__(self):
        # Prefix of a paginated list URL; the page number is appended to it.
        self.stit = "https://www.doutula.com/article/list/?page="
        # Base list URL (kept for callers; not used inside this class).
        self.root_url = "https://www.doutula.com/article/list/"
        # Browser-like User-Agent header sent with every request.
        self.head = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2 Safari/537.36"}

    def get_ts(self, url):
        """Fetch the list page at *url* and process every article it links to.

        A failure while handling one article is printed and the remaining
        articles of the page are still processed.
        """
        response = requests.get(url, headers=self.head, timeout=self.TIMEOUT)
        page = etree.HTML(response.text)
        links = page.xpath('//*[@class="row"]/div[1]/a/@href')
        for link in links:
            try:
                self.img(link)
            except Exception as e:
                # Best effort: one broken article must not abort the page.
                print(e)

    def img(self, url):
        """Fetch the article page at *url* and download its images.

        Raises:
            IndexError: if the page contains no title element.
        """
        response = requests.get(url, headers=self.head, timeout=self.TIMEOUT)
        page = etree.HTML(response.text)
        titles = page.xpath('//*[@class="pic-title"]/h1/a/text()')
        if not titles:
            # Same exception type as an out-of-range index, but with context.
            raise IndexError("no article title found at %s" % url)
        name = titles[0].strip()
        urls = page.xpath('//*[@class="artile_des"]//img/@src')
        self.download(name, urls)

    def download(self, name, urls):
        """Save every image in *urls* into the folder ``img/<name>``.

        The folder (including ``img`` itself) is created when missing and
        reused when it already exists. Each file is named after the last
        ten characters of its URL.
        """
        # A path separator inside the title would be read as a nested folder.
        safe_name = name.replace("/", "_").replace("\\", "_")
        folder = os.path.join("img", safe_name)
        os.makedirs(folder, exist_ok=True)
        for i in urls:
            data = requests.get(i, headers=self.head, timeout=self.TIMEOUT).content
            # The ten-character tail may still contain a "/" when the file
            # name is short; keep only the part after it.
            filename = i[-10:].rsplit("/", 1)[-1]
            with open(os.path.join(folder, filename), "wb") as f:
                f.write(data)
        print("完成")

    def dio(self):
        """Process list pages 1 to 49 in parallel using ten worker processes.

        Errors that escape a worker are printed instead of raised.
        """
        pages = [self.stit + str(i) for i in range(1, 50)]
        try:
            # Start the process pool; get_ts does all the work and returns
            # nothing, so the result of map is not needed.
            with Pool(10) as pool:
                pool.map(self.get_ts, pages)
        except Exception as e:
            print(e)
# Script entry point: build the scraper and start the crawl. The guard also
# keeps worker processes that re-import this module from starting a crawl.
if __name__ == '__main__':
    Dt().dio()