# Scrape all images from the qiubaichengren.com picture site; start with a single-threaded version:
#encoding:utf-8
#单线程简单版本
import requests
import os
from bs4 import BeautifulSoup
import threading
import urllib.request
# URL template for a listing page; ``{}`` is filled with the page number.
FIRST_PAGE_URL = 'http://www.qiubaichengren.com/{}.html'

# Listing-page URLs for pages 1 through 99 (the upper bound of range() is
# exclusive), in ascending page order.
PAGE_URL_LIST = [FIRST_PAGE_URL.format(page_no) for page_no in range(1, 100)]

# Module-level lock. Nothing in the code visible here acquires it --
# presumably it guards PAGE_URL_LIST in a multi-threaded variant (confirm).
gLock = threading.Lock()
def get_page(page_url):
response = requests.get(page_url)
content = response.content
soup = BeautifulSoup(content, 'lxml')
src = soup.find_all('div',class_='mala-text')
imgs = soup.find_all('img')
for img in src:
url = img.find('img')
link = url.get('src')
title = url.get('alt')
split_list = link.split('/')
final = split_list.pop()
t_split_list = final.split('.')
suffix = t_split_list.pop()
filename = title + '.' + suffix #名字加后缀
#