网易云课堂上的教程,写在这里方便以后查看。先来一个简单的多线程例子:
#encoding:utf-8
import threading
import random
import time
# Producer/consumer demo: shared balance plus the lock that guards it.
MONEY = 0
gLock = threading.Lock()
def procuder():
    """Producer: forever add a random amount (10-100) to the shared MONEY.

    Intended to run on a worker thread; it never returns.
    """
    global MONEY
    while True:
        random_money = random.randint(10, 100)
        # Only the mutation of the shared balance needs the lock.
        gLock.acquire()
        MONEY += random_money
        gLock.release()
        # Bug fix: threading.currentThread is a function -- the original
        # printed the function object itself instead of calling it.
        print('生产者%s--生产了%d' % (threading.current_thread().name, random_money))
        time.sleep(0.5)
def customer():
    """Consumer: forever try to withdraw a random amount (10-100) from MONEY.

    Intended to run on a worker thread; it never returns.
    """
    global MONEY
    while True:
        random_money = random.randint(10, 100)
        # Bug fix: the balance check must happen under the lock, otherwise
        # another consumer can drain MONEY between the check and the
        # withdrawal, driving the balance negative.
        gLock.acquire()
        if MONEY < random_money:
            gLock.release()
            print('余额不足,欲消费:%d,但是仓库剩余:%d' % (random_money, MONEY))
        else:
            MONEY -= random_money
            gLock.release()
            # Bug fix: call currentThread() instead of printing the function.
            print('消费者%s--消费了%d' % (threading.current_thread().name, random_money))
        time.sleep(0.5)
def p_c_test():
    """Launch three producer threads followed by three consumer threads."""
    workers = [threading.Thread(target=procuder) for _ in range(3)]
    workers += [threading.Thread(target=customer) for _ in range(3)]
    for worker in workers:
        worker.start()
# Run the producer/consumer demo only when executed as a script.
if __name__ =="__main__":
    p_c_test()
接着是爬取图片:
#encoding: utf-8
import requests
import urllib.request
from bs4 import BeautifulSoup
import os
import threading
'''
def GetUrl():
url = []
first_url = 'http://www.doutula.com/photo/list/?page={}'
for i in range(1,1076):
url.append(first_url.format(i))
return url
print(GetUrl())
'''
# Shared work queues and the single lock that guards both of them.
PAGE_URL_LIST = []  # listing-page URLs still waiting to be scraped
BASE_PAGE_URL = 'http://www.doutula.com/photo/list/?page='
FACE_URL_LIST = []  # emoticon image URLs waiting to be downloaded
gLock = threading.Lock()
# Pre-fill the page queue with listing pages 1..1110.
PAGE_URL_LIST.extend(BASE_PAGE_URL + str(page) for page in range(1, 1111))
def download_image(url):
    """Download one image *url* into the local ``images`` directory.

    The file name is taken from the last path segment of the URL.
    """
    filename = url.split('/')[-1]
    target = os.path.join('images', filename)
    urllib.request.urlretrieve(url, filename=target)
# Collect the image links on one listing page and download each of them.
def get_page(page_url):
    """Fetch *page_url*, parse it, and download every emoticon image on it."""
    html = requests.get(page_url).content
    soup = BeautifulSoup(html, 'lxml')
    # Lazy-loaded images keep the real URL in the ``data-original`` attribute.
    for tag in soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'}):
        download_image(tag['data-original'])
def procuder():
    """Producer thread: pop listing pages off PAGE_URL_LIST and scrape them.

    Image URLs found on each page are appended to the shared FACE_URL_LIST
    for the consumer threads.  Returns once PAGE_URL_LIST is exhausted.
    """
    while True:
        gLock.acquire()
        if len(PAGE_URL_LIST) == 0:
            gLock.release()
            break
        page_url = PAGE_URL_LIST.pop()
        gLock.release()
        # Do the slow network fetch and parse outside the lock.
        response = requests.get(page_url)
        soup = BeautifulSoup(response.content, 'lxml')
        img_list = soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'})
        gLock.acquire()
        for img in img_list:
            url = img['data-original']
            # Normalize protocol-relative URLs ("//ws4.sinaimg.cn/...").
            # Bug fix: the original re-read img['data-original'] right after
            # this check, throwing the 'http:' prefix away.
            if not url.startswith('http'):
                url = 'http:' + url
            FACE_URL_LIST.append(url)
        gLock.release()
def customer():
    """Consumer thread: pop image URLs off FACE_URL_LIST and save them.

    Spins while the list is empty, so it never terminates on its own --
    this matches the original tutorial behaviour.
    """
    while True:
        gLock.acquire()
        if len(FACE_URL_LIST) == 0:
            gLock.release()
            continue
        face_url = FACE_URL_LIST.pop()
        gLock.release()
        # Save under images/ using the URL's final path segment as the name.
        filename = face_url.split('/')[-1]
        destination = os.path.join('images', filename)
        urllib.request.urlretrieve(face_url, filename=destination)
def main():
    """Start 5 producer threads (scrape listing pages for image URLs) and
    5 consumer threads (download the collected images)."""
    # Make sure the download target exists before consumers start writing.
    os.makedirs('images', exist_ok=True)
    # Producers pop pages from PAGE_URL_LIST and fill FACE_URL_LIST.
    for x in range(5):
        th = threading.Thread(target=procuder)
        th.start()
    # Consumers drain FACE_URL_LIST and save the images locally.
    for x in range(5):
        th = threading.Thread(target=customer)
        th.start()
    # Bug fix: the original additionally looped get_page() over the whole
    # PAGE_URL_LIST here -- iterating a list the producer threads are
    # concurrently popping from, and downloading every page a second time.
    # The producer threads already cover all pages, so that loop is removed.
# Run the scraper only when executed as a script.
if __name__ == "__main__":
    main()