I. Background
Getting a computer to run jobs concurrently is an important way to improve throughput. Concurrency usually brings threads to mind: threads created within one process share its resources, and creating a thread is cheaper than creating a process. The trade-off is that threads must synchronize their access to shared resources (via locks), which costs time, and multithreaded programming is unfriendly to many programmers; after all, most of us think serially, and the von Neumann architecture itself is built on sequential execution.
The key point for Python (more precisely, CPython) is its Global Interpreter Lock (GIL): only one thread can execute Python bytecode at a time, so multithreading cannot achieve true parallelism for CPU-bound work (threads remain useful for I/O-bound concurrency, where they spend most of their time waiting). For CPU-bound parallelism, Python therefore turns to multiple processes.
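To see the GIL's effect concretely, here is a minimal sketch (the function name and the constant are my own; timings will vary by machine) that runs the same CPU-bound countdown first on two threads, then on two processes. On a multi-core machine the process version finishes in roughly half the time, while the threaded version is no faster than running serially.
import time
import threading
import multiprocessing

def count_down(n):
    # pure CPU work: under the GIL only one thread can run this at a time
    while n > 0:
        n -= 1

N = 10000000

if __name__ == '__main__':
    # two threads: roughly the same wall time as running serially
    start = time.time()
    threads = [threading.Thread(target=count_down, args=(N,)) for _ in range(2)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    print('threads:  ', time.time() - start)

    # two processes: each has its own interpreter and GIL, so they run in parallel
    start = time.time()
    procs = [multiprocessing.Process(target=count_down, args=(N,)) for _ in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print('processes:', time.time() - start)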
The natural follow-up question is communication: processes do not share memory, so talking between them is more awkward than sharing state between threads. Fortunately, the multiprocessing package provides several inter-process communication (IPC) primitives, including Pipe and Queue, both of which are demonstrated in Part II below.
For larger projects deployed across several servers, communication between machines is harder still, but mature frameworks exist for that, such as the classic task queue Celery; there are also many ways for remote hosts to talk to each other, for example messaging systems like Kafka.
II. Hands-on Code
Each example below is given as code only, but the code is commented in detail; the comments carry most of the explanation.
Reference: Python标准库10 多进程初步 (multiprocessing包) ("Python Standard Library 10: a first look at the multiprocessing package").
1. multithread (procedural style)
import threading
import time
import os

# A program to simulate selling tickets with multiple threads.
# This version is written in a procedural style; for the
# OOP (object-oriented programming) version, see 'multhr_oop.py'.

# This function stands in for any other chore a thread might do.
def doChore():
    # time.sleep(0.5)
    print('hh, I am drinking water!')

# Function run by each thread.
def booth(tid):
    # The 'global' keyword is needed because 'i' is rebound inside the
    # function ('i = i - 1'); without it, Python would treat 'i' as a new
    # local variable. Mutable objects that are only modified in place
    # (never rebound) do not need a 'global' statement.
    global i
    global lock
    while True:
        lock.acquire()                        # lock; or wait if another thread holds it
        if i != 0:
            i = i - 1                         # sell a ticket
            print(tid, ':now left:', i)       # tickets left
            doChore()                         # other critical operations
        else:
            print('Thread_id', tid, 'No more tickets left')
            os._exit(0)                       # exit the whole process immediately
        lock.release()                        # unlock
        print('now, I release the lock')
        doChore()                             # non-critical operations

# Start of the main script: define the two global variables.
i = 100                  # number of available tickets
lock = threading.Lock()  # Lock, i.e. a mutex, for thread synchronization

for k in range(10):
    # target: the callable the thread runs; args: the arguments passed to it
    new_thread = threading.Thread(target=booth, args=(k,))
    new_thread.start()   # run the thread
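One stylistic note on the acquire/release pair above: a Lock also works as a context manager, which releases the lock even if the critical section raises an exception. Here is a sketch of booth rewritten that way, reusing i, lock, doChore, and os from the script above (the name booth_with is my own):
def booth_with(tid):
    global i
    while True:
        with lock:   # acquired on entry, released on exit, even on exceptions
            if i == 0:
                print('Thread_id', tid, 'No more tickets left')
                os._exit(0)
            i = i - 1
            print(tid, ':now left:', i)
            doChore()
        doChore()    # non-critical operations, outside the lock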
2. multithread (object-oriented, OOP style)
# A program to simulate selling tickets with multiple threads,
# written in OOP (object-oriented programming) style.
import threading
import time
import os

# This function stands in for any other chore a thread might do.
def doChore():
    time.sleep(0.5)

# Each booth is a thread: a class inheriting from Thread.
class BoothThread(threading.Thread):
    def __init__(self, tid, monitor):
        self.tid = tid
        self.monitor = monitor
        threading.Thread.__init__(self)
    def run(self):
        while True:
            self.monitor['lock'].acquire()  # lock; or wait if another thread holds it
            if self.monitor['tick'] != 0:
                self.monitor['tick'] -= 1   # sell a ticket
                print(self.tid, ':now left:', self.monitor['tick'])  # tickets left
                doChore()                   # other critical operations
            else:
                print('Thread_id', self.tid, 'No more tickets')
                os._exit(0)                 # exit the whole process immediately
            self.monitor['lock'].release()  # unlock
            doChore()                       # non-critical operations

# Start of the main script.
# Shared state is passed in as a dictionary (a mutable object), so the threads
# need no 'global' statements. Passing state explicitly like this is a common
# trick; it matters even more under multiprocessing on Windows, where child
# processes do not share the parent's global variables.
monitor = {'tick': 100, 'lock': threading.Lock()}

# start 10 threads
for k in range(10):
    new_thread = BoothThread(k, monitor)
    new_thread.start()
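os._exit(0) kills the whole process abruptly, which is fine for a demo but skips any cleanup. A gentler sketch (the class name is my own, and it reuses monitor and doChore from above) has each thread simply break out of its loop when the tickets run out, so the main thread can join() them all for a clean finish:
class PoliteBoothThread(threading.Thread):
    def __init__(self, tid, monitor):
        threading.Thread.__init__(self)
        self.tid = tid
        self.monitor = monitor
    def run(self):
        while True:
            with self.monitor['lock']:
                if self.monitor['tick'] == 0:
                    break                # tickets gone: end this thread normally
                self.monitor['tick'] -= 1
                print(self.tid, ':now left:', self.monitor['tick'])
            doChore()

threads = [PoliteBoothThread(k, monitor) for k in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()                             # main thread waits for a clean finish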
3. multiprocessing_test.py
import os
import threading
import multiprocessing

def worker(sign, lock):
    lock.acquire()
    print(sign, os.getpid())
    lock.release()

print('Main:', os.getpid())

# multi-thread
record = []
lock = threading.Lock()
for i in range(5):
    thread = threading.Thread(target=worker, args=('thread', lock))
    thread.start()
    record.append(thread)
for thread in record:
    thread.join()
''' # output of the multi-thread part
thread 31404
thread 31404
thread 31404
thread 31404
thread 31404
# Note that all threads share the PID of the main process.
'''

# multi-process
record = []
lock = multiprocessing.Lock()
for i in range(5):
    process = multiprocessing.Process(target=worker, args=('process', lock))
    process.start()
    record.append(process)
for process in record:
    process.join()
''' # output of the multi-process part
process 32063
process 32064
process 32067
process 32070
process 32073
# Each process has its own PID, and none matches the main process's PID.
'''
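One portability caveat: the scripts in this article create processes at module top level, which works on Linux (the fork start method) but fails on Windows, and on recent macOS, where the default spawn start method re-imports the module in each child. The usual fix is to put process creation under a guard, as in this sketch of the multi-process half above (reusing worker):
if __name__ == '__main__':   # not run when a child process re-imports the module
    lock = multiprocessing.Lock()
    record = []
    for i in range(5):
        process = multiprocessing.Process(target=worker, args=('process', lock))
        process.start()
        record.append(process)
    for process in record:
        process.join()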
4. IPC (inter-process communication) with multiprocessing's Pipe
# multiprocessing with Pipe
import multiprocessing as mul

def proc1(pipe):
    pipe.send('hello')
    print('proc1 rec:', pipe.recv())

def proc2(pipe):
    print('proc2 rec:', pipe.recv())
    pipe.send('hello too')  # reply, so proc1's recv() does not block forever

# Build a pipe. A pipe can be half-duplex or duplex:
# use 'multiprocessing.Pipe(duplex=False)' to build a half-duplex pipe;
# here we build a duplex (two-way) pipe.
pipe = mul.Pipe()

# pass one end of the pipe to process 1
p1 = mul.Process(target=proc1, args=(pipe[0],))
# pass the other end of the pipe to process 2
p2 = mul.Process(target=proc2, args=(pipe[1],))
p1.start()
p2.start()
p1.join()
p2.join()
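The comments above mention the half-duplex variant; here is a self-contained sketch of it (function names are my own). Pipe(duplex=False) returns a (receiving end, sending end) pair, and data can flow in only one direction:
import multiprocessing as mul

def sender(conn):
    conn.send('one-way hello')
    conn.close()

def receiver(conn):
    print('receiver got:', conn.recv())

if __name__ == '__main__':
    recv_end, send_end = mul.Pipe(duplex=False)  # first end only receives, second only sends
    p1 = mul.Process(target=sender, args=(send_end,))
    p2 = mul.Process(target=receiver, args=(recv_end,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()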
5. IPC (inter-process communication) with multiprocessing's Queue
import os
import multiprocessing
import time

# input worker: put one message into the queue
def inputQ(queue):
    info = str(os.getpid()) + '(put):' + str(time.time())
    queue.put(info)

# output worker: take one message out of the queue and print it
def outputQ(queue, lock):
    info = queue.get()
    lock.acquire()
    print(str(os.getpid()) + '(get):' + info)
    lock.release()

# main
record1 = []  # store the input processes
record2 = []  # store the output processes
lock = multiprocessing.Lock()     # to keep the printed lines from interleaving
queue = multiprocessing.Queue(3)  # a queue that holds at most 3 items at a time

# input processes
for i in range(10):
    process = multiprocessing.Process(target=inputQ, args=(queue,))
    process.start()
    record1.append(process)

# output processes
for i in range(10):
    process = multiprocessing.Process(target=outputQ, args=(queue, lock))
    process.start()
    record2.append(process)

for p in record1:
    p.join()
queue.close()  # no more objects will come; close the queue
for p in record2:
    p.join()
''' # the output looks like this
32752(get):32742(put):1478843371.22
32753(get):32740(put):1478843371.23
32754(get):32739(put):1478843371.22
32755(get):32741(put):1478843371.23
32756(get):32743(put):1478843371.23
32758(get):32745(put):1478843371.23
32761(get):32744(put):1478843371.23
32762(get):32746(put):1478843371.23
32764(get):32749(put):1478843371.23
32766(get):32751(put):1478843371.23
'''
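The example above works because it starts exactly one get() per put(). When the number of items is not known in advance, a common alternative is the sentinel pattern: one long-lived consumer loops on queue.get() until it sees a special stop marker. A self-contained sketch (names are my own):
import multiprocessing

STOP = 'STOP'  # sentinel: any unique value the producers never send as data

def consumer(queue):
    while True:
        item = queue.get()
        if item == STOP:   # producers are done; leave the loop
            break
        print('got:', item)

if __name__ == '__main__':
    queue = multiprocessing.Queue()
    c = multiprocessing.Process(target=consumer, args=(queue,))
    c.start()
    for i in range(10):
        queue.put(i)
    queue.put(STOP)        # tell the consumer to finish
    c.join()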