Multiprocessing Tutorial

This article covers multiprocessing in Python: a first look at spawning processes and customizing the process name, using Locks, logging (including saving logs to disk), starting many worker processes with a process pool, and inter-process communication. It closes with notes on using the module on Windows and why it can fail there.

Getting Started

A simple example

import os
 
from multiprocessing import Process
 
def doubler(number):
    """
    A doubling function that can be used by a process
    """
    result = number * 2
    proc = os.getpid()
    print('{0} doubled to {1} by process id: {2}'.format(
        number, result, proc))
 
if __name__ == '__main__':
    numbers = [5, 10, 15, 20, 25]
    procs = []
 
    for index, number in enumerate(numbers):
        proc = Process(target=doubler, args=(number,))
        procs.append(proc)
        proc.start()
 
    for proc in procs:
        proc.join()
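
The example above starts one process per input number. On real workloads you usually cap the number of workers at the number of CPU cores; multiprocessing.cpu_count() reports that figure (a small aside, not part of the original example):

import multiprocessing

if __name__ == '__main__':
    # cpu_count() reports how many CPUs the machine exposes -- a common
    # upper bound for how many worker processes to start at once.
    print('CPUs available:', multiprocessing.cpu_count())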

Customizing the process name

import os
 
from multiprocessing import Process, current_process
 
 
def doubler(number):
    """
    A doubling function that can be used by a process
    """
    result = number * 2
    proc_name = current_process().name
    print('{0} doubled to {1} by: {2}'.format(
        number, result, proc_name))
 
 
if __name__ == '__main__':
    numbers = [5, 10, 15, 20, 25]
    procs = []
 
    for index, number in enumerate(numbers):
        proc = Process(target=doubler, args=(number,))
        procs.append(proc)
        proc.start()
 
    proc = Process(target=doubler, name='Test', args=(2,))
    proc.start()
    procs.append(proc)
 
    for proc in procs:
        proc.join()
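
Besides name, a started Process exposes a few attributes that are handy when supervising workers. A short sketch (not part of the original example) using pid, is_alive(), and exitcode:

from multiprocessing import Process, current_process


def doubler(number):
    print('{0} doubled to {1} by: {2}'.format(
        number, number * 2, current_process().name))


if __name__ == '__main__':
    proc = Process(target=doubler, name='Worker-1', args=(21,))
    proc.start()
    print(proc.name, proc.pid, proc.is_alive())  # name, OS pid, liveness of the child
    proc.join()
    print(proc.exitcode)  # 0 means the child exited cleanly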

Locks

from multiprocessing import Process, Lock
 
lock = Lock()
 
 
def printer(item):
    """
    Prints out the item that was passed in
    """
    lock.acquire()
    try:
        print(item)
    finally:
        lock.release()
 
if __name__ == '__main__':
    items = ['tango', 'foxtrot', 10]
    for item in items:
        p = Process(target=printer, args=(item,))
        p.start()
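
One caveat the example glosses over: a module-level Lock is only truly shared when processes are started with the fork start method (the default on Linux). Under the spawn start method used on Windows, every child re-imports the module and builds its own, unrelated Lock. A more portable sketch (my adaptation, not the article's code) creates the Lock in the parent and passes it to each worker:

from multiprocessing import Process, Lock


def printer(item, lock):
    """
    Prints out the item while holding the shared lock
    """
    with lock:  # acquire()/release() handled by the context manager
        print(item)


if __name__ == '__main__':
    lock = Lock()  # created once in the parent, handed to every child
    items = ['tango', 'foxtrot', 10]
    procs = [Process(target=printer, args=(item, lock)) for item in items]
    for p in procs:
        p.start()
    for p in procs:
        p.join()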

Logging

The multiprocessing module ships with its own logger. Note that Python's logging package doesn't use process-shared locks, so it's possible for messages from different processes to get mixed up.

import logging
import multiprocessing
 
from multiprocessing import Process, Lock
 
lock = Lock()
 
def printer(item):
    """
    Prints out the item that was passed in
    """
    lock.acquire()
    try:
        print(item)
    finally:
        lock.release()
 
if __name__ == '__main__':
    items = ['tango', 'foxtrot', 10]
    multiprocessing.log_to_stderr()
    logger = multiprocessing.get_logger()
    logger.setLevel(logging.INFO)
    for item in items:
        p = Process(target=printer, args=(item,))
        p.start()

Saving the log to disk

import logging
import multiprocessing

from multiprocessing import Process, Lock

lock = Lock()

def create_logger():
    logger = multiprocessing.get_logger()
    logger.setLevel(logging.INFO)

    fh = logging.FileHandler("process.log")

    fmt = '%(asctime)s - %(levelname)s - %(message)s'
    formatter = logging.Formatter(fmt)
    fh.setFormatter(formatter)

    logger.addHandler(fh)
    return logger

def printer(item):
    """
    Prints out the item that was passed in
    """
    lock.acquire()
    try:
        print(item)
    finally:
        lock.release()

if __name__ == '__main__':
    items = ['tango', 'foxtrot', 10]
    logger = create_logger()
    for item in items:
        p = Process(target=printer, args=(item,))
        p.start()
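
Even with a FileHandler, records written from several processes can interleave in the file. A more robust pattern, sketched below from standard-library pieces (not from the original article), sends every record through a multiprocessing.Queue and lets a single QueueListener in the parent do all the writing:

import logging
import logging.handlers
from multiprocessing import Process, Queue


def worker(number, log_queue):
    # Forward this process's log records to the shared queue.
    logger = logging.getLogger()
    logger.addHandler(logging.handlers.QueueHandler(log_queue))
    logger.setLevel(logging.INFO)
    logger.info('%s doubled to %s', number, number * 2)


if __name__ == '__main__':
    log_queue = Queue()
    fh = logging.FileHandler('process.log')
    fh.setFormatter(logging.Formatter(
        '%(asctime)s - %(processName)s - %(message)s'))

    # A single listener thread in the parent writes every record to disk.
    listener = logging.handlers.QueueListener(log_queue, fh)
    listener.start()

    procs = [Process(target=worker, args=(n, log_queue)) for n in [5, 10, 15]]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

    listener.stop()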

Process Pools

When you need to start a large number of worker processes, use a process pool.

from multiprocessing import Pool
 
def doubler(number):
    return number * 2
 
if __name__ == '__main__':
    numbers = [5, 10, 20]
    pool = Pool(processes=3)
    print(pool.map(doubler, numbers))

You can also get the result of a process in the pool by using the apply_async method:

from multiprocessing import Pool
 
def doubler(number):
    return number * 2
 
if __name__ == '__main__':
    pool = Pool(processes=3)
    result = pool.apply_async(doubler, (25,))
    print(result.get(timeout=1))  # timeout so the call doesn't block indefinitely
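
Pool also works as a context manager. Note that leaving the with block calls terminate(), so collect your results inside it. A minimal sketch (not from the original article):

from multiprocessing import Pool


def doubler(number):
    return number * 2


if __name__ == '__main__':
    # __exit__ calls pool.terminate(), so fetch results before leaving the block.
    with Pool(processes=3) as pool:
        print(pool.map(doubler, range(10)))   # blocks until all results are in
        async_result = pool.map_async(doubler, [1, 2, 3])
        print(async_result.get(timeout=5))    # same data, fetched asynchronously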

Inter-Process Communication

from multiprocessing import Process, Queue
 
sentinel = -1
 
def creator(data, q):
    """
    Creates data to be consumed and waits for the consumer
    to finish processing
    """
    print('Creating data and putting it on the queue')
    for item in data:
 
        q.put(item)
 
 
def my_consumer(q):
    """
    Consumes some data and works on it
 
    In this case, all it does is double the input
    """
    while True:
        data = q.get()
        if data == sentinel:  # compare by value; stop once the sentinel arrives
            break

        print('data found to be processed: {}'.format(data))
        processed = data * 2
        print(processed)
 
 
if __name__ == '__main__':
    q = Queue()
    data = [5, 10, 13, -1]
    process_one = Process(target=creator, args=(data, q))
    process_two = Process(target=my_consumer, args=(q,))
    process_one.start()
    process_two.start()
 
    q.close()
    q.join_thread()
 
    process_one.join()
    process_two.join()
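
For two processes that only need to talk to each other, multiprocessing also provides Pipe, which returns a pair of connected endpoints. A minimal sketch (not part of the original example), again using a sentinel to stop the child:

from multiprocessing import Process, Pipe


def doubler(conn):
    """
    Receives numbers on the connection and sends back their doubles
    """
    while True:
        number = conn.recv()
        if number is None:  # sentinel: the parent is done sending
            break
        conn.send(number * 2)
    conn.close()


if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    proc = Process(target=doubler, args=(child_conn,))
    proc.start()

    for number in [5, 10, 15]:
        parent_conn.send(number)
        print(parent_conn.recv())

    parent_conn.send(None)  # tell the child to stop
    proc.join()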

Notes

When using multiprocessing on Windows you need to call freeze_support(); see the freeze_support entry in the references below for an explanation.

On Unix/Linux, the multiprocessing module wraps the fork() call, so we don't have to worry about the details of fork(). Windows has no fork call, so multiprocessing has to "simulate" fork: every Python object the child needs must be serialized with pickle in the parent and sent to the child process. As a result, if multiprocessing fails on Windows, the first thing to check is whether pickling failed.

from multiprocessing import freeze_support

if __name__ == '__main__':
    # Call freeze_support() first thing under the __main__ guard; it is a
    # no-op on platforms other than Windows, so no platform check is needed.
    freeze_support()
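
To see the pickling requirement in action without a Windows machine, the sketch below (mine, not from the referenced articles) forces the spawn start method that Windows uses; starting a process whose target is a lambda then fails, because a lambda cannot be pickled:

import multiprocessing

if __name__ == '__main__':
    # 'spawn' is the start method Windows always uses; forcing it on
    # Unix reproduces the same pickling requirement for the child process.
    multiprocessing.set_start_method('spawn')

    # A lambda cannot be pickled, so start() raises a pickling error --
    # the same kind of failure you would see on Windows.
    proc = multiprocessing.Process(target=lambda: print('hello'))
    try:
        proc.start()
    except Exception as exc:
        print('start failed:', exc)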

References:
https://dzone.com/articles/python-201-a-multiprocessing-tutorial
freeze_support (Python documentation, multiprocessing)
廖雪峰 (Liao Xuefeng) - 多进程 (Multiprocessing)
