python实现多个协程协作

最新推荐文章于 2024-10-16 11:07:31 发布

hyalone2010

最新推荐文章于 2024-10-16 11:07:31 发布

阅读量880

点赞数

CC 4.0 BY-SA版权

分类专栏： python

本文链接：https://blog.youkuaiyun.com/hyalone2010/article/details/40477151

python 专栏收录该内容

9 篇文章

订阅专栏

本文介绍了一个简单的多协程协作处理框架，该框架适用于处理包含分支和合并的数据流程，尤其当使用Hadoop、Spark或Storm等工具显得过于复杂时。通过定义不同的工作函数和它们之间的交互方式，开发者可以轻松地构建复杂的数据处理管道。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

有时需要编写类似流处理的程序：当处理流程中存在分支、合并时，函数式编程不太适合，而用 hadoop、spark、storm 等工具又可能没有太大必要。

这里做一个简单的多协程协作处理框架：它基于消息处理的模型，各节点的处理逻辑可以任意发挥。

#!/usr/bin/python
#coding:utf8

import sys
from collections import deque

try:
    from enum import Enum
except ImportError:
    # Fallback for interpreters without the enum module: a minimal stand-in
    # that exposes each space-separated key as an integer attribute (1, 2, ...).
    # Only ImportError is caught so unrelated failures stay visible.
    class Enum(object):
        def __init__(self, name, keys):
            for i, key in enumerate(keys.split()):
                setattr(self, key, i + 1)

# Other modules that use this one need:
#     from cowork import cowork, MessageType
MessageType = Enum('MessageType', 'Start Data Request Close Stop')


def cowork(config, variables=None):
    """Run the worker functions named in *config* as cooperating coroutines.

    Messages are 4-tuples ``(target, source, cmd, info)``.  Every worker first
    receives a Start message; queued messages are then delivered in FIFO order
    until the queue is empty; finally every worker receives a Stop message so
    it can release its resources.

    Args:
        config: dict mapping a worker function name to that worker's settings
            dict.  The engine reads the "requestfrom" and "sendto" entries;
            the remaining keys are interpreted by the worker itself.
        variables: dict mapping names to worker function objects.  When it is
            falsy, the mapping returned by ``getvars()`` is used instead.
    """
    fname = "cowork"  # sender name used for messages issued by this dispatcher
    if not variables:
        variables = getvars()
    workers = [variables[name] for name in config]
    doworkers = dict((worker.__name__, workerengine(worker, config))
                     for worker in workers)

    # Prime each generator so it is suspended at its first ``yield`` and can
    # accept send(); then queue the Start message for it.
    messagebuffer = deque()
    for name, doworker in doworkers.items():
        next(doworker)
        messagebuffer.append((name, fname, MessageType.Start, None))

    # Deliver messages until no worker produces any more.
    while messagebuffer:
        message = messagebuffer.popleft()
        if debug:
            sys.stderr.write("to send " + str(message) + "\n")
        replies = doworkers[message[0]].send(message)
        messagebuffer.extend(reply for reply in replies if reply)

    # Shut down.  The original code re-sent Start here, which made the file
    # workers reopen (and truncate) their files.  Replies to Stop are dropped.
    for name, doworker in doworkers.items():
        try:
            doworker.send((name, fname, MessageType.Stop, None))
        except StopIteration:
            # Expected: the engine leaves its loop once Stop has been handled.
            pass
        doworker.close()  # no-op when the generator has already finished


# Set to a truthy value to trace every delivered message on stderr.
debug = 0

# When cowork() is used from another file the workers live in a different
# namespace, so that file must either pass the second argument to cowork()
# or register a provider through setvarfunc().
getvars = None


def setvarfunc(f):
    """Register *f* as the zero-argument provider of the worker namespace."""
    global getvars
    getvars = f


# This line can be copied into another file; afterwards cowork() can be
# called there with only the config argument.
setvarfunc(lambda x=vars(): x)


def workerengine(worker, config):
    """Generator that drives *worker* and supplies default message handling.

    The engine receives ``(target, sender, cmd, info)`` tuples through
    ``send()`` and yields the list of messages to dispatch next.  It calls
    ``worker(workconfig, cmd, info, sender)``; a non-None return value is
    taken as a list of ``(receiver, cmd, info)`` tuples to send.  When the
    worker returns None the defaults apply:

    * Stop    - leave the loop, so the caller's ``send()`` raises
                StopIteration.
    * Start   - a worker without "sendto" targets requests data from each of
                its sources.
    * Close   - forget the closed source.
    * Request - pass the request on to every source.
    * Data    - forward to every target, or write to stdout when there are
                no targets, then request more from every source.
    """
    fname = worker.__name__
    workconfig = config.get(fname, {})
    # Work on copies: handling Close must not edit the caller's config lists.
    sourcenames = list(workconfig.get("requestfrom", []))
    targetnames = list(workconfig.get("sendto", []))

    def request_all():
        return [(sourcename, fname, MessageType.Request, None)
                for sourcename in sourcenames]

    result = []
    while True:
        target, sender, cmd, info = (yield result)
        result = []
        messages = worker(workconfig, cmd, info, sender)
        if messages is not None:
            # Stamp this worker's name as the sender of each outgoing message.
            for receiver, outcmd, outinfo in messages:
                result.append((receiver, fname, outcmd, outinfo))
        elif cmd == MessageType.Stop:
            break
        elif cmd == MessageType.Start:
            if not targetnames:
                result.extend(request_all())
        elif cmd == MessageType.Close:
            # Tolerate a repeated Close, or one from an unlisted sender.
            if sender in sourcenames:
                sourcenames.remove(sender)
        elif cmd == MessageType.Request:
            result.extend(request_all())
        elif cmd == MessageType.Data:
            if targetnames:
                for targetname in targetnames:
                    result.append((targetname, fname, MessageType.Data, info))
            else:
                # Valid on Python 2 and 3; for newline-terminated lines this
                # matches the original ``print info,``.
                sys.stdout.write(str(info))
            # After passing the data on, ask the sources for more.
            result.extend(request_all())


def fromfile(workconfig, cmd, info, sender):
    """Worker that reads lines from files (or stdin) on request.

    Start opens every file named in ``workconfig["inputfile"]`` (stdin when
    the list is empty) and stores the handles in ``workconfig["inputs"]``.
    Request reads one line from every open input and returns a Data message
    per line and "sendto" target; an exhausted input is closed and dropped,
    and when the last one is dropped a single Close message is sent to every
    target.  Stop closes whatever is still open.

    Returns a list of ``(receiver, cmd, info)`` tuples for Request, and None
    for every other command.
    """
    if cmd == MessageType.Start:
        inputfiles = workconfig.get("inputfile", [])
        if inputfiles:
            workconfig["inputs"] = dict((name, open(name))
                                        for name in inputfiles)
        else:
            workconfig["inputs"] = {"stdin": sys.stdin}
        return None
    if cmd == MessageType.Stop:
        for handle in workconfig.get("inputs", {}).values():
            handle.close()
        return None
    if cmd != MessageType.Request:
        return None

    inputs = workconfig["inputs"]
    targets = workconfig.get("sendto", [])
    res = []
    exhausted = []
    for name, handle in inputs.items():
        try:
            line = next(handle)
        except StopIteration:
            handle.close()
            exhausted.append(name)
            continue
        for targetname in targets:
            res.append((targetname, MessageType.Data, line))
    # Drop finished inputs after the loop; the dict must not change size
    # while it is being iterated.
    for name in exhausted:
        del inputs[name]
    # Announce the end exactly once: only in the call that used up the last
    # input, so later Requests do not produce further Close messages.
    if exhausted and not inputs:
        for targetname in targets:
            res.append((targetname, MessageType.Close, None))
    return res


def transfer(workconfig, cmd, info, sender):
    """Pass-through worker: returns None so the engine defaults apply."""
    return None


def printer(workconfig, cmd, info, sender):
    """Worker that shows results on screen.

    It returns None for every command; with no "sendto" targets configured,
    the engine's default Data handling writes the payload to stdout.
    """
    return None


def tofile(workconfig, cmd, info, sender):
    """Worker that writes every received data item to the output files.

    Start opens each file named in ``workconfig["outputfile"]`` for writing
    and stores the handles in ``workconfig["outputs"]``; Data writes *info*
    to every handle; Stop closes them.

    Returns ``[]`` for Data, which suppresses the engine's default Data
    handling, and None for every other command.
    """
    if cmd == MessageType.Start:
        workconfig["outputs"] = dict(
            (name, open(name, "w"))
            for name in workconfig.get("outputfile", []))
        return None
    if cmd == MessageType.Stop:
        # Close the file objects (the dict values), not the file-name keys.
        for output in workconfig.get("outputs", {}).values():
            output.close()
        return None
    if cmd == MessageType.Data:
        for output in workconfig["outputs"].values():
            output.write(info)
        return []
    return None

if __name__ == '__main__':
    # Usage: python cowork.py [inputfile [outputfile]]
    # A one-element slice yields [] when the argument was not given.
    infilenames = sys.argv[1:2]
    outfilenames = sys.argv[2:3]
    # Pipeline: fromfile -> transfer -> (tofile, printer)
    config = {
        "fromfile": {"inputfile": infilenames, "sendto": ["transfer"]},
        "transfer": {
            "requestfrom": ["fromfile"],
            "sendto": ["tofile", "printer"],
        },
        "tofile": {"requestfrom": ["transfer"], "outputfile": outfilenames},
        # printer has no "requestfrom" entry: tofile already requests from
        # transfer, and only one branch should issue those requests.
        "printer": {},
    }
    cowork(config, vars())

测试

输入文件：

cat test.txt

1 3
4 5
8 9

执行命令：

python cowork.py test.txt test_copy.txt

1 3
4 5
8 9

同时生成test_copy.txt

计算拓扑图（后面出现多个分支时，后面的节点不要重复向前面的节点发送 request，只需其中一个节点发送即可）：