任务调度程序

题目:一个XXX系统,有多个数据来源,需要每隔一段时间去数据提供方拉取数据,每次拉取有个超时上限,如果超时则kill掉并且记录到日志。因为拉取数据的需求经常变,并且每个数据源变的需求都不一样,所以拉取操作希望能用脚本语言比如php实现,而调度进程无要求。

解:调度进程用c++,php脚本接收两个参数超时时间和任务号,php脚本用sleep模拟拉取数据和处理数据操作。

c++调度程序:

#include <stdlib.h>
#include <unistd.h>
#include <vector>
#include <sys/time.h>
#include <sys/types.h>
#include <signal.h>
#include <string>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <signal.h>
#include <algorithm>
#include <string.h>
//所有的异常先忽略
using namespace std;


// One scheduled job: its numeric id, the second within the scheduling round
// at which it is started, and the number of seconds it may run before the
// scheduler treats it as timed out.
struct Task{
    int id;
    unsigned begin;
    unsigned duration;
};


// Wall-clock second at which the scheduler started; log timestamps are
// written relative to it. Declared as time_t because it is assigned from
// timeval::tv_sec, and a plain int would silently narrow that value.
static time_t gStartSec;


// Length of one scheduling round in seconds (a 20 s cycle is enough for testing).
const unsigned timeRound = 20;

// Task table, one row per task.
//   id | begin (second within the round) | duration (timeout in seconds)
Task tasks[] = {
    { 1,  1, 11},
    { 2,  5, 12},
    { 3,  6, 11},
    { 4,  8, 14},
    { 5, 10, 14},
    { 6, 12, 12},
    { 7, 13, 11},
    { 8, 15, 13},
    { 9, 16, 12},
    {10, 16, 12},
    {11, 17, 11},
    {12, 18, 11},
    {13, 19, 13},
    {14, 19, 13}
};
    
// Returns how many seconds the main loop should sleep: the distance from slot
// `now` to the next slot of `timeMap` that holds at least one event, walking
// forward and wrapping around at the end of the round.
//
// timeMap  one entry per second of the round; each entry lists that second's events.
// now      current slot index; values past the end are wrapped into range.
//
// If the only non-empty slot is `now` itself, or no slot holds any event, the
// result is one full round (timeMap.size()), so the scan always terminates.
// For an empty timeMap there is no round to walk, and 1 is returned so the
// caller keeps polling once per second instead of spinning.
inline unsigned getInterval(const std::vector<std::vector<unsigned>> &timeMap,unsigned now){
    const unsigned slots = static_cast<unsigned>(timeMap.size());
    if(slots == 0)
        return 1;

    unsigned interval = 0;
    do{
        now = (now + 1) % slots;
        interval++;
        // Stop after one full lap even if every slot is empty.
    }while(timeMap[now].empty() && interval < slots);
    return interval;
}


//log函数,上不了网,先用c方法写入文件了
void toLog(const string &str){
    static int fd = -1;
    struct timeval tv;
    gettimeofday(&tv,NULL);
    if(fd == -1)
        fd = open("mainlog",O_WRONLY|O_APPEND|O_CLOEXEC|O_APPEND|O_CREAT,S_IRWXU|S_IRWXG|S_IRWXO);


    //write(fd,(to_string(tv.tv_sec - gStartSec) + ":" + str + "\n").c_str(),str.size());
    string log = to_string(tv.tv_sec - gStartSec) + ":" + str;
    write(fd,log.c_str(),log.size());
}


//每个任务当前执行进程pid数组,0说明没有该编号任务在执行
vector<pid_t> pids(sizeof(tasks) / sizeof(Task),0);


void child_handler(int signo,siginfo_t *psi,void *pv){
    pid_t pid = psi->si_pid;
    int status;
    string log;
    //僵尸done
    waitpid(pid,&status,WNOHANG);


    //防止多个子进程同时挂掉发信号用循环
    while(pid > 0){
        vector<pid_t>::iterator iter = find(pids.begin(),pids.end(),pid);
        log = to_string(pid) + " done,exit status: " + to_string(WEXITSTATUS(status));
        if(iter != pids.end()){
            *iter = 0;
            log += ",taskNum:" + to_string(iter - pids.begin() + 1);
        }
        log += "\n";
        toLog(log);
        pid = waitpid(-1,&status,WNOHANG);
    }
}


int main(){
    //lastTime为了防止本秒任务被重复执行
    unsigned i = 0,j,k,taskNum,interval,lastTime = timeRound * 2;
    int status;
    string log;
    vector<vector<unsigned>> timeMap(timeRound);
    pid_t pid;
    struct itimerval iv;
    struct timeval tv;
    gettimeofday(&tv,NULL);
    gStartSec = tv.tv_sec;


    struct sigaction sa;
    memset(&sa,0,sizeof(sa));
    sa.sa_sigaction = child_handler;
    //sleep可能会用SIGALRM实现
    sigemptyset(&(sa.sa_mask));
    sigaddset(&(sa.sa_mask),SIGALRM);
    sa.sa_flags = SA_SIGINFO;
    //安装sigchld处理程序
    sigaction(SIGCHLD,&sa,NULL);


    //初始化timeMap“事件时间图”,执行事件以0为个位数,超时事件以1为个位数
    while(i < sizeof(tasks) / sizeof(Task)){
        timeMap[tasks[i].begin].push_back(i * 10);
        timeMap[(tasks[i].begin + tasks[i].duration) % timeRound].push_back(i * 10 + 1);
        i++;
    }
    //bool roundOne = true;


    while(true){
        gettimeofday(&tv,NULL);
        i = tv.tv_sec % 20;
        if(lastTime != i && timeMap[i].size() != 0){
            j = 0;
            while(j < timeMap[i].size()){
                k = timeMap[i][j];
                taskNum = k / 10;


                //应该开始
                if(k % 10 == 0){
                    //新的都要开始了老的竟然没有执行完!!!
                    if(pids[taskNum] != 0){
                        //还活着没?是不是子进程?免得误伤
                        if(waitpid(pids[taskNum],&status,WNOHANG) == 0){
                            kill(pids[taskNum],SIGKILL);
                            log = string() + to_string(tasks[taskNum].id) + " lasted too long and killed\n";
                            toLog(log);
                        }
                        pids[taskNum] = 0;
                    }
                    if((pid = fork()) == 0){
                        if(execl("/usr/bin/php","php","/home/ubuntu/liu3/work/sleep.php",to_string(tasks[taskNum].duration).c_str(),to_string(tasks[taskNum].id).c_str(),NULL) == -1)
                            perror(NULL);
                        //should never go there
                        exit(0);
                    }
                    pids[taskNum] = pid;
                    log = string() + to_string(tasks[taskNum].id) + " started, pid = " + to_string(pid) + "\n";
                    toLog(log);
                }
                //应该结束
                else if(k % 10 == 1){
                    //重复操作可以define成宏
                    if(pids[taskNum] != 0){
                        if(waitpid(pids[taskNum],&status,WNOHANG) == 0){
                            kill(pids[taskNum],SIGKILL);
                            log = string() + to_string(tasks[taskNum].id) + " lasted too long and killed\n";
                            toLog(log);
                        }
                        pids[taskNum] = 0;
                    }
                }
                j++;
            }
            lastTime = i;
        }
        interval = getInterval(timeMap,i);
        log = string("now i = ") + to_string(i) + ",will sleep " + to_string(interval) + "\n";
        toLog(log);
        //sleep被唤醒可能是子进程结束,也可能是sleep足时间有新事件要处理被唤醒
        sleep(interval);
    }
}

php:

<?php
// Simulated data-pull worker started by the scheduler.
// Usage: php sleep.php <timeout-seconds> <task-id>
// Writes one line to the file "log" and then sleeps to imitate fetching and
// processing data.
global $argv;
if (count($argv) < 3) {
    fwrite(STDERR, "usage: php sleep.php <timeout-seconds> <task-id>\n");
    exit(2);
}
$time = (int)$argv[1];
$taskId = $argv[2];

// Sleep between 10 s less and 1 s more than the timeout, so only a small
// share of runs overrun it. Clamped at 0 because sleep() does not accept a
// negative duration.
$sleepTime = max(0, $time + rand(-10,1));

// Log line: php:<taskId>:<seconds since a fixed reference time>:<pid>:<timeout>:<sleep>
$fd = fopen("log","a+");
if ($fd !== false) {
    fwrite($fd,"php:$taskId:".(time() - 1499855185).':'.posix_getpid().":$time:$sleepTime\n");
    fclose($fd);
}
sleep($sleepTime);

// About one run in ten exits with a failure code, to exercise the parent's
// SIGCHLD handling.
if(rand(1,10) < 2)
        exit(1);
exit(0);


测试结果,大致的测试和解说:

php:11:73312:28261:11:9
php:12:73313:28262:11:4
php:14:73314:28266:13:8
php:13:73314:28265:13:14        //编号13的任务执行超时,后面应该被kill
php:1:73316:28268:11:12         //编号1的任务本该执行11秒,现在随机到执行12秒
php:2:73320:28272:12:10
php:3:73321:28273:11:12         //任务3也应该被kill,在第20秒
php:4:73323:28274:14:8          //四号任务8秒后应该寿终正寝
php:5:73325:28276:14:12
php:6:73327:28278:12:12
php:7:73328:28279:11:6
php:8:73330:28280:13:10
php:10:73331:28282:12:2
php:9:73331:28281:12:7
php:11:73332:28283:11:10
php:12:73333:28284:11:8
php:14:73334:28289:13:13
php:13:73334:28288:13:12
php:1:73336:28291:11:4  //被kill的任务1 20秒后重启
php:2:73340:28292:12:5
php:3:73341:28293:11:5
php:4:73343:28296:14:7
php:5:73345:28299:14:10
php:6:73347:28302:12:7
php:7:73348:28304:11:12
php:8:73350:28305:13:11
php:10:73351:28307:12:7
php:9:73351:28306:12:11
php:11:73352:28308:11:5
php:12:73353:28309:11:12
php:14:73354:28313:13:5
php:13:73354:28312:13:11
php:1:73356:28330:11:4



0:11 started, pid = 28261
0:now i = 17,will sleep 1
1:12 started, pid = 28262
1:now i = 18,will sleep 1
2:13 started, pid = 28265
2:14 started, pid = 28266
2:now i = 19,will sleep 2
4:1 started, pid = 28268        //第四秒编号1的任务执行
4:now i = 1,will sleep 1
5:now i = 2,will sleep 2
5:28262 done,exit status: 0,taskNum:12
5:now i = 2,will sleep 2
7:now i = 4,will sleep 1
8:2 started, pid = 28272
8:now i = 5,will sleep 1
9:28261 done,exit status: 0,taskNum:11
9:3 started, pid = 28273        //任务3随机到执行12秒,超过11秒上限,应该在第20秒被kill
9:now i = 6,will sleep 2
10:28266 done,exit status: 1,taskNum:14
10:now i = 7,will sleep 1
11:4 started, pid = 28274       //四号应该在19秒正常退出
11:now i = 8,will sleep 1
12:now i = 9,will sleep 1
13:5 started, pid = 28276
13:now i = 10,will sleep 2
15:1 lasted too long and killed         //11秒以后编号为1的任务还没执行完被kill掉
15:6 started, pid = 28278
15:13 lasted too long and killed
15:now i = 12,will sleep 1
15:28268 done,exit status: 0    //编号1任务如愿被kill,至于status为0,to do
15:28265 done,exit status: 0    //同理编号为13的,提前两秒执行的任务也在15秒被kill
15:now i = 12,will sleep 1
16:7 started, pid = 28279
16:now i = 13,will sleep 2
18:8 started, pid = 28280
18:now i = 15,will sleep 1
18:28272 done,exit status: 0,taskNum:2
18:now i = 15,will sleep 1
19:9 started, pid = 28281
19:10 started, pid = 28282
19:now i = 16,will sleep 1
19:28274 done,exit status: 0,taskNum:4  //四号正常退出
19:now i = 16,will sleep 1
20:3 lasted too long and killed         //任务3超时被kill
20:11 started, pid = 28283
20:now i = 17,will sleep 1
20:28273 done,exit status: 0
20:now i = 17,will sleep 1
21:12 started, pid = 28284
21:now i = 18,will sleep 1
21:28282 done,exit status: 0,taskNum:10
21:now i = 18,will sleep 1
22:28279 done,exit status: 0,taskNum:7
22:13 started, pid = 28288
22:14 started, pid = 28289
22:now i = 19,will sleep 2
24:1 started, pid = 28291       //被kill的1在第二个轮回如约重启
24:now i = 1,will sleep 1
25:28276 done,exit status: 0,taskNum:5
25:now i = 2,will sleep 2
26:28281 done,exit status: 0,taskNum:9
26:now i = 3,will sleep 1
27:28278 done,exit status: 0,taskNum:6
27:now i = 4,will sleep 1

算接触过的程序里比较麻烦点的。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值