题目:一个XXX系统,有多个数据来源,需要每隔一段时间去数据提供方拉取数据,每次拉取有个超时上限,如果超时则kill掉并且记录到日志。因为拉取数据的需求经常变,并且每个数据源变的需求都不一样,所以拉取操作希望能用脚本语言(比如php)实现,而调度进程无要求。
php:
算接触过的程序里比较麻烦点的。
解:调度进程用c++,php脚本接收两个参数超时时间和任务号,php脚本用sleep模拟拉取数据和处理数据操作。
c++调度程序:
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <algorithm>
#include <string>
#include <vector>
// All error handling is deliberately ignored for now.
using namespace std;
// One scheduled job: its id, the second within the scheduling round at which
// it starts, and its timeout in seconds.
struct Task {
    int id;
    unsigned begin;
    unsigned duration;
};
// Wall-clock second at which the scheduler started (assigned in main);
// toLog writes timestamps relative to it.
static int gStartSec;
// A 20-second scheduling round is enough for testing.
const unsigned timeRound = 20;
// Task table. Each entry is {id, begin, duration} in the field order of Task:
// begin is the second within the round at which the task is started, and
// duration is the timeout in seconds after which it is killed.
Task tasks[] = {
{1,1,11},
{2,5,12},
{3,6,11},
{4,8,14},
{5,10,14},
{6,12,12},
{7,13,11},
{8,15,13},
{9,16,12},
{10,16,12},
{11,17,11},
{12,18,11},
{13,19,13},
{14,19,13}
};
// Returns how many seconds the main loop should sleep: the distance from slot
// `now` to the next slot of `timeMap` that holds at least one event (a task
// start or a timeout), walking forward and wrapping around at the end.
//
//   timeMap  one event list per second of the scheduling round
//   now      index of the current slot; out-of-range values are wrapped
//
// The result is always >= 1. When `now` is the only slot with events, or when
// no slot holds any event at all, one full round (timeMap.size()) is returned
// instead of looping forever. An empty timeMap yields 1.
inline unsigned getInterval(const std::vector<std::vector<unsigned>> &timeMap,unsigned now){
    const std::size_t slots = timeMap.size();
    if(slots == 0)
        return 1;
    std::size_t pos = now % slots;
    unsigned interval = 0;
    do{
        pos = (pos + 1) % slots;
        interval++;
        // Stop after one full lap even if nothing was found.
    }while(timeMap[pos].empty() && interval < slots);
    return interval;
}
//log函数,上不了网,先用c方法写入文件了
void toLog(const string &str){
static int fd = -1;
struct timeval tv;
gettimeofday(&tv,NULL);
if(fd == -1)
fd = open("mainlog",O_WRONLY|O_APPEND|O_CLOEXEC|O_APPEND|O_CREAT,S_IRWXU|S_IRWXG|S_IRWXO);
//write(fd,(to_string(tv.tv_sec - gStartSec) + ":" + str + "\n").c_str(),str.size());
string log = to_string(tv.tv_sec - gStartSec) + ":" + str;
write(fd,log.c_str(),log.size());
}
// pid of the process currently running each task, one slot per entry of
// `tasks`; 0 means no process is running for that task.
vector<pid_t> pids(sizeof(tasks) / sizeof(Task),0);
// SIGCHLD handler: reaps every child that has terminated, clears its slot in
// `pids` and logs how it ended.
//
// Several children may die while a single SIGCHLD is pending, so the handler
// drains waitpid(-1, WNOHANG) rather than relying on psi->si_pid. Only pids
// actually returned by waitpid are reported, so `status` is always valid, and
// a child terminated by a signal (e.g. the SIGKILL sent on timeout) is logged
// as such instead of as "exit status: 0".
//
// A child whose slot was already cleared by the scheduler (timeout kill) is
// logged without a taskNum.
// NOTE(review): std::string and toLog are not async-signal-safe; a stricter
// design would only record the event here and do the logging in the main loop.
void child_handler(int signo,siginfo_t *psi,void *pv){
    (void)signo;
    (void)psi;
    (void)pv;
    int status = 0;
    pid_t pid;
    while((pid = waitpid(-1,&status,WNOHANG)) > 0){
        std::string log = std::to_string(pid);
        if(WIFSIGNALED(status))
            log += " done,killed by signal: " + std::to_string(WTERMSIG(status));
        else
            log += " done,exit status: " + std::to_string(WEXITSTATUS(status));
        std::vector<pid_t>::iterator iter = std::find(pids.begin(),pids.end(),pid);
        if(iter != pids.end()){
            *iter = 0;
            // taskNum is the 1-based position of the slot in `pids`.
            log += ",taskNum:" + std::to_string(iter - pids.begin() + 1);
        }
        log += "\n";
        toLog(log);
    }
}
// Sends SIGKILL to the process recorded for task index `taskNum` if it is
// still running, logs the kill, and clears the slot in `pids`.
//
// The pid is copied to a local first: the SIGCHLD handler may zero the slot at
// any moment, and waitpid(0, ...) / kill(0, ...) would address the whole
// process group. waitpid(WNOHANG) returns 0 only for a live child of this
// process, so a pid that already exited or is not our child is never signalled.
static void killIfRunning(unsigned taskNum){
    const pid_t pid = pids[taskNum];
    if(pid <= 0){
        pids[taskNum] = 0;
        return;
    }
    int status = 0;
    if(waitpid(pid,&status,WNOHANG) == 0){
        kill(pid,SIGKILL);
        toLog(std::to_string(tasks[taskNum].id) + " lasted too long and killed\n");
    }
    pids[taskNum] = 0;
}

// Scheduler entry point. Builds a per-second event map for one round of
// `timeRound` seconds, then loops forever: at each second that has events it
// starts the due tasks (as php child processes) and kills the ones that have
// reached their timeout, then sleeps until the next second that has events.
int main(){
    struct timeval tv;
    gettimeofday(&tv,NULL);
    gStartSec = tv.tv_sec;

    // Install the SIGCHLD handler. sleep() may be implemented with SIGALRM,
    // so SIGALRM is blocked while the handler runs.
    struct sigaction sa;
    memset(&sa,0,sizeof(sa));
    sa.sa_sigaction = child_handler;
    sigemptyset(&(sa.sa_mask));
    sigaddset(&(sa.sa_mask),SIGALRM);
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGCHLD,&sa,NULL);

    // Build the event map: slot s lists the events due at second s of the
    // round. An event is taskIndex * 10 + kind, where kind 0 = start the task
    // and kind 1 = the task's timeout has been reached.
    const unsigned taskCount = sizeof(tasks) / sizeof(Task);
    std::vector<std::vector<unsigned>> timeMap(timeRound);
    for(unsigned t = 0; t < taskCount; t++){
        timeMap[tasks[t].begin % timeRound].push_back(t * 10);
        timeMap[(tasks[t].begin + tasks[t].duration) % timeRound].push_back(t * 10 + 1);
    }

    // lastTime remembers the last slot that was processed so that the events
    // of one second are not executed twice when sleep() is interrupted early.
    // It starts at a value no slot can have.
    unsigned lastTime = timeRound * 2;
    while(true){
        gettimeofday(&tv,NULL);
        const unsigned now = tv.tv_sec % timeRound;
        if(lastTime != now && !timeMap[now].empty()){
            for(const unsigned event : timeMap[now]){
                const unsigned taskNum = event / 10;
                if(event % 10 == 0){
                    // Time to start: a previous run that is still alive has
                    // overrun a whole round and is killed first.
                    killIfRunning(taskNum);
                    // Build the arguments before fork so the child only execs.
                    const std::string timeoutArg = std::to_string(tasks[taskNum].duration);
                    const std::string idArg = std::to_string(tasks[taskNum].id);
                    const pid_t pid = fork();
                    if(pid == 0){
                        execl("/usr/bin/php","php","/home/ubuntu/liu3/work/sleep.php",timeoutArg.c_str(),idArg.c_str(),static_cast<char *>(nullptr));
                        // Only reached when exec failed; report it with a
                        // non-zero status and skip the parent's exit handlers.
                        perror(NULL);
                        _exit(127);
                    }
                    if(pid < 0){
                        // Never store -1 in pids: kill(-1, ...) would signal
                        // every process this user is allowed to signal.
                        toLog(idArg + " fork failed\n");
                        continue;
                    }
                    pids[taskNum] = pid;
                    toLog(idArg + " started, pid = " + std::to_string(pid) + "\n");
                }
                else if(event % 10 == 1){
                    // Timeout reached.
                    killIfRunning(taskNum);
                }
            }
            lastTime = now;
        }
        const unsigned interval = getInterval(timeMap,now);
        toLog(std::string("now i = ") + std::to_string(now) + ",will sleep " + std::to_string(interval) + "\n");
        // sleep() returns either because a child ended (SIGCHLD) or because
        // the full interval elapsed and new events are due.
        sleep(interval);
    }
}
php:
<?php
// Simulated data-pull job started by the scheduler.
// Usage: php sleep.php <timeout-seconds> <task-id>
// Sleeps for a random time around the timeout to stand in for fetching and
// processing data, then exits with 0, or occasionally 1 to simulate a failure.
$time = isset($argv[1]) ? (int)$argv[1] : 0;
$taskId = isset($argv[2]) ? $argv[2] : '';
// Usually finish early; only the +1 draw (1 in 12) overruns the timeout.
// Clamp at 0 because sleep() rejects negative durations, which would occur
// whenever the timeout is shorter than 10 seconds.
$sleepTime = max(0, $time + rand(-10,1));
$fd = fopen("log","a+");
if($fd !== false){
    // 1499855185 is a fixed offset subtracted from the timestamp; presumably
    // it only keeps the logged number short.
    fwrite($fd,"php:$taskId:".(time() - 1499855185).':'.posix_getpid().":$time:$sleepTime\n");
    // Release the handle before the long sleep.
    fclose($fd);
}
sleep($sleepTime);
// Roughly 1 run in 10 reports failure so that the parent's SIGCHLD handling
// of a non-zero exit status can be verified.
if(rand(1,10) < 2)
    exit(1);
exit(0);
测试结果,大致的测试和解说:
php:11:73312:28261:11:9
php:12:73313:28262:11:4
php:14:73314:28266:13:8
php:13:73314:28265:13:14 //编号13的任务执行超时,后面应该被kill
php:1:73316:28268:11:12 //编号1的任务本该执行11秒,现在随机到执行12秒
php:2:73320:28272:12:10
php:3:73321:28273:11:12 //任务3也应该被kill,在第20秒
php:4:73323:28274:14:8 //四号任务8秒后应该寿终正寝
php:5:73325:28276:14:12
php:6:73327:28278:12:12
php:7:73328:28279:11:6
php:8:73330:28280:13:10
php:10:73331:28282:12:2
php:9:73331:28281:12:7
php:11:73332:28283:11:10
php:12:73333:28284:11:8
php:14:73334:28289:13:13
php:13:73334:28288:13:12
php:1:73336:28291:11:4 //被kill的任务1 20秒后重启
php:2:73340:28292:12:5
php:3:73341:28293:11:5
php:4:73343:28296:14:7
php:5:73345:28299:14:10
php:6:73347:28302:12:7
php:7:73348:28304:11:12
php:8:73350:28305:13:11
php:10:73351:28307:12:7
php:9:73351:28306:12:11
php:11:73352:28308:11:5
php:12:73353:28309:11:12
php:14:73354:28313:13:5
php:13:73354:28312:13:11
php:1:73356:28330:11:4
0:11 started, pid = 28261
0:now i = 17,will sleep 1
1:12 started, pid = 28262
1:now i = 18,will sleep 1
2:13 started, pid = 28265
2:14 started, pid = 28266
2:now i = 19,will sleep 2
4:1 started, pid = 28268 //第四秒编号1的任务执行
4:now i = 1,will sleep 1
5:now i = 2,will sleep 2
5:28262 done,exit status: 0,taskNum:12
5:now i = 2,will sleep 2
7:now i = 4,will sleep 1
8:2 started, pid = 28272
8:now i = 5,will sleep 1
9:28261 done,exit status: 0,taskNum:11
9:3 started, pid = 28273 //超时上限11秒,但php实际sleep 12秒,应该在第20秒因超时被kill
9:now i = 6,will sleep 2
10:28266 done,exit status: 1,taskNum:14
10:now i = 7,will sleep 1
11:4 started, pid = 28274 //四号应该在19秒正常退出
11:now i = 8,will sleep 1
12:now i = 9,will sleep 1
13:5 started, pid = 28276
13:now i = 10,will sleep 2
15:1 lasted too long and killed //11秒以后编号为1的任务还没执行完被kill掉
15:6 started, pid = 28278
15:13 lasted too long and killed
15:now i = 12,will sleep 1
15:28268 done,exit status: 0 //编号1任务如愿被kill,至于status为0,to do
15:28265 done,exit status: 0 //同理编号为13的,提前两秒执行的任务也在15秒被kill
15:now i = 12,will sleep 1
16:7 started, pid = 28279
16:now i = 13,will sleep 2
18:8 started, pid = 28280
18:now i = 15,will sleep 1
18:28272 done,exit status: 0,taskNum:2
18:now i = 15,will sleep 1
19:9 started, pid = 28281
19:10 started, pid = 28282
19:now i = 16,will sleep 1
19:28274 done,exit status: 0,taskNum:4 //四号正常退出
19:now i = 16,will sleep 1
20:3 lasted too long and killed //任务3超时被kill
20:11 started, pid = 28283
20:now i = 17,will sleep 1
20:28273 done,exit status: 0
20:now i = 17,will sleep 1
21:12 started, pid = 28284
21:now i = 18,will sleep 1
21:28282 done,exit status: 0,taskNum:10
21:now i = 18,will sleep 1
22:28279 done,exit status: 0,taskNum:7
22:13 started, pid = 28288
22:14 started, pid = 28289
22:now i = 19,will sleep 2
24:1 started, pid = 28291 //被kill的1在第二个轮回如约重启
24:now i = 1,will sleep 1
25:28276 done,exit status: 0,taskNum:5
25:now i = 2,will sleep 2
26:28281 done,exit status: 0,taskNum:9
26:now i = 3,will sleep 1
27:28278 done,exit status: 0,taskNum:6
27:now i = 4,will sleep 1
算接触过的程序里比较麻烦点的。