First, look at a Kafka cube build request. sourceOffsetEnd is set to 9223372036854775807 (Long.MAX_VALUE), which asks Kylin to consume up to the latest data available in Kafka:
curl -X PUT --user ADMIN:KYLIN -H "Content-Type: application/json;charset=utf-8" -d '{"sourceOffsetStart": 0, "sourceOffsetEnd": 9223372036854775807, "buildType": "BUILD"}' http://99.48.2.1:7070/kylin/api/cubes/cubename/build2
The entry point in the source code is CubeController:
/**
* Build/Rebuild a cube segment by source offset
*/
@RequestMapping(value = "/{cubeName}/build2", method = { RequestMethod.PUT }, produces = { "application/json" })
@ResponseBody
public JobInstance build2(@PathVariable String cubeName, @RequestBody JobBuildRequest2 req) {
boolean existKafkaClient = false;
...
return rebuild2(cubeName, req);
}
private JobInstance buildInternal(String cubeName, TSRange tsRange, SegmentRange segRange, //
Map<Integer, Long> sourcePartitionOffsetStart, Map<Integer, Long> sourcePartitionOffsetEnd,
String buildType, boolean force) {
return jobService.submitJob(cube, tsRange, segRange, sourcePartitionOffsetStart, sourcePartitionOffsetEnd,
CubeBuildTypeEnum.valueOf(buildType), force, submitter);
}
Here jobService shows up; let's step into it.
public class JobService extends BasicService implements InitializingBean {
Seeing InitializingBean naturally brings afterPropertiesSet() to mind. What does that method do?
@SuppressWarnings("unchecked")
@Override
public void afterPropertiesSet() throws Exception {
...
new Thread(new Runnable() {
@Override
public void run() {
try {
scheduler.init(new JobEngineConfig(kylinConfig), new ZookeeperJobLock());
if (!scheduler.hasStarted()) {
logger.info("scheduler has not been started");
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}).start();
...
}
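As a reminder of the InitializingBean contract (a minimal sketch of plain Spring usage, not Kylin code): the container calls afterPropertiesSet() exactly once, after all properties have been injected, which is why JobService can safely start the scheduler thread there.
import org.springframework.beans.factory.InitializingBean;

public class SchedulerBootstrapSketch implements InitializingBean {
    @Override
    public void afterPropertiesSet() throws Exception {
        // invoked by the Spring container once the bean is fully configured;
        // a safe place to kick off background threads such as the job scheduler
        new Thread(() -> System.out.println("scheduler starting")).start();
    }
}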
This starts a scheduler thread. Follow it into the init method; this is where the important work happens.
@Override
public synchronized void init(JobEngineConfig jobEngineConfig, JobLock jobLock) throws SchedulerException {
String serverMode = jobEngineConfig.getConfig().getServerMode();
if (!("job".equals(serverMode.toLowerCase()) || "all".equals(serverMode.toLowerCase()))) {
logger.info("server mode: " + serverMode + ", no need to run job scheduler");
return;
}
logger.info("Initializing Job Engine ....");
if (!initialized) {
initialized = true;
} else {
return;
}
this.jobEngineConfig = jobEngineConfig;
this.jobLock = (DistributedLock) jobLock;
this.serverName = this.jobLock.getClient(); // the lock's client string contains node name of this server
executableManager = ExecutableManager.getInstance(jobEngineConfig.getConfig());
//load all executable, set them to a consistent status
fetcherPool = Executors.newScheduledThreadPool(1);
//watch the zookeeper node change, so that when one job server is down, other job servers can take over.
watchPool = Executors.newFixedThreadPool(1);
WatcherProcessImpl watcherProcess = new WatcherProcessImpl(this.serverName);
lockWatch = this.jobLock.watchLocks(getWatchPath(), watchPool, watcherProcess);
int corePoolSize = jobEngineConfig.getMaxConcurrentJobLimit();
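// note: core == max threads backed by a SynchronousQueue, so at most corePoolSize jobs run at once and any extra submission is rejected (see FetcherRunner below)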
jobPool = new ThreadPoolExecutor(corePoolSize, corePoolSize, Long.MAX_VALUE, TimeUnit.DAYS,
new SynchronousQueue<Runnable>());
context = new DefaultContext(Maps.<String, Executable> newConcurrentMap(), jobEngineConfig.getConfig());
int pollSecond = jobEngineConfig.getPollIntervalSecond();
logger.info("Fetching jobs every {} seconds", pollSecond);
fetcher = new FetcherRunner();
fetcherPool.scheduleAtFixedRate(fetcher, pollSecond / 10, pollSecond, TimeUnit.SECONDS);
hasStarted = true;
resumeAllRunningJobs();
}
The method first checks the configured server mode: if it is neither job nor all, it simply returns; otherwise it continues. What follows is the heart of the design: Kylin creates its main thread pools (fetcherPool, watchPool, and jobPool) in preparation for the cube builds that come later.
Look at fetcherPool first. The polling interval for jobs waiting to be built is kylin.job.scheduler.poll-interval-second (default 30 seconds), and the initial delay is pollSecond / 10, i.e. 30 / 10 = 3 seconds by default:
scheduleAtFixedRate(fetcher, pollSecond / 10, pollSecond, TimeUnit.SECONDS);
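As a minimal, self-contained sketch of that scheduling call (illustrative, not Kylin code): with the default pollSecond of 30, the first fetch fires after 3 seconds and the runner is then invoked every 30 seconds.
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class FetcherScheduleSketch {
    public static void main(String[] args) {
        int pollSecond = 30; // kylin.job.scheduler.poll-interval-second, default 30
        ScheduledExecutorService fetcherPool = Executors.newScheduledThreadPool(1);
        // initial delay pollSecond / 10 = 3s, then one run every pollSecond = 30s
        fetcherPool.scheduleAtFixedRate(() -> System.out.println("fetch READY jobs"),
                pollSecond / 10, pollSecond, TimeUnit.SECONDS);
    }
}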
scheduleAtFixedRate is given a FetcherRunner, so the natural place to look next is its run() method:
private class FetcherRunner implements Runnable {
@Override
synchronized public void run() {
try {
Map<String, Executable> runningJobs = context.getRunningJobs();
if (runningJobs.size() >= jobEngineConfig.getMaxConcurrentJobLimit()) {
logger.warn("There are too many jobs running, Job Fetch will wait until next schedule time");
return;
}
int nRunning = 0, nOtherRunning = 0, nReady = 0, nOthers = 0;
for (final String id : executableManager.getAllJobIds()) {
if (runningJobs.containsKey(id)) {
nRunning++;
continue;
}
final Output output = executableManager.getOutput(id);
if ((output.getState() != ExecutableState.READY)) {
if (output.getState() == ExecutableState.RUNNING) {
nOtherRunning++;
} else {
nOthers++;
}
continue;
}
nReady++;
final AbstractExecutable executable = executableManager.getJob(id);
try {
jobPool.execute(new JobRunner(executable));
} catch (Exception ex) {
logger.warn(executable.toString() + " fail to schedule in server: " + serverName, ex);
}
}
...
}
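Before moving on, note why jobPool.execute() is wrapped in a try/catch: jobPool was created in init() with core == max threads and a SynchronousQueue, so once all workers are busy a new submission is rejected immediately instead of being queued, and the job simply waits for a later fetch. A small self-contained sketch of that behavior (illustrative, not Kylin code):
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class JobPoolRejectionSketch {
    public static void main(String[] args) {
        ThreadPoolExecutor jobPool = new ThreadPoolExecutor(1, 1,
                Long.MAX_VALUE, TimeUnit.DAYS, new SynchronousQueue<Runnable>());
        jobPool.execute(() -> sleep(5_000)); // occupies the only worker thread
        try {
            jobPool.execute(() -> System.out.println("second job"));
        } catch (RejectedExecutionException e) {
            // no idle worker and no queue capacity: the task is rejected,
            // mirroring the warn-and-skip in FetcherRunner
            System.out.println("rejected, will be retried on a later fetch");
        }
        jobPool.shutdown();
    }

    private static void sleep(long millis) {
        try { Thread.sleep(millis); } catch (InterruptedException ignored) { }
    }
}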
That is a fair amount of code; skipping the details, go straight to the JobRunner handed to jobPool.execute(new JobRunner(executable)).
@Override
public void run() {
...
context.addRunningJob(executable);
segmentWithLocks.add(segmentId);
executable.execute(context);
...
}
Now take a look at the execute method:
@Override
public final ExecuteResult execute(ExecutableContext executableContext) throws ExecuteException {
...
try {
result = doWork(executableContext);
} catch (Throwable e) {
logger.error("error running Executable: " + this.toString());
exception = e;
}
retry++;
...
}
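The elided parts of execute() retry doWork() when it fails, up to a configurable limit. A hedged, self-contained sketch of that retry pattern (the exact conditions live in AbstractExecutable and are driven by Kylin config; the names below are illustrative):
import java.util.concurrent.Callable;

public class RetrySketch {
    static <T> T runWithRetry(Callable<T> task, int maxRetry) throws Exception {
        Exception last = null;
        for (int attempt = 0; attempt <= maxRetry; attempt++) {
            try {
                return task.call(); // doWork() in the real code
            } catch (Exception e) {
                last = e; // remember the failure and try again
            }
        }
        throw last; // all attempts failed
    }

    public static void main(String[] args) throws Exception {
        System.out.println(runWithRetry(() -> "ok", 2));
    }
}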
So whose doWork(executableContext) is this? MapReduceExecutable is the class we are after:
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
final String mapReduceJobClass = getMapReduceJobClass();
Preconditions.checkNotNull(mapReduceJobClass);
try {
Job job;
ExecutableManager mgr = getManager();
Configuration conf = new Configuration(HadoopUtil.getCurrentConfiguration());
String[] jobArgs = overwriteJobConf(conf, context.getConfig(), getMapReduceParams().trim().split("\\s+"));
final Map<String, String> extra = mgr.getOutput(getId()).getExtra();
if (extra.containsKey(ExecutableConstants.MR_JOB_ID)) {
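// a previous attempt already submitted an MR job: reattach to it by job ID instead of launching a new one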
job = new Cluster(conf).getJob(JobID.forName(extra.get(ExecutableConstants.MR_JOB_ID)));
logger.info("mr_job_id:" + extra.get(ExecutableConstants.MR_JOB_ID) + " resumed");
} else {
final Constructor<? extends AbstractHadoopJob> constructor = ClassUtil
.forName(mapReduceJobClass, AbstractHadoopJob.class).getConstructor();
final AbstractHadoopJob hadoopJob = constructor.newInstance();
hadoopJob.setConf(conf);
hadoopJob.setAsync(true); // so the ToolRunner.run() returns right away
logger.info("parameters of the MapReduceExecutable: {}", getMapReduceParams());
try {
// this is the key point: the Hadoop Tool run() method is the entry point into MapReduce
hadoopJob.run(jobArgs);
if (hadoopJob.isSkipped()) {
return new ExecuteResult(ExecuteResult.State.SUCCEED, "skipped");
}
} catch (Exception ex) {
StringBuilder log = new StringBuilder();
logger.error("error execute " + this.toString(), ex);
StringWriter stringWriter = new StringWriter();
ex.printStackTrace(new PrintWriter(stringWriter));
log.append(stringWriter.toString()).append("\n");
log.append("result code:").append(2);
return new ExecuteResult(ExecuteResult.State.ERROR, log.toString(), ex);
}
job = hadoopJob.getJob();
}
final StringBuilder output = new StringBuilder();
final HadoopCmdOutput hadoopCmdOutput = new HadoopCmdOutput(job, output);
JobStepStatusEnum status = JobStepStatusEnum.NEW;
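// poll the Hadoop job status until it finishes, or the step is paused, discarded, or killed by an admin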
while (!isDiscarded() && !isPaused()) {
JobStepStatusEnum newStatus = HadoopJobStatusChecker.checkStatus(job, output);
if (status == JobStepStatusEnum.KILLED) {
mgr.updateJobOutput(getId(), ExecutableState.ERROR, hadoopCmdOutput.getInfo(), "killed by admin");
return new ExecuteResult(ExecuteResult.State.FAILED, "killed by admin");
}
if (status == JobStepStatusEnum.WAITING && (newStatus == JobStepStatusEnum.FINISHED
|| newStatus == JobStepStatusEnum.ERROR || newStatus == JobStepStatusEnum.RUNNING)) {
final long waitTime = System.currentTimeMillis() - getStartTime();
setMapReduceWaitTime(waitTime);
}
mgr.addJobInfo(getId(), hadoopCmdOutput.getInfo());
status = newStatus;
if (status.isComplete()) {
final Map<String, String> info = hadoopCmdOutput.getInfo();
readCounters(hadoopCmdOutput, info);
mgr.addJobInfo(getId(), info);
if (status == JobStepStatusEnum.FINISHED) {
return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
} else {
return ExecuteResult.createFailed(new MapReduceException(output.toString()));
}
}
Thread.sleep(context.getConfig().getYarnStatusCheckIntervalSeconds() * 1000L);
}
// try to kill running map-reduce job to release resources.
if (job != null) {
try {
job.killJob();
} catch (Exception e) {
logger.warn("failed to kill hadoop job: " + job.getJobID(), e);
}
}
if (isDiscarded()) {
return new ExecuteResult(ExecuteResult.State.DISCARDED, output.toString());
} else {
return new ExecuteResult(ExecuteResult.State.STOPPED, output.toString());
}
} catch (ReflectiveOperationException e) {
logger.error("error getMapReduceJobClass, class name:" + getParam(KEY_MR_JOB), e);
return ExecuteResult.createError(e);
} catch (Exception e) {
logger.error("error execute " + this.toString(), e);
return ExecuteResult.createError(e);
}
}
hadoopJob.run(jobArgs);
Continue down through AbstractHadoopJob and you find KafkaFlatTableJob, the concrete entry point that runs the MapReduce job.
public class KafkaFlatTableJob extends AbstractHadoopJob {
public abstract class AbstractHadoopJob extends Configured implements Tool {
Run a Hadoop Job to process the stream data in kafka;
KafkaFlatTableJob implements the Hadoop Tool interface, so go straight to its run method:
public int run(String[] args) throws Exception {
// initialize the Job instance
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
// set the corresponding job parameters: mapper, key/value classes, input/output formats
job.setMapperClass(KafkaFlatTableMapper.class);
// this is the key point: why does Kylin implement its own KafkaInputFormat class?
job.setInputFormatClass(KafkaInputFormat.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setNumReduceTasks(0);
...
}
// main method of the packaged jar: the Hadoop MapReduce entry point
public static void main(String[] args) throws Exception {
KafkaFlatTableJob job = new KafkaFlatTableJob();
int exitCode = ToolRunner.run(job, args);
System.exit(exitCode);
}
KafkaInputFormat is Kylin's adapter that lets the Hadoop job read its input directly from Kafka (the flat table is then written out to HDFS by the SequenceFileOutputFormat configured above).
public class KafkaInputFormat extends InputFormat
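As a rough skeleton of what any Hadoop InputFormat must provide (a sketch, not the actual Kylin source; the key/value types and the partition-per-split mapping are assumptions): getSplits() carves the Kafka topic into splits, presumably one per partition bounded by the segment's offset range, and createRecordReader() turns each split's messages into key/value records for KafkaFlatTableMapper.
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public abstract class KafkaInputFormatSketch extends InputFormat<LongWritable, BytesWritable> {

    // decide how the topic is divided among mappers, e.g. one split per
    // Kafka partition bounded by the build's start/end offsets (assumption)
    @Override
    public abstract List<InputSplit> getSplits(JobContext context)
            throws IOException, InterruptedException;

    // read the messages of one split and hand them to the mapper as
    // key/value pairs (assumption: offset as key, message bytes as value)
    @Override
    public abstract RecordReader<LongWritable, BytesWritable> createRecordReader(
            InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException;
}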