1.processEvent方法
//接口继承关系:
// private[scheduler] sealed trait JobSchedulerEvent
// private[scheduler] case class JobStarted(job: Job, startTime: Long) extends JobSchedulerEvent
// private[scheduler] case class JobCompleted(job: Job, completedTime: Long) extends JobSchedulerEvent
// private[scheduler] case class ErrorReported(msg: String, e: Throwable) extends JobSchedulerEvent
/**
 * Dispatches one scheduler event to the handler that matches its concrete type.
 *
 * Anything thrown while the event is being handled is caught here and passed to
 * `reportError` rather than propagated to the caller.
 *
 * @param event the scheduler event to handle
 */
private def processEvent(event: JobSchedulerEvent): Unit = {
  try {
    event match {
      // A job began running: hand over the job together with its start timestamp.
      case JobStarted(startedJob, startedAt) =>
        handleJobStart(startedJob, startedAt)
      // A job finished: hand over the job together with its completion timestamp.
      case JobCompleted(finishedJob, finishedAt) =>
        handleJobCompletion(finishedJob, finishedAt)
      // An error was reported: delegate to handleError, which is not part of this excerpt.
      // NOTE(review): the original note says held locks are released via
      // condition.signalAll inside handleError -- confirm against that method.
      case ErrorReported(message, cause) =>
        handleError(message, cause)
    }
  } catch {
    case failure: Throwable =>
      reportError("Error in job scheduler", failure)
  }
}
1.1 handleJobStart方法
/**
 * Records that `job` has started and publishes the matching listener events.
 *
 * @param job       the job that started; `job.time` is the key used to look up its job set
 * @param startTime timestamp stored on the job through `setStartTime`
 */
private def handleJobStart(job: Job, startTime: Long): Unit = {
  // Look up the job set registered under this job's batch time.
  val owningSet = jobSets.get(job.time)
  // Sampled before owningSet.handleJobStart(job) runs.
  // NOTE(review): presumably that call marks the set as started, which is why the
  // flag has to be read first -- confirm in JobSet.
  val batchNotYetStarted = !owningSet.hasStarted
  owningSet.handleJobStart(job)
  if (batchNotYetStarted) {
    // First job of the set: announce that the batch has started.
    listenerBus.post(StreamingListenerBatchStarted(owningSet.toBatchInfo))
  }
  // Store the start time before building the output-operation info posted below.
  job.setStartTime(startTime)
  listenerBus.post(StreamingListenerOutputOperationStarted(job.toOutputOperationInfo))
  logInfo("Starting job " + job.id + " from job set of time " + owningSet.time)
}
1.2 handleJobCompletion方法
/**
 * Records that `job` has finished, publishes the matching listener events and,
 * when the job did not fail and its whole job set has completed, retires the set.
 *
 * A failed job is only reported through `reportError`; in that case the job set
 * is left registered in `jobSets`.
 *
 * @param job           the job that finished; `job.time` is the key used to look up its job set
 * @param completedTime timestamp stored on the job through `setEndTime`
 */
private def handleJobCompletion(job: Job, completedTime: Long): Unit = {
  val jobSet = jobSets.get(job.time)
  jobSet.handleJobCompletion(job)
  // Store the end time before building the output-operation info posted below.
  job.setEndTime(completedTime)
  listenerBus.post(StreamingListenerOutputOperationCompleted(job.toOutputOperationInfo))
  logInfo("Finished job " + job.id + " from job set of time " + jobSet.time)
  if (jobSet.hasCompleted) {
    listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo))
  }
  job.result match {
    case Failure(e) =>
      reportError("Error running job " + job, e)
    case _ =>
      if (jobSet.hasCompleted) {
        // Every job of the set is done: drop the set from the registry.
        jobSets.remove(jobSet.time)
        // NOTE(review): the original note says this schedules metadata clearing for
        // the batch -- confirm in JobGenerator.onBatchCompletion.
        jobGenerator.onBatchCompletion(jobSet.time)
        // Delays are divided by 1000.0 to print them in seconds.
        logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format(
          jobSet.totalDelay / 1000.0, jobSet.time.toString,
          jobSet.processingDelay / 1000.0
        ))
      }
  }
}
2. start方法
/**
 * Starts the scheduler: creates and starts the event loop, registers the input
 * streams' rate controllers as streaming listeners, starts the listener bus,
 * creates the receiver and input-info trackers, optionally creates the executor
 * allocation manager, and finally starts the receiver tracker, the job generator
 * and the allocation manager (if one was created).
 *
 * The whole method runs inside `synchronized`; when `eventLoop` is already set
 * the call returns immediately without doing anything.
 */
def start(): Unit = synchronized {
  if (eventLoop != null) return // scheduler has already been started

  logDebug("Starting JobScheduler")
  // EventLoop is abstract; the anonymous subclass below forwards every received event
  // to processEvent and every loop error to reportError.
  // NOTE(review): per the original author's note, constructing an EventLoop also sets up
  // a thread that keeps taking events from a blocking queue and calls onReceive for each
  // one -- confirm in EventLoop.
  eventLoop = new EventLoop[JobSchedulerEvent]("JobScheduler") {
    override protected def onReceive(event: JobSchedulerEvent): Unit = processEvent(event)

    override protected def onError(e: Throwable): Unit = reportError("Error in job scheduler", e)
  }
  // NOTE(review): the original note says start() invokes an onStart hook before the
  // background thread is launched, so onReceive only runs after startup, and that the
  // thread blocks while the queue is empty -- confirm in EventLoop.
  eventLoop.start()

  // attach rate controllers of input streams to receive batch completion updates
  for {
    // every input stream of the graph
    inputDStream <- ssc.graph.getInputStreams
    // its rate controller, when it has one (tracks the consumption rate per the original note)
    rateController <- inputDStream.rateController
  }
    // NOTE(review): the original note says listeners are stored in a copy-on-write
    // collection so readers always see a consistent view -- confirm in the listener bus.
    ssc.addStreamingListener(rateController)

  listenerBus.start()
  // Initialization of the trackers.
  receiverTracker = new ReceiverTracker(ssc)
  inputInfoTracker = new InputInfoTracker(ssc)

  // The typed pattern already narrows `b`, so no cast is needed. `null` is kept for the
  // non-matching case because the value is handed unchanged to createIfEnabled below.
  val executorAllocClient: ExecutorAllocationClient = ssc.sparkContext.schedulerBackend match {
    case b: ExecutorAllocationClient => b
    case _ => null
  }

  // The manager created below manages the executors allocated to the StreamingContext:
  // it requests and kills executors dynamically, so the pool can shrink and grow.
  // The notes that follow are excerpts from ExecutorAllocationManager collected by the
  // original author. NOTE(review): they are not part of this method -- verify them
  // against that class.
  //   val averageBatchProcTime = batchProcTimeSum / batchProcTimeCount
  //   val ratio = averageBatchProcTime.toDouble / batchDurationMs
  //   if (ratio >= scalingUpRatio) {
  //     logDebug("Requesting executors")
  //     val numNewExecutors = math.max(math.round(ratio).toInt, 1)
  //   In the following case an executor gets killed:
  //   if (ratio <= scalingDownRatio) {
  //     logDebug("Killing executors")
  //     killExecutor()
  //   }
  // How to tune it:
  //   private val scalingUpRatio = conf.getDouble(SCALING_UP_RATIO_KEY, SCALING_UP_RATIO_DEFAULT)
  //     SCALING_UP_RATIO_KEY = "spark.streaming.dynamicAllocation.scalingUpRatio"
  //     val SCALING_UP_RATIO_DEFAULT = 0.9 (the default)
  //   private val scalingDownRatio = conf.getDouble(SCALING_DOWN_RATIO_KEY, SCALING_DOWN_RATIO_DEFAULT)
  //     SCALING_DOWN_RATIO_KEY = "spark.streaming.dynamicAllocation.scalingDownRatio"
  //     val SCALING_DOWN_RATIO_DEFAULT = 0.3
  //   private val minNumExecutors = conf.getInt(MIN_EXECUTORS_KEY, math.max(1, receiverTracker.numReceivers))
  //     (numReceivers is the size of the receiverInputStreams array, per the original note)
  //     val MIN_EXECUTORS_KEY = "spark.streaming.dynamicAllocation.minExecutors"
  //   private val maxNumExecutors = conf.getInt(MAX_EXECUTORS_KEY, Integer.MAX_VALUE)
  //     val MAX_EXECUTORS_KEY = "spark.streaming.dynamicAllocation.maxExecutors"
  //   private val timer = new RecurringTimer(clock, scalingIntervalSecs * 1000,
  //     _ => manageAllocation(), "streaming-executor-allocation-manager")
  //   The timer sets how often manageAllocation, the function that adjusts the executors, is called:
  //     val SCALING_INTERVAL_KEY = "spark.streaming.dynamicAllocation.scalingInterval"
  //     val SCALING_INTERVAL_DEFAULT_SECS = 60
  // To use dynamic allocation, set ENABLED_KEY = "spark.streaming.dynamicAllocation.enabled"
  // to true and do not set the number of executor instances:
  //   val numExecutor = conf.getInt("spark.executor.instances", 0)
  //   val streamingDynamicAllocationEnabled = conf.getBoolean(ENABLED_KEY, false)
  //   if (numExecutor != 0 && streamingDynamicAllocationEnabled) {
  executorAllocationManager = ExecutorAllocationManager.createIfEnabled(
    executorAllocClient,
    receiverTracker,
    ssc.conf,
    ssc.graph.batchDuration.milliseconds,
    clock)
  executorAllocationManager.foreach(ssc.addStreamingListener)

  // Excerpt of what ReceiverTracker.start does, per the original note (NOTE(review): confirm):
  //   endpoint = ssc.env.rpcEnv.setupEndpoint(
  //     "ReceiverTracker", new ReceiverTrackerEndpoint(ssc.env.rpcEnv))
  //   if (!skipReceiverLaunch) launchReceivers()
  // launchReceivers distributes the receivers to the nodes; the original author leaves
  // the details for a later write-up.
  receiverTracker.start()
  // NOTE(review): per the original note the job generator relies on the same EventLoop
  // mechanism -- confirm in JobGenerator.
  jobGenerator.start()
  // Only runs when a manager was created above; per the original note this is where
  // dynamic adjustment of the executor count begins.
  executorAllocationManager.foreach(_.start())
  logInfo("Started JobScheduler")
}
3.submitJobSet方法
// listenerBus.post(StreamingListenerBatchSubmitted(jobSet.toBatchInfo))
// jobSets.put(jobSet.time, jobSet)
// jobSet.jobs.foreach(job => jobExecutor.execute(new JobHandler(job)))
// logInfo("Added jobs for time " + jobSet.time)
jobExecutor其实就是一个固定大小的守护线程池而已:
//private val jobExecutor = ThreadUtils.newDaemonFixedThreadPool(numConcurrentJobs, "streaming-job-executor")
// private val numConcurrentJobs = ssc.conf.getInt("spark.streaming.concurrentJobs", 1) // 默认并发作业数量是1
4.JobHandler这个线程
/**
 * Runnable that executes a single streaming job and notifies the scheduler's event
 * loop when the job starts and when it completes.
 *
 * The thread's local properties are replaced for the duration of the run and put
 * back in a `finally` block, so they are restored even when the job throws.
 *
 * @param job the job to run
 */
private class JobHandler(job: Job) extends Runnable with Logging {
  import JobScheduler._

  def run(): Unit = {
    // Keep the properties that were active on entry so they can be restored on exit.
    val previousProps = ssc.sparkContext.getLocalProperties
    try {
      // Work on a clone of the saved properties rather than on the saved object itself.
      ssc.sparkContext.setLocalProperties(SerializationUtils.clone(ssc.savedProperties.get()))

      // Property keys under which the batch time and output-operation id are published.
      val batchTimeKey = "spark.streaming.internal.batchTime"
      val outputOpIdKey = "spark.streaming.internal.outputOpId"

      // Build a job description that links to this batch's page.
      val formattedTime = UIUtils.formatBatchTime(
        job.time.milliseconds, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false)
      val batchUrl = s"/streaming/batch/?id=${job.time.milliseconds}"
      val batchLinkText = s"[output operation ${job.outputOpId}, batch time ${formattedTime}]"
      ssc.sc.setJobDescription(
        s"""Streaming job from <a href="$batchUrl">$batchLinkText</a>""")

      ssc.sc.setLocalProperty(batchTimeKey, job.time.milliseconds.toString)
      ssc.sc.setLocalProperty(outputOpIdKey, job.outputOpId.toString)
      // NOTE(review): presumably makes checkpointing cover all marked ancestor RDDs so the
      // lineage gets truncated, as the original note suggests -- confirm in RDD.
      ssc.sparkContext.setLocalProperty(RDD.CHECKPOINT_ALL_MARKED_ANCESTORS, "true")

      // A null eventLoop means the JobScheduler has been stopped; nothing is run then.
      Option(eventLoop).foreach { loopAtStart =>
        // Announce that the job is about to run.
        loopAtStart.post(JobStarted(job, clock.getTimeMillis()))
        PairRDDFunctions.disableOutputSpecValidation.withValue(true) {
          // Actually run the job.
          job.run()
        }
        // The field is read again after the job ran; the completion event is posted
        // only if it is still non-null (presumably it is cleared when the scheduler stops).
        Option(eventLoop).foreach { loopAtEnd =>
          loopAtEnd.post(JobCompleted(job, clock.getTimeMillis()))
        }
      }
    } finally {
      ssc.sparkContext.setLocalProperties(previousProps)
    }
  }
}
本文深入探讨Spark Streaming中JobScheduler的工作原理,包括事件处理、作业开始与完成的管理、错误报告处理,以及如何通过JobScheduler启动和管理作业执行。文章还详细介绍了JobScheduler中的关键方法如processEvent、handleJobStart、handleJobCompletion等,并解释了如何动态调整executor的数量。
1435

被折叠的 条评论
为什么被折叠?



