KafkaServer.startup -> replicaManager.startup()
定时调度的线程:每隔 config.replicaLagTimeMaxMs 毫秒执行一次 maybeShrinkIsr,检查 ISR 是否需要收缩:
scheduler.schedule("isr-expiration", maybeShrinkIsr, period = config.replicaLagTimeMaxMs, unit = TimeUnit.MILLISECONDS)
-> partition.maybeShrinkIsr
/**
 * Asks the replica manager to try completing the delayed fetch and delayed
 * produce operations registered under this partition's key.
 *
 * The key is built from this partition's `topic` and `partitionId`.
 */
private def tryCompleteDelayedRequests(): Unit = {
  val requestKey = new TopicPartitionOperationKey(this.topic, this.partitionId)
  replicaManager.tryCompleteDelayedFetch(requestKey)
  replicaManager.tryCompleteDelayedProduce(requestKey)
}
/**
 * Shrinks this partition's ISR by removing followers that are out of sync,
 * provided the leader replica is local to this broker.
 *
 * Inside `inWriteLock(leaderIsrUpdateLock)` the method asks
 * `getOutOfSyncReplicas` for the followers to drop. If there are any, it
 * updates the ISR, marks the `isrShrinkRate` metric and tries to advance the
 * leader's high watermark. Once the locked section has finished, delayed
 * requests are re-checked if the high watermark was advanced.
 *
 * @param replicaMaxLagTimeMs lag threshold in milliseconds, forwarded to
 *                            `getOutOfSyncReplicas` as `maxLagMs`
 */
def maybeShrinkIsr(replicaMaxLagTimeMs: Long): Unit = {
  val leaderHWIncremented = inWriteLock(leaderIsrUpdateLock) {
    leaderReplicaIfLocal() match {
      case Some(leaderReplica) =>
        // Collect the replicas that have to be removed from the ISR.
        val outOfSyncReplicas = getOutOfSyncReplicas(leaderReplica, replicaMaxLagTimeMs)
        if (outOfSyncReplicas.nonEmpty) {
          // New ISR = current ISR minus the replicas being removed.
          val newInSyncReplicas = inSyncReplicas -- outOfSyncReplicas
          // getOutOfSyncReplicas never selects the leader itself, so the new ISR
          // is expected to keep at least one member.
          assert(newInSyncReplicas.nonEmpty)
          info("Shrinking ISR for partition [%s,%d] from %s to %s".format(topic, partitionId,
            inSyncReplicas.map(_.brokerId).mkString(","), newInSyncReplicas.map(_.brokerId).mkString(",")))
          // update ISR in zk and in cache
          updateIsr(newInSyncReplicas)
          replicaManager.isrShrinkRate.mark()
          // We may need to increment the high watermark since the ISR could be down to 1.
          // Author's example: HW is the minimum LEO across the ISR. With p0=100,
          // p1=110, p2=80 the HW is 80; once p2 is dropped it becomes min(p0, p1) = 100.
          maybeIncrementLeaderHW(leaderReplica)
        } else {
          false
        }
      case None => false // do nothing if no longer leader
    }
  }
  // some delayed operations may be unblocked after HW changed
  if (leaderHWIncremented)
    tryCompleteDelayedRequests()
}
getOutOfSyncReplicas()
/**
 * Returns the in-sync followers that should be removed from the ISR.
 *
 * There are two cases that are handled here:
 *  1. Stuck followers: if the leo of the replica hasn't been updated for
 *     `maxLagMs` ms, the follower is stuck and should be removed from the ISR.
 *  2. Slow followers: if the replica has not read up to the leo within the
 *     last `maxLagMs` ms, the follower is lagging and should be removed from
 *     the ISR.
 *
 * Both cases are handled by a single check on `lastCaughtUpTimeMs`, which
 * represents the last time the replica was fully caught up. A follower is out
 * of sync when `now - lastCaughtUpTimeMs > maxLagMs`. In this version of the
 * code (the author notes 0.10.1.0) that is the only removal condition.
 *
 * NOTE(review): the original author's note says the threshold is about
 * 10 seconds in practice; that value is not visible here - confirm against
 * the broker configuration.
 *
 * @param leaderReplica the leader replica; it is excluded from the candidates
 * @param maxLagMs      maximum time in milliseconds a follower may go without
 *                      having been fully caught up
 * @return the (possibly empty) set of followers considered out of sync
 */
def getOutOfSyncReplicas(leaderReplica: Replica, maxLagMs: Long): Set[Replica] = {
  // Only followers can be dropped; the leader is never a candidate.
  val candidateReplicas = inSyncReplicas - leaderReplica
  // Read the clock once so every follower is compared against the same instant.
  val nowMs = time.milliseconds
  val laggingReplicas = candidateReplicas.filter(r => (nowMs - r.lastCaughtUpTimeMs) > maxLagMs)
  if (laggingReplicas.nonEmpty)
    debug("Lagging replicas for partition %s are %s".format(TopicAndPartition(topic, partitionId), laggingReplicas.map(_.brokerId).mkString(",")))
  laggingReplicas
}
kafka 副本同步-ISR定时检查线程
最新推荐文章于 2024-07-20 18:08:51 发布