KafkaServer.startup()  // TODO: initialize the logManager
logManager = createLogManager(zkUtils.zkClient, brokerState)  // TODO: start the logManager
// Call path: logManager.startup() -> createLogManager -> new LogManager()
//   { createAndValidateLogDirs(logDirs); loadLogs() }

/**
 * Validate the configured log directories: reject duplicate paths, create any
 * directory that does not exist yet, and fail fast when a path is not a
 * readable directory.
 */
private def createAndValidateLogDirs(dirs: Seq[File]) {
  // Duplicate canonical paths collapse in the Set, so a smaller Set means duplicates.
  if (dirs.map(_.getCanonicalPath).toSet.size < dirs.size)
    throw new KafkaException("Duplicate log directory found: " + logDirs.mkString(", "))
  // Walk every configured directory.
  for (dir <- dirs) {
    if (!dir.exists) {
      info("Log directory '" + dir.getAbsolutePath + "' not found, creating it.")
      // First startup on this machine: create the whole directory tree.
      val created = dir.mkdirs()
      if (!created)
        throw new KafkaException("Failed to create data directory " + dir.getAbsolutePath)
    }
    if (!dir.isDirectory || !dir.canRead)
      throw new KafkaException(dir.getAbsolutePath + " is not a readable log directory.")
  }
}

/**
 * Load the logs found in the configured data directories. One fixed-size
 * thread pool is created per directory, and every partition subdirectory is
 * loaded as a separate task submitted to that pool.
 */
private def loadLogs(): Unit = {
  info("Loading logs.")
  val startMs = time.milliseconds
  val threadPools = mutable.ArrayBuffer.empty[ExecutorService]
  val jobs = mutable.Map.empty[File, Seq[Future[_]]]
  // Walk every configured log directory.
  for (dir <- this.logDirs) {
    // One thread pool per directory; its threads perform the actual log loading.
    val pool = Executors.newFixedThreadPool(ioThreads)
    threadPools.append(pool)
    val cleanShutdownFile = new File(dir, Log.CleanShutdownFile)
    if (cleanShutdownFile.exists) {
      debug("Found clean shutdown file. " +
        "Skipping recovery for all logs in data directory: " +
        dir.getAbsolutePath)
    } else {
      // log recovery itself is being performed by `Log` class during initialization
      brokerState.newState(RecoveringFromUncleanShutdown)
    }
    // Recovery points tell each Log where to resume; an unreadable checkpoint
    // file resets every recovery point in this directory to 0.
    var recoveryPoints = Map[TopicAndPartition, Long]()
    try {
      recoveryPoints = this.recoveryPointCheckpoints(dir).read
    } catch {
      case e: Exception =>
        warn("Error occured while reading recovery-point-offset-checkpoint file of directory " + dir, e)
        warn("Resetting the recovery checkpoint to 0")
    }
    val jobsForDir = for {
      dirContent <- Option(dir.listFiles).toList
      // Each subdirectory here is one partition's directory.
      logDir <- dirContent if logDir.isDirectory
    } yield {
      CoreUtils.runnable {
        debug("Loading log '" + logDir.getName + "'")
        // Parse topic/partition out of the directory name.
        val topicPartition = Log.parseTopicPartitionName(logDir)
        val config = topicConfigs.getOrElse(topicPartition.topic, defaultConfig)
        val logRecoveryPoint = recoveryPoints.getOrElse(topicPartition, 0L)
        // Build the Log object and register it in the logs pool.
        val current = new Log(logDir, config, logRecoveryPoint, scheduler, time)
        val previous = this.logs.put(topicPartition, current)
        if (previous != null) {
          throw new IllegalArgumentException(
            "Duplicate log directories found: %s, %s!".format(
              current.dir.getAbsolutePath, previous.dir.getAbsolutePath))
        }
      }
    }
    jobs(cleanShutdownFile) = jobsForDir.map(pool.submit).toSeq
  }
  // NOTE(review): this excerpt ends here — the submitted `jobs` are never
  // awaited and the `threadPools` are never shut down in the visible code;
  // presumably the omitted tail of loadLogs does both. Confirm against the
  // full source. Walkthrough continues with logManager.startup().
}
/**
 * Start the background work: three periodic housekeeping tasks on the
 * scheduler plus, when log compaction is enabled, the log cleaner.
 */
def startup() {
  if (scheduler != null) {
    // Task 1: periodically delete log segments that exceeded their retention
    // limits (by time or size) — see cleanupLogs.
    info("Starting log cleanup with a period of %d ms.".format(retentionCheckMs))
    scheduler.schedule("kafka-log-retention",
                       cleanupLogs,
                       delay = InitialTaskDelayMs,
                       period = retentionCheckMs,
                       TimeUnit.MILLISECONDS)
    // Task 2: periodically flush in-memory log data to disk — see flushDirtyLogs.
    info("Starting log flusher with a default period of %d ms.".format(flushCheckMs))
    scheduler.schedule("kafka-log-flusher",
                       flushDirtyLogs,
                       delay = InitialTaskDelayMs,
                       period = flushCheckMs,
                       TimeUnit.MILLISECONDS)
    // Task 3: periodically rewrite the recovery-point checkpoint file. A
    // restarting broker reads this file to know from where to recover data.
    scheduler.schedule("kafka-recovery-point-checkpoint",
                       checkpointRecoveryPointOffsets,
                       delay = InitialTaskDelayMs,
                       period = flushCheckpointMs,
                       TimeUnit.MILLISECONDS)
  }
  if (cleanerConfig.enableCleaner)
    cleaner.startup()
}
// Task 1 in detail: cleanupLogs — periodically delete expired log files.
/**
 * Delete old log segments that qualify for deletion under the retention
 * policy (time- and size-based). Compacted logs are skipped here — the log
 * cleaner handles those separately.
 */
def cleanupLogs() {
  debug("Beginning log cleanup...")
  val startMs = time.milliseconds
  var deleted = 0
  // Only non-compacted logs are eligible for retention-based deletion.
  for (log <- allLogs; if !log.config.compact) {
    debug("Garbage collecting '" + log.name + "'")
    // Delete the segments that satisfy the deletion conditions (time and size).
    deleted += log.deleteOldSegments()
  }
  debug("Log cleanup completed. " + deleted + " files deleted in " +
    (time.milliseconds - startMs) / 1000 + " seconds")
}
// Task 2 in detail: flushDirtyLogs — periodically flush in-memory data to disk.
/**
 * Flush every log whose unflushed data is older than its configured flush
 * interval (`flush.ms`). The interval defaults to Long.MaxValue, so by
 * default Kafka never forces a flush here — writing dirty pages to disk is
 * left to the operating system's page cache. Operators may lower the value
 * to force periodic flushes.
 */
private def flushDirtyLogs() = {
  debug("Checking for dirty logs to flush...")
  for ((topicAndPartition, log) <- logs) {
    try {
      val timeSinceLastFlush = time.milliseconds - log.lastFlushTime
      debug("Checking if flush is needed on " + topicAndPartition.topic + " flush interval " + log.config.flushMs +
        " last flushed " + log.lastFlushTime + " time since last flush: " + timeSinceLastFlush)
      if (timeSinceLastFlush >= log.config.flushMs)
        log.flush
    } catch {
      // FIX: was `case e: Throwable =>` — catching Throwable silently swallows
      // fatal VM errors (OutOfMemoryError, InterruptedException, ...).
      // NonFatal keeps the per-log error isolation but lets fatal errors
      // propagate out of the scheduled task.
      case scala.util.control.NonFatal(e) =>
        error("Error flushing topic " + topicAndPartition.topic, e)
    }
  }
}
// Task 3 in detail: checkpointRecoveryPointOffsets — update the checkpoint file.
/**
 * Write the recovery-point checkpoint file for every configured log
 * directory.
 */
def checkpointRecoveryPointOffsets() {
  this.logDirs.foreach(checkpointLogsInDir)
}

/**
 * Write the recovery points of all logs living under `dir` into that
 * directory's checkpoint file.
 */
private def checkpointLogsInDir(dir: File): Unit = {
  // logsByDir may have no entry for a directory that holds no logs yet.
  this.logsByDir.get(dir.toString).foreach { logsInDir =>
    this.recoveryPointCheckpoints(dir).write(logsInDir.mapValues(_.recoveryPoint))
  }
}