KafkaControllerChannelManager
在多个broker中,有一个broker会被选举为Controller Leader,负责管理整个集群中所有分区和副本的状态。例如,当分区的leader副本出现故障时,由controller负责为该分区重新选举新的leader副本;ISR集合发生变化时,由controller通知其他broker更新MetadataCache信息。介绍controller之前,先了解kafka在zookeeper上注册的节点的含义:
/brokers/ids:记录了集群中可用的Broker的id
/brokers/topics/[topic]/partitions/[partition_id]/state: 记录了某Partition的Leader副本所在的brokerID、leader_epoch、ISR集合、ZKVersion等信息
/controller: 当前Controller Leader的id,同时也用于Controller Leader的选举。
/admin/reassign_partitions: 记录了需要进行副本重新分配的分区
/admin/preferred_replica_election: 记录了需要进行"优先副本"选举的分区。"优先副本"是创建分区时为其指定的第一个副本。
/admin/delete_topics: 记录了待删除的topic
/isr_change_notification: 记录了一段时间内ISR集合发生变化的分区
KafkaController是Zookeeper和kafka集群交互的桥梁,它一方面对zk进行监听,其中包括Broker写入到zk中的数据,也包括管理员使用脚本写入的数据;另一方面根据ZK中的数据变化做出相应的处理,通过LeaderAndISRRequest、StopReplicaRequest、UpdateMetadataRequest等请求控制每个Broker的工作。
KafkaChannelManager
leader通过发送多种请求管理集群中的其他broker,KafkaController使用KafkaChannelManager管理其与集群中各个Broker之间的网络交互。KafkaChannelManager中使用ControllerBrokerStateInfo类表示与一个Broker连接的各种信息。
/**
 * Groups the objects the controller keeps for its connection to a single broker:
 * the network client, the broker's node descriptor, the outbound request queue,
 * and the thread that drains that queue.
 */
case class ControllerBrokerStateInfo(networkClient: NetworkClient,// network client used for controller-to-broker communication
brokerNode: Node,// network location of the broker (built from its id, host and port)
messageQueue: BlockingQueue[QueueItem],// buffer queue holding the requests destined for this broker
requestSendThread: RequestSendThread)// thread that sends the queued requests
RequestSendThread继承了ShutdownableThread,在线程停止之前会循环执行doWork()方法,通过NetworkClientBlockingOps完成发送请求并阻塞等待响应。
/**
 * One iteration of the send loop: takes the next item from the queue, sends its request to the
 * broker (retrying for as long as this thread is running), converts the reply into the matching
 * response object and passes it to the item's callback, if one was supplied.
 *
 * Any exception that escapes the retry loop or the response handling is logged and the
 * connection to the broker is closed.
 */
override def doWork(): Unit = {
// Pause between retries: sleeps 300 ms. swallowTrace presumably logs and suppresses any
// exception raised by the sleep — confirm in CoreUtils.
def backoff(): Unit = CoreUtils.swallowTrace(Thread.sleep(300))
// Take the next queued item and destructure it into its four fields.
val QueueItem(apiKey, apiVersion, request, callback) = queue.take()
import NetworkClientBlockingOps._
// Stays null if the loop exits without a successful send (e.g. the thread was stopped).
var clientResponse: ClientResponse = null
try {
lock synchronized {
var isSendSuccessful = false
while (isRunning.get() && !isSendSuccessful) {
// Keep retrying until the send succeeds or this thread is no longer running.
// if a broker goes down for a long time, then at some point the controller's zookeeper listener will trigger a
// removeBroker which will invoke shutdown() on this thread. At that point, we will stop retrying.
try {
if (!brokerReady()) {// broker is not ready to receive the request yet: back off and try again
isSendSuccessful = false
backoff()// wait 300 ms before the next attempt
}
else {// build the ClientRequest and send it
// Use the explicit API version when one was queued, otherwise the client's default header.
val requestHeader = apiVersion.fold(networkClient.nextRequestHeader(apiKey))(networkClient.nextRequestHeader(apiKey, _))
val send = new RequestSend(brokerNode.idString, requestHeader, request.toStruct)
val clientRequest = new ClientRequest(time.milliseconds(), true, send, null)
// NOTE(review): by its name this call blocks until the response arrives — confirm in NetworkClientBlockingOps.
clientResponse = networkClient.blockingSendAndReceive(clientRequest)(time)
isSendSuccessful = true
}
} catch {
case e: Throwable => // if the send was not successful, reconnect to broker and resend the message
warn(("Controller %d epoch %d fails to send request %s to broker %s. " +
"Reconnecting to broker.").format(controllerId, controllerContext.epoch,
request.toString, brokerNode.toString()), e)
networkClient.close(brokerNode.idString)
isSendSuccessful = false
backoff()
}
}
if (clientResponse != null) {
// Dispatch on the API key of the request that was sent. Only LEADER_AND_ISR, STOP_REPLICA and
// UPDATE_METADATA_KEY are handled; any other key raises a KafkaException, which is caught
// by the outer catch below.
val response = ApiKeys.forId(clientResponse.request.request.header.apiKey) match {
case ApiKeys.LEADER_AND_ISR => new LeaderAndIsrResponse(clientResponse.responseBody)
case ApiKeys.STOP_REPLICA => new StopReplicaResponse(clientResponse.responseBody)
case ApiKeys.UPDATE_METADATA_KEY => new UpdateMetadataResponse(clientResponse.responseBody)
case apiKey => throw new KafkaException(s"Unexpected apiKey received: $apiKey")
}
stateChangeLogger.trace("Controller %d epoch %d received response %s for a request sent to broker %s"
.format(controllerId, controllerContext.epoch, response.toString, brokerNode.toString))
// Hand the parsed response to the callback that was queued with the request, if any.
if (callback != null) {
callback(response)
}
}
}
} catch {
case e: Throwable =>
error("Controller %d fails to send a request to broker %s".format(controllerId, brokerNode.toString()), e)
// If there is any socket error (eg, socket timeout), the connection is no longer usable and needs to be recreated.
networkClient.close(brokerNode.idString)
}
}
KafkaChannelManager的addNewBroker()方法和removeExistingBroker()方法实现了对brokerStateInfo集合的管理,sendRequest()方法则把请求放入对应Broker的messageQueue中等待发送。
/**
 * Creates the connection state for `broker` and records it in `brokerStateInfo`.
 *
 * Builds, in order: an empty request queue, the broker's `Node`, a `NetworkClient` dedicated to
 * that node, and a `RequestSendThread` wired to the queue and the client. The thread is only
 * constructed here; this method does not start it.
 *
 * @param broker the broker to register
 */
private def addNewBroker(broker: Broker): Unit = {
  // Outbound queue that sendRequest() fills and the send thread drains.
  val requestQueue = new LinkedBlockingQueue[QueueItem]
  debug("Controller %d trying to connect to broker %d".format(config.brokerId, broker.id))

  // Resolve the broker's endpoint for the inter-broker security protocol.
  val endPoint = broker.getBrokerEndPoint(config.interBrokerSecurityProtocol)
  val node = new Node(broker.id, endPoint.host, endPoint.port)

  // Network stack for this broker: channel builder -> selector -> client.
  val channelBuilder = ChannelBuilders.create(
    config.interBrokerSecurityProtocol,
    Mode.CLIENT,
    LoginType.SERVER,
    config.values,
    config.saslMechanismInterBrokerProtocol,
    config.saslInterBrokerHandshakeRequestEnable
  )
  val selector = new Selector(
    NetworkReceive.UNLIMITED,
    config.connectionsMaxIdleMs,
    metrics,
    time,
    "controller-channel",
    Map("broker-id" -> broker.id.toString).asJava,
    false,
    channelBuilder
  )
  // The metadata updater is given just this one node.
  val client = new NetworkClient(
    selector,
    new ManualMetadataUpdater(Seq(node).asJava),
    config.brokerId.toString,
    1,
    0,
    Selectable.USE_DEFAULT_BUFFER_SIZE,
    Selectable.USE_DEFAULT_BUFFER_SIZE,
    config.requestTimeoutMs,
    time
  )

  // Thread name, prefixed with "<prefix>:" when a prefix is configured.
  val sendThreadName = threadNamePrefix.fold(
    "Controller-%d-to-broker-%d-send-thread".format(config.brokerId, broker.id)
  )(prefix => "%s:Controller-%d-to-broker-%d-send-thread".format(prefix, config.brokerId, broker.id))

  val sendThread = new RequestSendThread(config.brokerId, controllerContext, requestQueue, client,
    node, config, time, sendThreadName)
  sendThread.setDaemon(false)

  // Record the new broker's state so sendRequest() can find its queue.
  brokerStateInfo.put(broker.id, ControllerBrokerStateInfo(client, node, requestQueue, sendThread))
}
/**
 * Tears down the controller's connection state for one broker and removes it from
 * `brokerStateInfo`.
 *
 * The send thread is shut down BEFORE the network client is closed. The send thread uses the
 * same `NetworkClient` from its `doWork()` loop, so closing the client first would let two
 * threads touch it concurrently (the problem tracked upstream as KAFKA-4959).
 * NOTE(review): this relies on `shutdown()` not returning until the thread has stopped —
 * confirm against ShutdownableThread.
 *
 * Any failure during teardown is logged and swallowed, as before.
 *
 * @param brokerState the state bundle of the broker being removed
 */
private def removeExistingBroker(brokerState: ControllerBrokerStateInfo): Unit = {
  try {
    // 1. Stop the sender so nothing else is using the network client.
    brokerState.requestSendThread.shutdown()
    // 2. Now it is safe to close the underlying connection.
    brokerState.networkClient.close()
    // 3. Drop any requests that were still waiting to be sent.
    brokerState.messageQueue.clear()
    // 4. Forget the broker.
    brokerStateInfo.remove(brokerState.brokerNode.id)
  } catch {
    case e: Throwable => error("Error while removing broker by the controller", e)
  }
}
/**
 * Queues a request for delivery to the given broker.
 *
 * The request is only appended to the broker's message queue here. If the broker has no entry in
 * `brokerStateInfo`, the request is dropped and a warning is logged.
 *
 * @param brokerId   id of the destination broker
 * @param apiKey     API key of the request
 * @param apiVersion optional explicit API version for the request header
 * @param request    the request to send
 * @param callback   invoked with the response once it is received; may be null
 */
def sendRequest(brokerId: Int, apiKey: ApiKeys, apiVersion: Option[Short], request: AbstractRequest, callback: AbstractRequestResponse => Unit = null): Unit = {
  brokerLock synchronized {
    brokerStateInfo.get(brokerId).fold {
      // Unknown broker: nothing to enqueue on.
      warn("Not sending request %s to broker %d, since it is offline.".format(request, brokerId))
    } { target =>
      // Buffer the request; the broker's send thread takes it from here.
      target.messageQueue.put(QueueItem(apiKey, apiVersion, request, callback))
    }
  }
}