In the previous subsection we saw that every request sent by a client is wrapped into a Request object and stored in the RequestChannel's requestQueue. In this subsection we look at how the server side processes the Requests sitting in that queue.
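As a quick refresher on the structure involved, here is a simplified sketch of the queue inside RequestChannel (a sketch under assumptions, not the full source: in the 0.10.x line the pending requests live in a single bounded ArrayBlockingQueue sized by queued.max.requests, which defaults to 500; AnyRef stands in for RequestChannel.Request to keep the example self-contained):
import java.util.concurrent.ArrayBlockingQueue

class RequestChannelSketch(queueSize: Int /* queued.max.requests, default 500 */) {
  // all Processor threads feed this single bounded queue
  private val requestQueue = new ArrayBlockingQueue[AnyRef](queueSize)

  // Processors call this after decoding a complete request off the wire
  def sendRequest(request: AnyRef): Unit = requestQueue.put(request)

  // handler threads call this to take work off the queue (blocking)
  def receiveRequest(): AnyRef = requestQueue.take()
}
With that picture in mind, back to KafkaServer.startup():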
/**
* Start up API for bringing up a single instance of the Kafka server.
* Instantiates the LogManager, the SocketServer and the request handlers - KafkaRequestHandlers
*/
def startup() {
try {
. . . omitted
//This is where we previously saw the SocketServer service being started.
//By now the SocketServer is running and has already received request data.
//What we are analyzing this time is how that received data gets processed, so we keep reading further down.
//TODO start the SocketServer service
socketServer = new SocketServer(config, metrics, kafkaMetricsTime)
socketServer.startup()
. . . omitted
/* start processing requests */
//The comment the source author left above makes it clear: this is where the received requests are processed.
apis = new KafkaApis(socketServer.requestChannel, replicaManager, adminManager, groupCoordinator,
kafkaController, zkUtils, config.brokerId, config, metadataCache, metrics, authorizer, quotaManagers, clusterId)
//In Scala, new-ing an object runs that class's primary constructor,
//and in Scala code you will often find the authors putting initialization work in the primary constructor.
//So let's take a look at KafkaRequestHandlerPool's primary constructor.
requestHandlerPool = new KafkaRequestHandlerPool(config.brokerId, socketServer.requestChannel, apis, config.numIoThreads)
. . . omitted
}
}
catch {
case e: Throwable =>
fatal("Fatal error during KafkaServer startup. Prepare to shutdown", e)
isStartingUp.set(false)
shutdown()
throw e
}
}
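One Scala note before diving in: every statement written directly in a class body is part of the primary constructor, so it runs the moment the class is instantiated. A minimal self-contained example:
class Pool(numThreads: Int) {
  // this statement lives in the class body, i.e. in the primary
  // constructor, so it executes as soon as `new Pool(...)` is called
  println(s"starting $numThreads threads")
}

object Demo extends App {
  new Pool(8) // prints "starting 8 threads" immediately
}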
Now let's analyze KafkaRequestHandlerPool's primary constructor:
class KafkaRequestHandlerPool(val brokerId: Int,
val requestChannel: RequestChannel,
val apis: KafkaApis,
numThreads: Int) extends Logging with KafkaMetricsGroup {
/* a meter to track the average free capacity of the request handlers */
private val aggregateIdleMeter = newMeter("RequestHandlerAvgIdlePercent", "percent", TimeUnit.NANOSECONDS)
this.logIdent = "[Kafka Request Handler on Broker " + brokerId + "], "
val threads = new Array[Thread](numThreads)
val runnables = new Array[KafkaRequestHandler](numThreads)
//Create and start numThreads KafkaRequestHandler threads.
//numThreads defaults to 8 and is controlled by the num.io.threads config.
for(i <- 0 until numThreads) {
runnables(i) = new KafkaRequestHandler(i, brokerId, aggregateIdleMeter, numThreads, requestChannel, apis)
threads(i) = Utils.daemonThread("kafka-request-handler-" + i, runnables(i))
threads(i).start()
}
def shutdown() {
info("shutting down")
for(handler <- runnables)
handler.shutdown
for(thread <- threads)
thread.join
info("shut down completely")
}
}
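Utils.daemonThread simply wraps the Runnable in a named daemon thread. A hedged sketch of the behavior (the real helper lives in org.apache.kafka.common.utils.Utils; this shows what it does, not the exact source):
def daemonThread(name: String, runnable: Runnable): Thread = {
  val thread = new Thread(runnable, name)
  thread.setDaemon(true) // a daemon thread will not keep the JVM alive by itself
  thread
}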
Next let's look at the run method of the KafkaRequestHandler thread, to see what it actually does once started:
def run() {
while(true) {
try {
var req : RequestChannel.Request = null
while (req == null) {
// We use a single meter for aggregate idle percentage for the thread pool.
// Since meter is calculated as total_recorded_value / time_window and
// time_window is independent of the number of threads, each recorded idle
// time should be discounted by # threads.
val startSelectTime = SystemTime.nanoseconds
// here we keep pulling Requests out of the queue
req = requestChannel.receiveRequest(300)
val idleTime = SystemTime.nanoseconds - startSelectTime
aggregateIdleMeter.mark(idleTime / totalHandlerThreads)
}
if(req eq RequestChannel.AllDone) {
debug("Kafka request handler %d on broker %d received shut down command".format(
id, brokerId))
return
}
req.requestDequeueTimeMs = SystemTime.milliseconds
trace("Kafka request handler %d on broker %d handling request %s".format(id, brokerId, req))
//TODO handle this request
apis.handle(req)
} catch {
case e: Throwable => error("Exception when handling request", e)
}
}
}
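requestChannel.receiveRequest(300) is essentially a timed poll against the blocking queue we sketched at the start of this subsection: if no request arrives within 300 ms it returns null, the loop records the elapsed time as idle time, and it tries again. A sketch of what the call boils down to, written as an extra method on the earlier RequestChannelSketch (an assumption based on the 0.10.x source):
import java.util.concurrent.TimeUnit

// blocks for at most `timeout` ms; returns null if the queue stays empty
def receiveRequest(timeout: Long): AnyRef =
  requestQueue.poll(timeout, TimeUnit.MILLISECONDS)
Note the idle-meter arithmetic in the loop: since the meter's time window is shared by the whole pool, each thread's recorded idle time is divided by totalHandlerThreads so the aggregate reads as an average idle percentage.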
So the threads started here (8 by default) do nothing but fetch Requests from the queue, and every Request fetched is handed to KafkaApis for processing. Let's keep following the code:
def handle(request: RequestChannel.Request) {
try {
trace("Handling request:%s from connection %s;securityProtocol:%s,principal:%s".
format(request.requestDesc(true), request.connectionId, request.securityProtocol, request.session.principal))
ApiKeys.forId(request.requestId) match {
// handle requests from producers
case ApiKeys.PRODUCE => handleProducerRequest(request)
case ApiKeys.FETCH => handleFetchRequest(request)
. . . omitted
Let's keep tracing into the handleProducerRequest(request) method:
/**
* Handle a produce request
*/
def handleProducerRequest(request: RequestChannel.Request) {
val produceRequest = request.body.asInstanceOf[ProduceRequest]
val numBytesAppended = request.header.sizeOf + produceRequest.sizeOf
val (existingAndAuthorizedForDescribeTopics, nonExistingOrUnauthorizedForDescribeTopics) = produceRequest.partitionRecords.asScala.partition {
case (topicPartition, _) => authorize(request.session, Describe, new Resource(auth.Topic, topicPartition.topic)) && metadataCache.contains(topicPartition.topic)
}
val (authorizedRequestInfo, unauthorizedForWriteRequestInfo) = existingAndAuthorizedForDescribeTopics.partition {
case (topicPartition, _) => authorize(request.session, Write, new Resource(auth.Topic, topicPartition.topic))
}
// the callback for sending a produce response
// Step 2
def sendResponseCallback(responseStatus: Map[TopicPartition, PartitionResponse]) {
val mergedResponseStatus = responseStatus ++
unauthorizedForWriteRequestInfo.mapValues(_ =>
new PartitionResponse(Errors.TOPIC_AUTHORIZATION_FAILED.code, -1, Message.NoTimestamp)) ++
nonExistingOrUnauthorizedForDescribeTopics.mapValues(_ =>
new PartitionResponse(Errors.UNKNOWN_TOPIC_OR_PARTITION.code, -1, Message.NoTimestamp))
var errorInResponse = false
mergedResponseStatus.foreach { case (topicPartition, status) =>
if (status.errorCode != Errors.NONE.code) {
errorInResponse = true
debug("Produce request with correlation id %d from client %s on partition %s failed due to %s".format(
request.header.correlationId,
request.header.clientId,
topicPartition,
Errors.forCode(status.errorCode).exceptionName))
}
}
// Step 4
def produceResponseCallback(delayTimeMs: Int) {
if (produceRequest.acks == 0) {
// no operation needed if producer request.required.acks = 0; however, if there is any error in handling
// the request, since no response is expected by the producer, the server will close socket server so that
// the producer client will know that some error has happened and will refresh its metadata
if (errorInResponse) {
val exceptionsSummary = mergedResponseStatus.map { case (topicPartition, status) =>
topicPartition -> Errors.forCode(status.errorCode).exceptionName
}.mkString(", ")
info(
s"Closing connection due to error during produce request with correlation id ${request.header.correlationId} " +
s"from client id ${request.header.clientId} with ack=0\n" +
s"Topic and partition to exceptions: $exceptionsSummary"
)
requestChannel.closeConnection(request.processor, request)
} else {
requestChannel.noOperation(request.processor, request)
}
} else {
val respHeader = new ResponseHeader(request.header.correlationId)
val respBody = request.header.apiVersion match {
case 0 => new ProduceResponse(mergedResponseStatus.asJava)
case version@(1 | 2) => new ProduceResponse(mergedResponseStatus.asJava, delayTimeMs, version)
// This case shouldn't happen unless a new version of ProducerRequest is added without
// updating this part of the code to handle it properly.
case version => throw new IllegalArgumentException(s"Version `$version` of ProduceRequest is not handled. Code must be updated.")
}
// Step 5
requestChannel.sendResponse(new RequestChannel.Response(request, new ResponseSend(request.connectionId, respHeader, respBody)))
}
}
// When this callback is triggered, the remote API call has completed
request.apiRemoteCompleteTimeMs = SystemTime.milliseconds
// Step 3
quotas.produce.recordAndMaybeThrottle(
request.session.sanitizedUser,
request.header.clientId,
numBytesAppended,
produceResponseCallback)
}
if (authorizedRequestInfo.isEmpty)
sendResponseCallback(Map.empty)
else {
val internalTopicsAllowed = request.header.clientId == AdminUtils.AdminClientId
// Convert ByteBuffer to ByteBufferMessageSet
val authorizedMessagesPerPartition = authorizedRequestInfo.map {
case (topicPartition, buffer) => (topicPartition, new ByteBufferMessageSet(buffer))
}
// call the replica manager to append messages to the replicas
// Step 1: write the data
replicaManager.appendMessages(
produceRequest.timeout.toLong,
produceRequest.acks,
internalTopicsAllowed,
authorizedMessagesPerPartition,
sendResponseCallback)
// if the request is put into the purgatory, it will have a held reference
// and hence cannot be garbage collected; hence we clear its data here in
// order to let GC re-claim its memory since it is already appended to log
produceRequest.clearPartitionRecords()
}
}
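The numbered steps above form a chain of nested callbacks: appendMessages (Step 1) takes sendResponseCallback and invokes it once the write completes (Step 2); that callback hands produceResponseCallback to the quota manager (Step 3), which invokes it with the computed throttle delay (Step 4); and it in turn calls requestChannel.sendResponse (Step 5). A stripped-down, runnable sketch of just this control flow (the names mirror the real ones, but the bodies are placeholders):
object CallbackChainDemo extends App {
  // Step 1: "write" the data, then fire the response callback
  def appendMessages(responseCallback: String => Unit): Unit = {
    println("step 1: append messages to the log")
    responseCallback("partition statuses")
  }

  // Step 3: record quota usage, then fire the callback with a throttle delay
  def recordAndMaybeThrottle(callback: Int => Unit): Unit = {
    println("step 3: record quota usage")
    callback(0)
  }

  appendMessages { status =>
    println(s"step 2: sendResponseCallback($status)")
    recordAndMaybeThrottle { delayMs =>
      println(s"step 4: produceResponseCallback(delayTimeMs = $delayMs)")
      println("step 5: requestChannel.sendResponse(...)")
    }
  }
}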
That covers the call order of the code above. Following Step 5, click into requestChannel.sendResponse and we see:
/** Send a response back to the socket server to be sent over the network */
def sendResponse(response: RequestChannel.Response) {
//TODO put the response into the responseQueues queue for its Processor
responseQueues(response.processor).put(response)
for(onResponse <- responseListeners)
onResponse(response.processor)
}
We find that the response is ultimately added to a blocking queue:
// new Array[BlockingQueue[RequestChannel.Response]](numProcessors)
// In other words, each Processor has its own BlockingQueue.
private val responseQueues = new Array[BlockingQueue[RequestChannel.Response]](numProcessors)
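The other half of sendResponse, the responseListeners loop, is how the network layer finds out a response is ready: each Processor registers a listener with the RequestChannel, and when a response lands in a Processor's queue the listener wakes that Processor's selector so it can write the response back to the client. A self-contained sketch of the pattern (an assumption about this version's wiring, with println standing in for the selector wakeup):
object ResponseListenerDemo extends App {
  // in RequestChannel, listeners are just functions from processor id to Unit
  var responseListeners: List[Int => Unit] = Nil
  def addResponseListener(onResponse: Int => Unit): Unit =
    responseListeners ::= onResponse

  // each Processor registers: "wake me up if this response belongs to me"
  val myProcessorId = 0
  addResponseListener(id => if (id == myProcessorId) println(s"wakeup processor $id"))

  // sendResponse then notifies every listener with the target processor's id
  responseListeners.foreach(onResponse => onResponse(myProcessorId))
}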
Each handler thread, having processed a request by writing its data to disk, builds a Response and puts it into one of the responseQueues. There is one responseQueue per Processor thread (three by default, since num.network.threads defaults to 3). At this point the response destined for the client is sitting in a queue, waiting to be sent.
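For completeness, these per-Processor queues are created up front, one slot per network thread. A minimal sketch of the initialization (an assumption: in this version each slot holds a LinkedBlockingQueue; String stands in for RequestChannel.Response to keep the example self-contained):
import java.util.concurrent.{BlockingQueue, LinkedBlockingQueue}

val numProcessors = 3 // num.network.threads defaults to 3
val responseQueues = new Array[BlockingQueue[String]](numProcessors)
for (i <- 0 until numProcessors)
  responseQueues(i) = new LinkedBlockingQueue[String]()

// sendResponse then indexes into the array by the processor that owns the connection:
// responseQueues(response.processor).put(response)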