仓库位置
日志生成:https://github.com/SmallScorpion/gmall-mock.git
日志服务器:https://github.com/SmallScorpion/gmall-spark-ch-es-realtime.git
手动提交偏移量
- 第一次启动时会先从Redis加载偏移量;若Redis中没有数据,则从Kafka起始位置开始消费,并在业务计算后将偏移量保存到Redis中
- 中间循环过程是业务到redis保存数据的过程
- 若中途宕机,重启服务后会从Redis中保存的偏移量位置继续消费,保证不重复、不丢失
OffsetManager
package com.warehouse.gmall.realtime.util
import java.util
import org.apache.kafka.common.TopicPartition
import redis.clients.jedis.Jedis
import scala.collection.mutable
object OffsetManager {

  /**
   * Reads the saved consumer offsets for the given topic / consumer group from Redis.
   *
   * Redis layout: type -> hash, key -> offset:[topic]:[groupId],
   * field -> partition id, value -> offset.
   *   write: hmset offset:GMALL_START:group_dau 0 12 1 15 2 7 3 18
   *   read : hgetall offset:GMALL_START:group_dau
   *
   * @param topicName kafka topic whose offsets are looked up
   * @param groupId   consumer group id
   * @return partition -> offset map; empty when Redis has no entry for this key
   */
  def getOffset( topicName: String, groupId: String ): Map[TopicPartition, Long] = {
    val jedis: Jedis = RedisUtil.getJedisClient
    try {
      val offsetKey = "offset:" + topicName + ":" + groupId
      val offsetMap: util.Map[String, String] = jedis.hgetAll(offsetKey)
      // Explicit JavaConverters (.asScala) instead of the deprecated implicit JavaConversions
      import scala.collection.JavaConverters._
      offsetMap.asScala.map { case (partitionId, offset) =>
        (new TopicPartition(topicName, partitionId.toInt), offset.toLong)
      }.toMap
    } finally {
      // Return the connection to the pool — the original leaked it on every call
      jedis.close()
    }
  }

  // TODO write the offsets back to Redis after each successfully processed batch
}
业务代码
// Topic and consumer group for the DAU stream
val topic: String = "GMALL_SPARK_CK_ES_START"
val groupId = "DAU_GROUP"

// TODO read the last committed offsets from Redis
val kafkaOffsetMap: Map[TopicPartition, Long] = OffsetManager.getOffset( topic, groupId )

// TODO consume kafka data
// val bound to an if-expression instead of a null-initialized var: resume from
// the saved offsets when Redis has them, otherwise start from the consumer-group
// default position (first run / no saved state)
val recordInputStream: InputDStream[ConsumerRecord[String, String]] =
  if ( kafkaOffsetMap != null && kafkaOffsetMap.nonEmpty ) {
    MyKafkaUtil.getKafkaStream( topic, ssc, kafkaOffsetMap, groupId )
  } else {
    MyKafkaUtil.getKafkaStream( topic, ssc )
  }