基于Scala实现根据指定时间戳开始消费Kafka的数据

本文介绍了如何使用Scala编写一个Utils工具类:该类根据指定的时间戳查询Kafka各分区对应的起始offset,消费程序再从这些offset开始读取数据。通过这个工具,用户可以高效地定位并处理指定时间点之后的消息。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import java.text.SimpleDateFormat
import java.util.Properties
import java.util.concurrent.{Callable, Executors, FutureTask}
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConversions._

/**
  * Create by 写Scala的老刘 2019-05-13 09:43
  *
  * Demo entry point: consumes a Kafka topic starting from the offsets that
  * correspond to a given wall-clock time.
  *
  * The start offsets are resolved once through `Utils.getStartOffset`; after
  * that, every partition is read by its own thread with its own consumer.
  */
object GetDataByTimeStamp {

  /**
    * Resolves the per-partition start offsets for a fixed start time and then
    * prints every record of the topic from those offsets onwards.
    *
    * The partition readers poll forever, so this method only returns (by
    * throwing) when one of them fails.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val topic = "TestTopic"
    val consumerProp = new Properties()
    consumerProp.put("bootstrap.servers", "Ip:port")
    consumerProp.put("group.id", "test")
    consumerProp.put("enable.auto.commit", "false")
    consumerProp.put("auto.commit.interval.ms", "1000")
    // Kafka authentication settings. If your Kafka cluster has no authentication
    // configured, remove the following three lines.
    System.setProperty("java.security.auth.login.config", "/home/hadoop/Test/config/kafka_client_jaas.conf")
    consumerProp.put("security.protocol", "SASL_PLAINTEXT")
    consumerProp.put("sasl.mechanism", "PLAIN")
    consumerProp.put("key.deserializer", classOf[StringDeserializer].getName)
    //consumerProp.put("value.deserializer", classOf[ByteArrayDeserializer].getName)
    consumerProp.put("value.deserializer", classOf[StringDeserializer].getName)

    // The point in time from which consumption should start.
    val startTime = "2019-05-06 12:00:00"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val fetchTime = sdf.parse(startTime).getTime

    // Partition id -> first offset whose timestamp is at or after fetchTime.
    val startOffset = Utils.getStartOffset(topic, fetchTime, consumerProp)
    //val startOffset=collection.immutable.HashMap[Int,Long]()

    println(startOffset)

    // The topic has 3 partitions, so one thread per partition.
    val partitionCount = 3
    val threadPool = Executors.newFixedThreadPool(partitionCount)
    try {
      // Submit ALL readers before waiting on any of them: each reader loops
      // forever, so waiting inside the submission loop would start only the first.
      val pending = (0 until partitionCount).map { partition =>
        val begin = startOffset.get(partition)
        threadPool.submit(new Runnable {
          override def run(): Unit = consumePartition(topic, partition, begin, consumerProp)
        })
      }
      // Blocks while the readers run; rethrows the first reader failure.
      pending.foreach(_.get())
    } finally {
      // Interrupt the remaining readers so they close their consumers and exit.
      threadPool.shutdownNow()
    }
  }

  /**
    * Reads a single partition forever and prints every record.
    *
    * A dedicated consumer is created here because KafkaConsumer must not be
    * shared between threads, and `assign` replaces any earlier assignment.
    *
    * @param topic       topic to read
    * @param partition   partition id within the topic
    * @param startOffset offset to start from; when `None` (no offset was resolved
    *                    for this partition) reading starts at the end of the
    *                    partition, i.e. only newly produced records are seen
    * @param props       consumer configuration used to build the consumer
    */
  private def consumePartition(topic: String,
                               partition: Int,
                               startOffset: Option[Long],
                               props: Properties): Unit = {
    import scala.collection.JavaConverters._

    val tp = new TopicPartition(topic, partition)
    val consumer = new KafkaConsumer[String, String](props)
    try {
      consumer.assign(java.util.Arrays.asList(tp))
      startOffset match {
        case Some(offset) => consumer.seek(tp, offset)
        case None         => consumer.seekToEnd(java.util.Arrays.asList(tp))
      }
      while (true) {
        val records = consumer.poll(100)
        println(s"records:${records.count()}")
        for (record <- records.asScala) {
          println(s"partition:${record.partition()}  offset:${record.offset()} value:${record.value()}")
        }
      }
    } finally {
      consumer.close()
    }
  }
}

Utils工具类:

import java.util
import java.util.{ArrayList, Properties}
import collection.mutable.Map
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import scala.collection.JavaConversions._

object Utils {
  /**
    * Looks up, for every partition of `topic`, the earliest offset whose record
    * timestamp is greater than or equal to `fetchDataTime`.
    *
    * Partitions for which the broker reports no such offset (no record at or
    * after the timestamp) are left out of the result, so callers must handle a
    * missing key rather than assume every partition is present.
    *
    * @param topic         topic whose partitions are queried
    * @param fetchDataTime target timestamp in epoch milliseconds
    * @param properties    consumer configuration used to create a short-lived consumer
    * @return mutable map of partition id -> start offset; empty when the topic
    *         has no known partitions
    */
  def getStartOffset(topic:String,fetchDataTime:Long,properties: Properties ): Map[Int, Long] = {
    import scala.collection.JavaConverters._

    // partition id -> resolved offset
    val partitionOffset = Map[Int, Long]()
    val consumer = new KafkaConsumer[String, Array[Byte]](properties)
    try {
      // Defensive: treat a null partition list (e.g. unknown topic) as "no partitions".
      val partitionInfos = Option(consumer.partitionsFor(topic)).toList.flatMap(_.asScala)

      // (topic, partition) -> timestamp to search for; same timestamp for every partition.
      val timestampToSearch: util.Map[TopicPartition, java.lang.Long] =
        new util.HashMap[TopicPartition, java.lang.Long]()
      partitionInfos.foreach { info =>
        timestampToSearch.put(
          new TopicPartition(info.topic(), info.partition()),
          java.lang.Long.valueOf(fetchDataTime))
      }

      if (!timestampToSearch.isEmpty) {
        // offsetsForTimes maps a partition to null when it has no record at or
        // after the timestamp; Option(...) skips those instead of throwing an NPE.
        consumer.offsetsForTimes(timestampToSearch).asScala.foreach {
          case (tp, offsetAndTimestamp) =>
            Option(offsetAndTimestamp).foreach { found =>
              partitionOffset.put(tp.partition(), found.offset())
            }
        }
      }
    } finally {
      // Always release the consumer, even when a broker call fails.
      consumer.close()
    }
    partitionOffset
  }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值