kafka偏移量offset--java

最新推荐文章于 2025-06-05 12:50:10 发布

原创 · 最新推荐文章于 2025-06-05 12:50:10 发布 · 5.5k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#kafka偏移量 #offset

kafka 专栏收录该内容

3 篇文章

订阅专栏

本文介绍了一个 Apache Spark 与 Kafka 集成的应用案例，说明如何借助 Spark 的 KafkaCluster API 读取并提交（设置）Kafka 消费组的偏移量，并给出了具体的实现代码：首次消费时从各分区最早的偏移量开始读取；若已提交的偏移量因日志清理而过期（小于最早偏移量），同样回退到最早偏移量，以避免 OffsetOutOfRange 异常。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

maven

<!--
  Kafka core library. The "_2.10" artifact suffix presumably denotes the Scala 2.10
  build; confirm it matches the Scala version of the Spark build in use.
  NOTE(review): KafkaCluster, HasOffsetRanges and OffsetRange used by the Java code
  do not appear to come from this artifact; presumably a Spark Streaming Kafka
  dependency is required as well. Confirm against the project's full POM.
-->
<dependency>
   <groupId>org.apache.kafka</groupId>
   <artifactId>kafka_2.10</artifactId>
   <version>0.9.0-kafka-2.0.2</version>
</dependency>

/** Name of the Kafka topic this application consumes. */
private static final String TOPIC_SOURCE = "TP_LABEL";

/** Offset helper for the source topic; starts out {@code null}. */
private SparkKafka kafka;

public SparkStoredKuduApp(String[] args){

    kafka_conf = KafkaPool.getInstance().getConfig();

    kafka_conf.setProperty("zookeeper_connect", "personas1:2181,personas2:2181,personas4:2181");
    kafka_conf.setProperty("groupid_tdx", "tpsc01"); //tpsc01
    kafka_conf.setProperty("bootstrap.servers", "personas1:9092,personas2:9092,personas4:9092");

    kafka = new SparkKafka(kafkaParams());
    kafka.setTopics(new HashSet<>(Arrays.asList(TOPIC_SOURCE)));
}

/**
 * Builds the Kafka parameter map from the values currently held in {@code kafka_conf}.
 *
 * @return a new mutable map containing the consumer group id, the bootstrap
 *         servers and the ZooKeeper connect string
 */
private Map<String, String> kafkaParams() {
    String groupId = kafka_conf.getProperty("groupid_tdx");
    String bootstrapServers = kafka_conf.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
    String zookeeperConnect = kafka_conf.getProperty("zookeeper_connect");

    Map<String, String> params = new HashMap<>();
    params.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
    params.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    params.put("zookeeper.connect", zookeeperConnect);
    return params;
}

// Fetch the per-partition offsets from which Kafka consumption should start
Map<TopicAndPartition, Long> fromOffsets = kafka.getOffset();

/**
 * Helper around {@code KafkaCluster} that computes the offsets a consumer group
 * should start reading from and commits processed offsets back for that group.
 *
 * <p>The consumer group id is read from {@code kafkaParams} under
 * {@code ConsumerConfig.GROUP_ID_CONFIG}. Topics must be supplied through
 * {@link #setTopics(Set)} before {@link #getOffset()} is called.
 */
public class SparkKafka implements Serializable {
   private static final long serialVersionUID = -7633373735487600970L;
   private Map<String, String> kafkaParams = null;
   private Set<String> topics = null;
   private KafkaCluster kafkaCluster = null;

   /**
    * Creates the helper and immediately builds its {@code KafkaCluster}.
    *
    * @param kafkaParams Kafka parameters passed on to {@code KafkaCluster}
    */
   public SparkKafka(Map<String, String> kafkaParams) {
      this.kafkaParams = kafkaParams;
      init();
   }

   /** Builds {@code kafkaCluster} from the current {@code kafkaParams}. */
   private void init() {
      kafkaCluster = new KafkaCluster(toImmutableScalaMap(kafkaParams));
   }

   /**
    * Copies a Java map into an immutable Scala map.
    *
    * <p>Static on purpose: the anonymous evidence object then holds no reference
    * to a {@code SparkKafka} instance.
    *
    * @param javaMap the map to copy
    * @return an immutable Scala map with the same entries
    */
   private static <K, V> scala.collection.immutable.Map<K, V> toImmutableScalaMap(Map<K, V> javaMap) {
      scala.collection.mutable.Map<K, V> mutableView = JavaConversions.mapAsScalaMap(javaMap);
      // toMap needs evidence that each element is a (K, V) pair; identity suffices.
      return mutableView.toMap(new Predef.$less$colon$less<Tuple2<K, V>, Tuple2<K, V>>() {
         private static final long serialVersionUID = 1L;

         @Override
         public Tuple2<K, V> apply(Tuple2<K, V> pair) {
            return pair;
         }
      });
   }

   /**
    * Computes the starting offset of every partition of the configured topics.
    *
    * <p>If the group's committed offsets cannot be obtained (the lookup yields a
    * {@code Left}), this is treated as first consumption and every partition
    * starts at the leader's earliest offset. Otherwise each partition starts at
    * its committed offset, raised to the earliest offset when the committed one
    * is older.
    *
    * <p>NOTE(review): a {@code Left} presumably also covers lookup errors, not
    * only "nothing committed yet" - confirm that restarting from the earliest
    * offset is intended in that case.
    *
    * @return a new map from partition to starting offset
    * @throws NullPointerException if no topics have been set
    * @throws java.util.NoSuchElementException presumably, when the partition or
    *         earliest-offset lookup yields a {@code Left}; these results are
    *         unwrapped with {@code right().get()} without a check
    */
   public Map<TopicAndPartition, Long> getOffset() {
      if (this.topics == null) {
         // Fail early with a message instead of deep inside the Scala wrapper.
         throw new NullPointerException("topics must be set via setTopics() before calling getOffset()");
      }
      Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();

      scala.collection.mutable.Set<String> mutableTopics = JavaConversions.asScalaSet(this.topics);
      scala.collection.immutable.Set<String> immutableTopics = mutableTopics.toSet();
      scala.collection.immutable.Set<TopicAndPartition> scalaTopicAndPartitionSet = kafkaCluster
            .getPartitions(immutableTopics).right().get();

      // Look the committed offsets up exactly once. right().toSeq() is empty for a
      // Left and holds the single map for a Right, so one call serves both the
      // "first consumption" test and the value, with no second round trip.
      java.util.List<scala.collection.immutable.Map<TopicAndPartition, Object>> committedResult = JavaConversions
            .seqAsJavaList(kafkaCluster
                  .getConsumerOffsets(kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG), scalaTopicAndPartitionSet)
                  .right().toSeq());
      boolean firstConsumption = committedResult.isEmpty();
      Map<TopicAndPartition, Object> consumerOffsets = null;
      if (!firstConsumption) {
         consumerOffsets = JavaConversions.mapAsJavaMap(committedResult.get(0));
      }

      Map<TopicAndPartition, LeaderOffset> earliestOffsets = JavaConversions
            .mapAsJavaMap(kafkaCluster.getEarliestLeaderOffsets(scalaTopicAndPartitionSet).right().get());
      Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions.setAsJavaSet(scalaTopicAndPartitionSet);

      for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {
         long startOffset = earliestOffsets.get(topicAndPartition).offset();
         if (!firstConsumption) {
            long committedOffset = (Long) consumerOffsets.get(topicAndPartition);
            // A committed offset below the leader's earliest offset may point at a
            // log segment Kafka has already cleaned up; in that case keep the
            // earliest offset to avoid an OffsetOutOfRange error.
            if (committedOffset > startOffset) {
               startOffset = committedOffset;
            }
         }
         fromOffsets.put(topicAndPartition, startOffset);
      }
      return fromOffsets;
   }

   /**
    * Commits the {@code untilOffset} of every range in {@code range} for the
    * configured consumer group.
    *
    * <p>All partitions are sent in a single {@code setConsumerOffsets} call;
    * nothing is sent when there are no ranges. The result of the commit is not
    * inspected, so a failed commit is not reported to the caller.
    *
    * @param range source of the offset ranges that have been processed
    */
   public void setOffset(HasOffsetRanges range) {
      OffsetRange[] offsets = range.offsetRanges();
      if (offsets.length == 0) {
         return;
      }

      // Collect topic/partition -> untilOffset for the whole batch.
      Map<TopicAndPartition, Object> untilOffsets = new HashMap<TopicAndPartition, Object>();
      for (OffsetRange o : offsets) {
         untilOffsets.put(new TopicAndPartition(o.topic(), o.partition()), o.untilOffset());
      }

      // One conversion and one commit request instead of one per range.
      kafkaCluster.setConsumerOffsets(kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG),
            toImmutableScalaMap(untilOffsets));
   }

   /**
    * Returns {@code MessageAndMetadata.class} typed for {@code String} keys and
    * {@code byte[]} values.
    *
    * @return the class object, cast to the parameterized type
    */
   @SuppressWarnings("unchecked")
   public static Class<MessageAndMetadata<String, byte[]>> getMsgClass() {
      // A class literal cannot carry type arguments, hence the double cast.
      return (Class<MessageAndMetadata<String, byte[]>>) (Class<?>) MessageAndMetadata.class;
   }

   /** @return the Kafka parameters currently held by this helper */
   public Map<String, String> getKafkaParams() {
      return kafkaParams;
   }

   /**
    * Replaces the stored Kafka parameters. Only the field is updated; the
    * existing {@code KafkaCluster} is not rebuilt.
    *
    * @param kafkaParams the new parameters
    */
   public void setKafkaParams(Map<String, String> kafkaParams) {
      this.kafkaParams = kafkaParams;
   }

   /** @return the topics whose offsets are managed, or {@code null} if unset */
   public Set<String> getTopics() {
      return topics;
   }

   /** @param topics the topics whose offsets are managed */
   public void setTopics(Set<String> topics) {
      this.topics = topics;
   }

   /** @return the {@code KafkaCluster} used for offset lookups and commits */
   public KafkaCluster getKafkaCluster() {
      return kafkaCluster;
   }

   /** @param kafkaCluster the {@code KafkaCluster} to use from now on */
   public void setKafkaCluster(KafkaCluster kafkaCluster) {
      this.kafkaCluster = kafkaCluster;
   }

}