// Spark Streaming example: pull messages from Kafka and count words per batch.
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import scala.Tuple2;
import java.util.*;
public class SparkStreaming {

    /**
     * Entry point: consumes records from the Kafka topic {@code "test"}, splits
     * each record value on single spaces, and prints a per-batch word count
     * every second until the job is terminated.
     *
     * @param args unused command-line arguments
     */
    public static void main(String[] args) {
        // App name only; the master URL is expected to come from spark-submit.
        SparkConf sparkConf = new SparkConf().setAppName("sparkStreaming");
        // 1-second micro-batch interval.
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
        // Enable checkpointing (needed for stateful operations / driver recovery).
        // jssc.checkpoint("hdfs://test01.cdh6.ecarx.local:8020/tmp/checkpoint_1/");

        Set<String> topics = new HashSet<>(Arrays.asList("test"));

        Map<String, Object> kafkaParams = new HashMap<>();
        // TODO(review): broker list looks truncated ("test03.cdh.: ") — fill in a real host:port.
        kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "test03.cdh.: ");
        // kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "");
        kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
        // With the direct stream, auto-commit can commit offsets before the
        // batch is actually processed (at-most-once / possible data loss);
        // the Spark + Kafka integration guide recommends disabling it.
        kafkaParams.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);

        JavaInputDStream<ConsumerRecord<String, String>> messages =
                KafkaUtils.createDirectStream(
                        jssc,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.Subscribe(topics, kafkaParams));

        // Split each record's value into words. A lambda replaces the verbose
        // anonymous FlatMapFunction; the rest of this class already uses lambdas.
        JavaDStream<String> words =
                messages.flatMap(record -> Arrays.asList(record.value().split(" ")).iterator());

        // Classic word count: map to (word, 1) pairs, then reduce by key per batch.
        JavaPairDStream<String, Integer> pairs = words.mapToPair(s -> new Tuple2<>(s, 1));
        JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey(Integer::sum);
        wordCounts.print();

        jssc.start();
        try {
            jssc.awaitTermination();
        } catch (InterruptedException e) {
            // Restore the interrupt status instead of swallowing the exception.
            Thread.currentThread().interrupt();
        }
    }
}