// Create the JavaStreamingContext for this application with the configured batch duration.
JavaStreamingContext jsc = createJavaStreamingContext(null, appName, batchDurationWithSeconds);

// kafkaParams holds the Kafka consumer parameters copied from the input configuration.
// Keys are stringified; values are passed through unchanged.
Map<String, Object> kafkaParams = new HashMap<>();
// Iterate as Object so this compiles whether getInputConfigs() is a raw or a generic map.
for (Object item : config.getInputConfigs().entrySet()) {
    Map.Entry<?, ?> param = (Map.Entry<?, ?>) item;
    kafkaParams.put(param.getKey().toString(), param.getValue());
}
System.out.println("inputParam: " + kafkaParams);

// outputConfig holds the output-side settings with both keys and values stringified.
// Later statements in this method hand it to RedisSingleton and RddProcessor.
Map<String, String> outputConfig = new HashMap<>();
for (Object item : config.getOutputConfigs().entrySet()) {
    Map.Entry<?, ?> setting = (Map.Entry<?, ?>) item;
    outputConfig.put(setting.getKey().toString(), setting.getValue().toString());
}
System.out.println("outputConfig: " + outputConfig);
// Starting offset for every topic partition the stream will be assigned.
Map<TopicPartition, Long> fromOffsets = new HashMap<>();

// Obtain a Redis connection (presumably borrowed from a pool behind RedisSingleton -- confirm)
// and read the offsets saved by earlier runs. The connection is released in `finally`
// so that a failing hgetAll cannot leak it.
ShardedJedis redis = RedisSingleton.instance((HashMap) outputConfig).getResource();
Map<String, String> kafkaOffsets;
try {
    kafkaOffsets = redis.hgetAll("POSITION_KAFKA_OFFSETS");
} finally {
    redis.close();
}

// Each hash field is "<partition>:<topic>" and its value is the offset,
// e.g. field "0:SGN_DATA" with value "12541".
for (Map.Entry<String, String> saved : kafkaOffsets.entrySet()) {
    // Limit of 2: only the first ':' separates the partition from the topic name.
    String[] info = saved.getKey().split(":", 2);
    if (info.length == 2) {
        // Not a cold start for this partition: resume from the stored offset.
        // A non-numeric partition or offset still fails fast with NumberFormatException.
        fromOffsets.put(
            new TopicPartition(info[1], Integer.parseInt(info[0])),
            Long.parseLong(saved.getValue()));
    }
}

// Cold start: nothing usable was stored in Redis, so start every partition of every
// configured topic at offset 0. `topics` maps topic name -> partition count.
if (fromOffsets.isEmpty()) {
    for (Map.Entry<String, Integer> topic : topics.entrySet()) {
        int partitionCount = topic.getValue();
        // A count <= 0 simply yields no iterations.
        for (int partition = 0; partition < partitionCount; partition++) {
            fromOffsets.put(new TopicPartition(topic.getKey(), partition), 0L);
        }
    }
}
System.out.println("fromOffsets: " + fromOffsets);
// Create the direct Kafka stream, explicitly assigned to the partitions in fromOffsets
// and starting from the offsets recorded there.
// Raw types are kept on purpose: kafkaParams is handed over through a raw Map cast,
// exactly as before, so the resulting stream type is unchecked.
@SuppressWarnings({"unchecked", "rawtypes"})
JavaInputDStream stream = KafkaUtils.createDirectStream(
    jsc,
    LocationStrategies.PreferConsistent(),
    ConsumerStrategies.Assign(fromOffsets.keySet(), (Map) kafkaParams, fromOffsets));

// For each batch RDD, read the Kafka offset range of every partition and store it in Redis,
// then pass the RDD through unchanged and map it to (key, value) pairs.
// The Function stays raw because the generic signature of PhonePairFunction2 is not visible here.
@SuppressWarnings({"unchecked", "rawtypes"})
JavaPairDStream<String, String> mapedDatas = stream.transform(new Function() {
    private static final long serialVersionUID = -6012908518603112999L;

    /**
     * Records the starting offset of each partition in the batch under the Redis hash
     * "POSITION_KAFKA_OFFSETS" (field "<partition>:<topic>") and returns the RDD untouched.
     *
     * @param rdd1 the batch RDD; expected to be a JavaRDD of ConsumerRecord<String, String>
     *             backed by a Kafka RDD that implements HasOffsetRanges
     * @return the same RDD instance that was passed in
     * @throws Exception if reading the offset ranges or writing to Redis fails
     */
    @Override
    public Object call(Object rdd1) throws Exception {
        JavaRDD<ConsumerRecord<String, String>> rdd = (JavaRDD<ConsumerRecord<String, String>>) rdd1;
        // Offset ranges covered by this batch.
        OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        ShardedJedis redis = RedisSingleton.instance((HashMap<String, String>) outputConfig).getResource();
        try {
            for (OffsetRange range : offsetRanges) {
                // The batch's *from* offset is stored, so a restart that reads this hash
                // begins again at the start of the last recorded batch.
                redis.hset(
                    "POSITION_KAFKA_OFFSETS",
                    range.partition() + ":" + range.topic(),
                    String.valueOf(range.fromOffset()));
            }
        } finally {
            // Release the connection even when a write fails.
            redis.close();
        }
        return rdd;
    }
}).mapToPair(new PhonePairFunction2());
// Drop pairs that are null or have a null key or value, then group the remaining values by key.
JavaPairDStream<String, Iterable<String>> groupedData = mapedDatas
    .filter(new Function<Tuple2<String, String>, Boolean>() {
        private static final long serialVersionUID = 6634203502402639467L;

        /**
         * Keeps a pair only when the pair itself, its key and its value are all non-null.
         *
         * @param record the (key, value) pair to test; may be null
         * @return true when the pair should be kept
         */
        @Override
        public Boolean call(Tuple2<String, String> record) throws Exception {
            return record != null && record._1() != null && record._2() != null;
        }
    })
    .groupByKey();

// Hand every grouped batch to RddProcessor, configured with the output settings.
groupedData.foreachRDD(new RddProcessor((HashMap) outputConfig));
// Spark Streaming connecting to Kafka (Direct approach)
// Source-page note: "latest recommended article published 2025-04-24 09:37:48"