A streaming job needs to run 24/7, but a code upgrade occasionally requires stopping the program. Because the job is distributed, you cannot go and kill its processes one by one, so configuring a graceful shutdown is essential.
Shutdown approach: use an external file system to signal the running program to shut itself down.
1) Main program
package com.atguigu;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.StreamingContextState;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import scala.Tuple2;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
public class Test05_Close {
    public static void main(String[] args) throws InterruptedException {
        // Create the streaming context with a 3-second batch interval
        JavaStreamingContext javaStreamingContext = new JavaStreamingContext("local[*]", "window", Duration.apply(3000L));

        // Kafka consumer configuration
        HashMap<String, Object> map = new HashMap<>();
        map.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop102:9092,hadoop103:9092,hadoop104:9092");
        map.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        map.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        map.put(ConsumerConfig.GROUP_ID_CONFIG, "atguigu");
        map.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");

        // Topics to consume
        ArrayList<String> strings = new ArrayList<>();
        strings.add("topicA");

        JavaInputDStream<ConsumerRecord<String, String>> directStream = KafkaUtils.createDirectStream(
                javaStreamingContext,
                LocationStrategies.PreferBrokers(),
                ConsumerStrategies.<String, String>Subscribe(strings, map));

        // Split each Kafka record value into words
        JavaDStream<String> stringJavaDStream = directStream.flatMap(new FlatMapFunction<ConsumerRecord<String, String>, String>() {
            @Override
            public Iterator<String> call(ConsumerRecord<String, String> stringStringConsumerRecord) throws Exception {
                String[] split = stringStringConsumerRecord.value().split(" ");
                return Arrays.asList(split).iterator();
            }
        });

        // Map each word to a (word, 1) pair
        JavaPairDStream<String, Integer> javaPairDStream = stringJavaDStream.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<>(s, 1);
            }
        });

        // Word count over a 12-second window, sliding every 6 seconds
        javaPairDStream.reduceByKeyAndWindow(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        }, Duration.apply(12000L), Duration.apply(6000L)).print();

        // Start the monitor thread that watches HDFS for the stop marker
        new Thread(new MonitorStop(javaStreamingContext)).start();

        // Start the streaming job and block until it terminates
        javaStreamingContext.start();
        javaStreamingContext.awaitTermination();
    }

    public static class MonitorStop implements Runnable {
        JavaStreamingContext javaStreamingContext = null;

        public MonitorStop(JavaStreamingContext javaStreamingContext) {
            this.javaStreamingContext = javaStreamingContext;
        }

        @Override
        public void run() {
            try {
                FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), new Configuration(), "atguigu");
                while (true) {
                    // Poll HDFS every 5 seconds for the stop marker
                    Thread.sleep(5000);
                    boolean exists = fs.exists(new Path("hdfs://hadoop102:8020/stopSpark"));
                    if (exists) {
                        StreamingContextState state = javaStreamingContext.getState();
                        // Stop only if the streaming context is still running
                        if (state == StreamingContextState.ACTIVE) {
                            // Graceful shutdown: stop the SparkContext too, and finish in-flight batches first
                            javaStreamingContext.stop(true, true);
                            System.exit(0);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
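The key call is javaStreamingContext.stop(true, true): the first flag also stops the underlying SparkContext, and the second lets the batches already received finish processing before shutdown, which is what makes the close graceful. As a side note, Spark Streaming also offers a configuration-based option, spark.streaming.stopGracefullyOnShutdown, which makes the JVM shutdown hook stop the context gracefully when the driver receives a normal termination signal. A minimal sketch, not part of the program above (the class name, socket source, and settings are only illustrative):
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class GracefulByConfig {
    public static void main(String[] args) throws InterruptedException {
        // Illustrative only: with this flag set, a normal kill of the driver process
        // triggers a graceful stop through Spark's shutdown hook
        SparkConf conf = new SparkConf()
                .setMaster("local[*]")
                .setAppName("window")
                .set("spark.streaming.stopGracefullyOnShutdown", "true");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Duration.apply(3000L));
        // Any source works; a socket stream keeps the sketch self-contained
        jssc.socketTextStream("localhost", 9999).print();
        jssc.start();
        jssc.awaitTermination();
    }
}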
2) Test
(1) Send data
[atguigu@hadoop102 ~]$ kafka-console-producer.sh --broker-list hadoop102:9092 --topic topicA
hello spark
(2) Start the Hadoop cluster and create the stop marker
[atguigu@hadoop102 hadoop-3.1.3]$ sbin/start-dfs.sh
[atguigu@hadoop102 hadoop-3.1.3]$ hadoop fs -mkdir /stopSpark
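Once /stopSpark exists on HDFS, the monitor thread (which polls every 5 seconds) detects it and stops the job gracefully. Before restarting the application, remove the marker, otherwise the new run will shut itself down again almost immediately, for example:
[atguigu@hadoop102 hadoop-3.1.3]$ hadoop fs -rm -r /stopSpark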