package com.mz.iot;
import com.alibaba.fastjson.JSON;
import com.mz.iot.bean.DataOrigin;
import com.mz.iot.bean.UseResult;
import com.mz.iot.utils.Conf;
import com.mz.iot.utils.DateUtil;
import com.mz.iot.utils.EsSinkUtil;
import com.mz.iot.utils.JsonUtil;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContentType;
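/**
* Usage statistics job.
* Currently covers the unified calculation of both water and electricity usage.
* Results are computed over 15-minute windows.
*/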
public class UseCntAnalysis {
/**
 * Entry point: builds and runs the "use cnt analysis" Flink job.
 *
 * <p>Pipeline, in order: read text lines from a socket, parse each line into a
 * {@link DataOrigin}, drop lines that did not parse or carry no id, assign event-time
 * timestamps with a 5 second out-of-orderness bound, route records to a "make-up" or a
 * "regular" side output, aggregate the regular records in 15-minute tumbling event-time
 * windows, and write window results, late records and make-up records to Elasticsearch.
 *
 * @param args command-line arguments (not read)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // Exactly-once checkpoints every 1000 ms; the remaining knobs go through one config handle.
    env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
    final CheckpointConfig checkpointConfig = env.getCheckpointConfig();
    checkpointConfig.setCheckpointTimeout(60000L);
    checkpointConfig.setMaxConcurrentCheckpoints(2);
    checkpointConfig.setMinPauseBetweenCheckpoints(100L);
    checkpointConfig.setPreferCheckpointForRecovery(true);
    checkpointConfig.setTolerableCheckpointFailureNumber(1);
    checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    // env.setStateBackend(new FsStateBackend("hdfs://mz-hadoop-01:8020/ck"));

    // Author's intent: when a record crashes the job, restart and skip that record as if it
    // had never been received.
    // NOTE(review): the first argument (restart attempts) is 0 here -- confirm that is intended.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(0, 3000L));

    // Raw text lines from the socket source.
    DataStream<String> rawLines = env.socketTextStream("192.168.0.162", 6666).uid("use-cnt-input");
    rawLines.print("useCntOriginStream=========>");

    // Stage 1: parse. Lines that yield no object or no id become a placeholder with id "invalid".
    SingleOutputStreamOperator<DataOrigin> parsed = rawLines
            .map(new MapFunction<String, DataOrigin>() {
                @Override
                public DataOrigin map(String line) throws Exception {
                    DataOrigin record = JsonUtil.getObjFromJson(line, DataOrigin.class);
                    if (record != null && record.getId() != null) {
                        return record;
                    }
                    DataOrigin placeholder = new DataOrigin();
                    placeholder.setId("invalid");
                    return placeholder;
                }
            })
            .uid("use-cnt-origin-map");

    // Stage 2: drop the placeholders produced by stage 1.
    SingleOutputStreamOperator<DataOrigin> valid = parsed
            .filter(new FilterFunction<DataOrigin>() {
                @Override
                public boolean filter(DataOrigin record) throws Exception {
                    boolean isPlaceholder = "invalid".equals(record.getId());
                    return !isPlaceholder;
                }
            })
            .uid("use-cnt-filter");

    // Stage 3: event time comes from the record's acquisition time; watermarks lag by 5 seconds.
    SingleOutputStreamOperator<DataOrigin> allStream = valid
            .assignTimestampsAndWatermarks(
                    new BoundedOutOfOrdernessTimestampExtractor<DataOrigin>(Time.seconds(5)) {
                        @Override
                        public long extractTimestamp(DataOrigin record) {
                            return DateUtil.getMillsFromString(record.getAcquisitionTime());
                        }
                    })
            .uid("use-cnt-assign-wm");

    // Water/electricity records with makeUp != "1": the regular data.
    final OutputTag<DataOrigin> regularTag = new OutputTag<DataOrigin>("f0") {
    };
    // Records with makeUp == "1": compensation (make-up) data, handled globally.
    final OutputTag<DataOrigin> makeUpTag = new OutputTag<DataOrigin>("f1") {
    };
    // Late records of the regular (non make-up) data.
    final OutputTag<DataOrigin> lateTag = new OutputTag<DataOrigin>("late") {
    };

    // Routing only: every record goes to at most one side output; nothing is emitted on the main output.
    SingleOutputStreamOperator<DataOrigin> routed = allStream.process(new ProcessFunction<DataOrigin, DataOrigin>() {
        @Override
        public void processElement(DataOrigin value, Context ctx, Collector<DataOrigin> out) throws Exception {
            // Make-up data takes precedence over every other rule.
            if ("1".equals(value.getMakeUp())) {
                ctx.output(makeUpTag, value);
                return;
            }
            // Regular data: meterKind "0" only with type "4", or any record with meterKind "1".
            boolean kindZeroTypeFour = "0".equals(value.getMeterKind()) && "4".equals(value.getType());
            if (kindZeroTypeFour || "1".equals(value.getMeterKind())) {
                ctx.output(regularTag, value);
            }
        }
    });

    DataStream<DataOrigin> regularStream = routed.getSideOutput(regularTag);

    // Make-up data is written out separately so it can be processed together with late data.
    DataStream<DataOrigin> makeUpStream = routed.getSideOutput(makeUpTag);
    makeUpStream.print("stream_makeup=====>");

    // Quarter-hour totals. Author's note: type need not be part of the key because it is fixed.
    final TumblingEventTimeWindows quarterHourWindows =
            TumblingEventTimeWindows.of(Time.minutes(15), Time.minutes(0));
    SingleOutputStreamOperator<UseResult> quarterResults = regularStream
            .keyBy("projectId", "meterKind", "meterCode", "name")
            .window(quarterHourWindows)
            .allowedLateness(Time.minutes(1))
            .sideOutputLateData(lateTag)
            .aggregate(new UseAgg(), new WindowUseInfoResult("9999"));
    quarterResults.print("stream_result_1_quarter==============>");

    DataStream<DataOrigin> lateRecords = quarterResults.getSideOutput(lateTag);
    lateRecords.print("stream_late_1_quarter");

    // 12-hour window (disabled):
    // SingleOutputStreamOperator<UseResult> stream_result_12_hour = stream0
    // .keyBy("projectId", "meterKind", "meterCode", "name")
    // .window(TumblingEventTimeWindows.of(Time.hours(12), Time.hours(4)))
    // .allowedLateness(Time.minutes(15))
    // .aggregate(new UseAgg(), new WindowUseInfoResult("12"));
    //
    // stream_result_12_hour.print("stream_result_12_hour==============>");

    // One-day window (disabled):
    // SingleOutputStreamOperator<UseResult> stream_result_24_hour = stream0
    // .keyBy("projectId", "meterKind", "meterCode", "name")
    // .window(TumblingEventTimeWindows.of(Time.hours(24), Time.hours(-8)))
    // .allowedLateness(Time.minutes(15))
    // .aggregate(new UseAgg(), new WindowUseInfoResult("24"));
    // stream_result_24_hour.print("stream_result_24_hour==============>");

    // Elasticsearch sinks: the window results go to two indices; late and make-up records to one each.
    final String esAddress = "192.168.0.163:9200";
    final int bulkSize = 40;
    final int sinkParallelism = 5;
    EsSinkUtil.addSink(esAddress, bulkSize, sinkParallelism, quarterResults, new EsSinkResult(Conf.iot_ads_use_x_hour), "use-cnt-sink-result");
    EsSinkUtil.addSink(esAddress, bulkSize, sinkParallelism, quarterResults, new EsSinkResult(Conf.iot_ads_use_x_hour_9999), "use-cnt-sink-result_9999");
    EsSinkUtil.addSink(esAddress, bulkSize, sinkParallelism, lateRecords, new EsSinkLate(), "use-cnt-sink-late");
    EsSinkUtil.addSink(esAddress, bulkSize, sinkParallelism, makeUpStream, new EsSinkMakeUp(), "use-cnt-sink-makeup");

    env.execute("use cnt analysis");
}
/**
 * Elasticsearch sink function for late records: each {@link DataOrigin} is indexed into
 * {@code Conf.iot_big_data_use_late_index}, keyed by the record's id.
 */
public static class EsSinkLate implements ElasticsearchSinkFunction<DataOrigin> {
    /**
     * Builds one index request for the record and queues it on the indexer.
     *
     * @param element record to index; its JSON serialization becomes the document source
     * @param ctx     runtime context (not used)
     * @param indexer receives the index request
     */
    @Override
    public void process(DataOrigin element, RuntimeContext ctx, RequestIndexer indexer) {
        System.out.println("sink late...");

        // Document id is the string form of the record id (a null id becomes "null").
        final String documentId = String.valueOf(element.getId());
        final String documentJson = JSON.toJSONString(element);

        IndexRequest request = Requests.indexRequest();
        request.index(Conf.iot_big_data_use_late_index);
        request.type("_doc");
        request.id(documentId);
        request.source(documentJson, XContentType.JSON);

        indexer.add(request);
    }
}
/**
 * Elasticsearch sink function for make-up (compensation) records: each {@link DataOrigin}
 * is indexed into {@code Conf.iot_big_data_makeup_index}, keyed by the record's id.
 */
public static class EsSinkMakeUp implements ElasticsearchSinkFunction<DataOrigin> {
    /**
     * Builds one index request for the record and queues it on the indexer.
     *
     * @param element record to index; its JSON serialization becomes the document source
     * @param ctx     runtime context (not used)
     * @param indexer receives the index request
     */
    @Override
    public void process(DataOrigin element, RuntimeContext ctx, RequestIndexer indexer) {
        System.out.println("sink makeup...");

        // Document id is the string form of the record id (a null id becomes "null").
        final String documentId = String.valueOf(element.getId());
        final String documentJson = JSON.toJSONString(element);

        IndexRequest request = Requests.indexRequest();
        request.index(Conf.iot_big_data_makeup_index);
        request.type("_doc");
        request.id(documentId);
        request.source(documentJson, XContentType.JSON);

        indexer.add(request);
    }
}
/**
 * Elasticsearch sink function for window results: each {@link UseResult} is indexed into
 * the index given at construction time, keyed by the result's {@code pk}.
 */
public static class EsSinkResult implements ElasticsearchSinkFunction<UseResult> {
    /** Name of the target Elasticsearch index. */
    private final String indexName;

    /**
     * @param indexName name of the index every result is written to
     */
    public EsSinkResult(String indexName) {
        this.indexName = indexName;
    }

    /**
     * Builds one index request for the result and queues it on the indexer.
     *
     * @param element result to index; its JSON serialization becomes the document source
     * @param ctx     runtime context (not used)
     * @param indexer receives the index request
     */
    @Override
    public void process(UseResult element, RuntimeContext ctx, RequestIndexer indexer) {
        // Document id is the string form of the result's pk (a null pk becomes "null").
        final String documentId = String.valueOf(element.getPk());
        final String documentJson = JSON.toJSONString(element);

        IndexRequest request = Requests.indexRequest();
        request.index(indexName);
        request.type("_doc");
        request.id(documentId);
        request.source(documentJson, XContentType.JSON);

        indexer.add(request);
    }
}
/**
 * Incremental usage aggregate. Every input record adds its {@code addValue} to the running
 * total ({@code cnts}) and increases the record count ({@code frequency}) by one. The
 * accumulator itself is handed out as the result.
 */
public static class UseAgg implements AggregateFunction<DataOrigin, UseResult, UseResult> {
    /**
     * @return a fresh, empty accumulator
     */
    @Override
    public UseResult createAccumulator() {
        return new UseResult();
    }

    /**
     * Folds one record into the accumulator.
     *
     * @param value       incoming record whose {@code addValue} is added to the total
     * @param accumulator running total and count; mutated in place
     * @return the same accumulator instance
     */
    @Override
    public UseResult add(DataOrigin value, UseResult accumulator) {
        accumulator.setCnts(accumulator.getCnts() + value.getAddValue());
        accumulator.setFrequency(accumulator.getFrequency() + 1);
        return accumulator;
    }

    /**
     * @param accumulator final accumulator of the window
     * @return the accumulator, unchanged
     */
    @Override
    public UseResult getResult(UseResult accumulator) {
        return accumulator;
    }

    /**
     * Combines two partial accumulators by summing their totals and counts.
     *
     * <p>Returning {@code null} here would hand Flink a null accumulator whenever windows are
     * merged (for example with a session window assigner), so the merge is implemented even
     * though the tumbling windows used in this file do not trigger it.
     *
     * @param a first partial accumulator; reused as the merge target
     * @param b second partial accumulator
     * @return {@code a}, now holding the combined total and count
     */
    @Override
    public UseResult merge(UseResult a, UseResult b) {
        a.setCnts(a.getCnts() + b.getCnts());
        a.setFrequency(a.getFrequency() + b.getFrequency());
        return a;
    }
}
/**
 * Window function that decorates the pre-aggregated {@link UseResult} of a window with the
 * key fields, the window start time, a unit, a primary key and the last update time.
 */
public static class WindowUseInfoResult extends ProcessWindowFunction<UseResult, UseResult, Tuple, TimeWindow> {
    /** Span label stored on every result and embedded in its primary key. */
    private final String span;

    /**
     * @param span span label to stamp on every emitted result
     */
    public WindowUseInfoResult(String span) {
        this.span = span;
    }

    /**
     * Fills in the descriptive fields of the window's single aggregate and emits it.
     *
     * @param tuple    window key; fields 0..3 are read as projectId, meterKind, meterCode, name
     * @param context  gives access to the window bounds, current watermark and processing time
     * @param elements aggregate output for the window; only the first element is read
     * @param out      collector receiving the completed result
     * @throws Exception declared by the overridden method
     */
    @Override
    public void process(Tuple tuple, Context context, Iterable<UseResult> elements, Collector<UseResult> out) throws Exception {
        UseResult result = elements.iterator().next();

        String projectId = tuple.getField(0);
        String meterKind = tuple.getField(1);
        String meterCode = tuple.getField(2);
        String name = tuple.getField(3);

        TimeWindow window = context.window();
        String windowStartText = DateUtil.getDateStrFromMill(window.getStart());
        String windowEndText = DateUtil.getDateStrFromMill(window.getEnd());
        String watermarkText = DateUtil.getDateStrFromMill(context.currentWatermark());
        System.out.println("window process t_start:" + windowStartText + ",t_end:" + windowEndText + ",watermark:" + watermarkText);

        // Split once: "<date> <hour>:<minute>:..." -> date, hour, minute.
        // NOTE(review): relies on DateUtil producing a space between date and time and
        // ':' between time fields -- confirm against DateUtil.
        String[] dateAndTime = windowStartText.split(" ");
        String[] timeFields = dateAndTime[1].split(":");
        String dt = dateAndTime[0];
        String dh = timeFields[0];
        String dm = timeFields[1];

        // Primary key layout, following the expression kept from the original comment:
        // concat_ws("", dt, dh, dm, projectIdentify, equipmentNo, name, span, meterKind)
        String pk = dt + dh + dm + projectId + meterCode + name + span + meterKind;

        // meterKind "0" is reported in kWh; every other kind in m3.
        String unit;
        if ("0".equals(meterKind)) {
            unit = "kWh";
        } else {
            unit = "m3";
        }

        result.setDt(dt);
        result.setDh(dh);
        result.setDm(dm);
        result.setT_start(windowStartText);
        result.setProjectIdentify(projectId);
        result.setMeterKind(meterKind);
        result.setEquipmentNo(meterCode);
        result.setName(name);
        result.setSpan(span);
        result.setUnit(unit);
        result.setPk(pk);
        result.setLast_update_time(DateUtil.getDateStrFromMill(context.currentProcessingTime()));

        out.collect(result);
    }
}
}