Task 2: Consuming Kafka Data with Flink
National competition question (1):
Write Scala code that uses Flink to consume the data in the Kafka topic order and perform the statistics described below. Order records follow the order_info table structure and order detail records follow the order_detail table structure (the source-type and source-id fields are not considered and will not appear in the real-time data). For the EventTime, take the larger of create_time and operate_time from order_info or order_detail; if operate_time is empty or the column is absent, use create_time instead. Allow data to be up to 5 s late. Order statuses are 1001: order created, 1002: order paid, 1003: order cancelled, 1004: order completed, 1005: refund requested, 1006: refund completed. When presenting results, do not use scientific notation such as 1.9786518E7.
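The EventTime rule above (take the larger of create_time and operate_time, falling back to create_time when operate_time is empty or absent) boils down to a small helper. A minimal Scala sketch, assuming the timestamps arrive as strings in a fixed SimpleDateFormat pattern (the actual pattern depends on the data source, e.g. dd/MM/yyyy HH:mm:ss in the CSV sample below):

import java.text.SimpleDateFormat

object EventTimeRule {
  // max(create_time, operate_time) in epoch millis; a missing/empty operate_time falls back to create_time
  def eventTime(createTime: String, operateTime: Option[String], pattern: String = "dd/MM/yyyy HH:mm:ss"): Long = {
    val fmt = new SimpleDateFormat(pattern)
    val create = fmt.parse(createTime).getTime
    operateTime.filter(_.trim.nonEmpty) match {
      case Some(op) => math.max(create, fmt.parse(op).getTime)
      case None     => create
    }
  }
}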
1. Use Flink to consume the Kafka data and compute the mall's real-time received order amount (order status must be taken into account: cancelled orders, refund requests and completed refunds are excluded; all other statuses are accumulated). Store the value in Redis under the key totalprice. Read it with redis-cli using get totalprice and paste the screenshots into the corresponding task number in 【Release\任务D提交结果.docx】 on the client desktop; take two screenshots at least one minute apart, with the first screenshot placed before the second.
2. While task 1 is running, use a side output stream to watch for records whose order_status is refund completed; set the key to totalrefundordercount with the users' refunded amount as the value and store it in Redis. Read it with redis-cli using get totalrefundordercount and paste the screenshots into the corresponding task number in 【Release\任务D提交结果.docx】 on the client desktop; take two screenshots at least one minute apart, with the first screenshot placed before the second.
3. While task 1 is running, use a side output stream to watch for records whose order_status is order cancelled and insert them into the order_info table of the MySQL database shtd_result. Then, in the MySQL command line on Linux, query the columns id, consignee, consignee_tel, final_total_amount and feight_fee ordered by id descending and return the first 5 rows. Paste the SQL statement and a screenshot of its result into the corresponding task number in 【Release\任务D提交结果.docx】 on the client desktop.
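Both solutions below wire this logic into a Flink job; the CSV variant reads a local file for offline testing, while the real task consumes the Kafka topic. A minimal Scala sketch of the Kafka source, assuming the broker address node1:9092 used in the Java variant further down and a hypothetical consumer group id:

import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object OrderKafkaSource {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val props = new Properties()
    props.setProperty("bootstrap.servers", "node1:9092") // assumed broker, same as the Java variant below
    props.setProperty("group.id", "order-group")         // hypothetical consumer group id
    // each Kafka record is one order_info line from topic "order"
    val orders: DataStream[String] =
      env.addSource(new FlinkKafkaConsumer[String]("order", new SimpleStringSchema(), props))
    orders.print()
    env.execute("order-source-check")
  }
}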
CSV data source:
import java.text.SimpleDateFormat
import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
import org.apache.flink.util.Collector
import java.sql.{Connection, DriverManager, PreparedStatement}
import java.time.Duration
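// CSV test version: reads order_info lines from a local file, derives event time from
// create_time/operate_time, routes cancelled (1003) and refund-completed (1006) orders to
// side outputs, accumulates totalprice/totalrefundordercount in Redis, and writes cancelled
// orders to MySQL.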
object test2 {
val flt: Array[String] = Array("1003", "1005", "1006")
// side-output tags: side1 = cancelled orders (1003), side2 = completed refunds (1006)
val side1 = new OutputTag[String]("side1")
val side2 = new OutputTag[String]("side2")
val simple=new SimpleDateFormat("dd/MM/yyyy HH:mm:ss")
val simple2=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.readTextFile("data/order_info")
val resStream = stream.assignTimestampsAndWatermarks(
WatermarkStrategy.forBoundedOutOfOrderness[String](Duration.ofSeconds(5)).withTimestampAssigner(
new SerializableTimestampAssigner[String] {
override def extractTimestamp(t: String, l: Long): Long = {
val row = t.split(",")
// operate_time (row(11)) empty or column absent: fall back to create_time (row(10))
if (row.length <= 11 || row(11).trim.isEmpty) {
println(s"eventTime from create_time: ${row(10)}")
simple.parse(row(10)).getTime
} else {
// otherwise take the larger of create_time and operate_time
val createTime = simple.parse(row(10)).getTime
val operateTime = simple.parse(row(11)).getTime
math.max(createTime, operateTime)
}
}
}
)
)
val fltStream = resStream.process(new FStream)
val sideStream1 = fltStream.getSideOutput(side1) // cancelled orders (1003) -> MySQL (Task3)
val sideStream2 = fltStream.getSideOutput(side2) // completed refunds (1006) -> Redis (Task2)
fltStream.print("main")
sideStream1.print("side1")
sideStream2.print("side2")
//Task1
fltStream.map(x => {
val row = x.split(",")
println(s"value:${row(3).toDouble}")
("totalprice", row(3).toDouble)
})
.keyBy(_._1)
.reduce((r1, r2) => (r1._1, r1._2 + r2._2)) // running total of the received amount
.addSink(new RedisSink[(String,Double)](
new FlinkJedisPoolConfig.Builder().setHost("node1").setPort(6379).build(),
new RedisMapper[(String,Double)] {
override def getCommandDescription: RedisCommandDescription = new RedisCommandDescription(RedisCommand.SET)
override def getKeyFromData(t: (String, Double)): String = t._1
override def getValueFromData(t: (String, Double)): String = BigDecimal(t._2).bigDecimal.toPlainString // plain decimal, no scientific notation
}
)
)
//Task2
sideStream2.map(x=>{("totalrefundordercount",
x.split(",").mkString.split("\"").filter(_!="")(3).toDouble
)})
.keyBy(_._1)
.reduce((r1,r2)=>(r1._1,r1._2+r2._2))
.addSink(
new RedisSink[(String, Double)](
new FlinkJedisPoolConfig.Builder().setHost("node1").setPort(6379).build(),
new RedisMapper[(String, Double)] {
override def getCommandDescription: RedisCommandDescription = new RedisCommandDescription(RedisCommand.SET)
override def getKeyFromData(t: (String, Double)): String = t._1
override def getValueFromData(t: (String, Double)): String = BigDecimal(t._2).bigDecimal.toPlainString // plain decimal, no scientific notation
}
)
)
println(s"${"="*10}Task2 End${"="*10}")
println(s"${"="*10}Task3 Begin${"="*10}")
//Task3
sideStream1.addSink(new RichSinkFunction[String] {
var collection: Connection = _
var parState:PreparedStatement = _
override def open(parameters: Configuration): Unit = {
val url="jdbc:mysql://localhost:3306/moni?useSSL=false" // local test database; the real task targets shtd_result
val user="root"
val password="8888"
collection=DriverManager.getConnection(url,user,password)
parState=collection.prepareStatement("insert into order_info (id,consignee,consignee_tel,final_total_amount,feight_fee,operate_time) values(?,?,?,?,?,?)")
}
override def invoke(value: String): Unit = {
val row = value.split(",").mkString.split("\"").filter(_ != "")
parState.setLong(1, row(0).toLong)
parState.setString(2, row(1))
parState.setString(3, row(2))
parState.setDouble(4, row(3).toDouble)
parState.setDouble(5, row(19).toDouble) // feight_fee may contain decimals
// parameter 6 is operate_time; setTimestamp needs a java.sql.Timestamp
parState.setTimestamp(6, new java.sql.Timestamp(simple.parse(row(11)).getTime))
parState.executeUpdate() // write each cancelled order as it arrives
}
override def close(): Unit = {
if (parState != null) { parState.close() }
if (collection != null) { collection.close() }
}
})
env.execute()
}
class FStream extends ProcessFunction[String,String]{
override def processElement(i: String, context: ProcessFunction[String, String]#Context, collector: Collector[String]): Unit = {
val row = i.split(",")
if (!flt.contains(row(4))){
collector.collect(i)
}
else if (row(4)=="1003"){
context.output(side1,i)
}
else if (row(4)=="1006"){
context.output(side2,i)
}
}
}
}
JSON data source (Java version):
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
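// Kafka/JSON version: consumes JSON order records from Kafka, picks the larger of
// create_time/operate_time as event time (falling back to create_time), routes
// refund-completed (1006) and cancelled (1003) orders to side outputs, writes the
// running totals to Redis and cancelled orders to MySQL shtd_result.order_info.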
class nation2json{
public static SimpleDateFormat simple1 = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
public static final OutputTag<HashMap<String,String>> side1=new OutputTag<HashMap<String,String>>("finalcancel"){
@Override
public TypeInformation getTypeInfo() {
return TypeInformation.of(new TypeHint<HashMap<String,String>>() {});
}
};
public static OutputTag<HashMap<String,String>> side2=new OutputTag<HashMap<String,String>>("submitcancel"){
@Override
public TypeInformation getTypeInfo() {
return TypeInformation.of(new TypeHint<HashMap<String,String>>() {});
}
};
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
Properties config=new Properties();
config.setProperty("bootstrap.servers","node1:9092");
config.setProperty("group.id","nation2");
FlinkKafkaConsumer<String> kfk=new FlinkKafkaConsumer<>("orderjson",new SimpleStringSchema(),config);
DataStreamSource<String> kafkastream = env.addSource(kfk);
SingleOutputStreamOperator<HashMap<String, String>> mapJson = kafkastream.map(new RichMapFunction<String, HashMap<String, String>>() {
@Override public HashMap<String, String> map(String s) {
Gson gson = new Gson();
HashMap<String, String> data = gson.fromJson(s, new TypeToken<HashMap<String, String>>(){}.getType());
// operate_time may be absent, null, "null" or empty: fall back to create_time
String operateTime = data.get("operate_time");
if (operateTime == null || operateTime.isEmpty() || operateTime.equals("null")) {
data.put("operate_time", data.get("create_time"));
}
return data;
}
});
SingleOutputStreamOperator<HashMap<String, String>> resStream = mapJson.assignTimestampsAndWatermarks(
WatermarkStrategy.<HashMap<String, String>>forBoundedOutOfOrderness(Duration.ofSeconds(5))
.withTimestampAssigner((SerializableTimestampAssigner<HashMap<String, String>>) (value, l) -> {
try {
long operate_time = simple1.parse(value.get("operate_time")).getTime();
long create_time = simple1.parse(value.get("create_time")).getTime();
if (operate_time > create_time) {
return operate_time;
} else {
return create_time;
}
} catch (Exception e) {
throw new RuntimeException(e);
}
})
);
SingleOutputStreamOperator<HashMap<String, String>> mainStream = resStream.process(new MyProcess());
DataStream<HashMap<String, String>> sideOutput1 = mainStream.getSideOutput(side1);
DataStream<HashMap<String, String>> sideOutput2 = mainStream.getSideOutput(side2);
//Task1
mainStream.map(x-> Tuple2.of("totalprice",Double.parseDouble(x.get("final_total_amount")))).returns(Types.TUPLE(Types.STRING,Types.DOUBLE))
.keyBy(x->x.f0)
.sum(1)
.map(x -> Tuple2.of(x.f0, java.math.BigDecimal.valueOf(x.f1).toPlainString())) // plain decimal string, no scientific notation
.returns(Types.TUPLE(Types.STRING,Types.STRING))
.addSink(new MyRedis());
// mainStream.map(x-> Tuple2.of("totalprice",new BigDecimal(x.get("final_total_amount")).setScale(2,BigDecimal.ROUND_HALF_UP).doubleValue())).returns(Types.TUPLE(Types.STRING,Types.DOUBLE)).print();
//Task2
sideOutput1.map(x->Tuple2.of("totalrefundordercount",Double.parseDouble(x.get("final_total_amount")))).returns(Types.TUPLE(Types.STRING,Types.DOUBLE))
.keyBy(x->x.f0)
.sum(1)
.map(x -> Tuple2.of(x.f0, java.math.BigDecimal.valueOf(x.f1).toPlainString())).returns(Types.TUPLE(Types.STRING,Types.STRING)) // plain decimal string, no scientific notation
.addSink(new MyRedis());
//Task3
sideOutput2.addSink(new MyMysql());
env.execute();
}
public static class MyProcess extends ProcessFunction<HashMap<String,String>,HashMap<String,String>> {
@Override
public void processElement(HashMap<String, String> value, ProcessFunction<HashMap<String, String>, HashMap<String, String>>.Context context, Collector<HashMap<String, String>> collector) throws Exception {
List<String> flt = Arrays.asList(new String[]{"1003","1005","1006"});
if (!flt.contains(value.get("order_status"))){
collector.collect(value);
} else if (value.get("order_status").equals(flt.get(2))) { // 1006: refund completed -> side1 (Redis refund total)
context.output(side1,value);
} else if (value.get("order_status").equals(flt.get(0))) { // 1003: order cancelled -> side2 (MySQL)
context.output(side2,value);
}
}
}
public static class MyRedis extends RedisSink<Tuple2<String,String>>{
MyRedis(){
super(
new FlinkJedisPoolConfig.Builder().setHost("node1").setPort(6379).build(),
new RedisMapper<Tuple2<String,String>>(){
@Override
public RedisCommandDescription getCommandDescription() {
return new RedisCommandDescription(RedisCommand.SET);
}
@Override
public String getKeyFromData(Tuple2<String, String> t) {
return t.f0;
}
@Override
public String getValueFromData(Tuple2<String, String> t) {
return t.f1;
}
}
);
}
}
public static class MyMysql extends RichSinkFunction<HashMap<String,String>> {
SimpleDateFormat simple2=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Connection connection;
PreparedStatement preState;
@Override
public void open(Configuration parameters) throws Exception {
connection= DriverManager.getConnection("jdbc:mysql://localhost:3306/shtd_result?useSSL=false&characterEncoding=UTF-8","root","8888");
preState=connection.prepareStatement("insert into order_info (id,consignee,consignee_tel,final_total_amount,feight_fee,operate_time) values(?,?,?,?,?,?)");
}
@Override
public void invoke(HashMap<String, String> value, Context context) throws Exception {
System.out.println("prepare insert into mysql");
preState.setLong(1,Long.parseLong(value.get("id")));
preState.setString(2,value.get("consignee"));
preState.setString(3,value.get("consignee_tel"));
preState.setDouble(4,Double.parseDouble(value.get("final_total_amount")));
preState.setDouble(5,Double.parseDouble(value.get("feight_fee"))); // feight_fee may contain decimals
preState.setTimestamp(6, Timestamp.valueOf(simple2.format(simple1.parse(value.get("operate_time")))));
preState.addBatch();
System.out.println("finish source adding");
System.out.println("insert begin");
preState.executeBatch();
System.out.println("insert end");
}
@Override
public void close() throws Exception {
if (preState!=null){preState.close();}
if (connection!=null){connection.close();}
}
}
}