Flink ProcessFunction 的使用以及踩到的一些坑

笔者最近接到一个新需求:需要在日志后面加上用户在每个页面的浏览时长。由于日志中本身只有时间这个字段,没有浏览计时,最简单粗暴的方法就是用后一条日志的时间减去前一条日志的时间,再设定一个超时阈值:当用户长时间没有操作时,写回一个固定的时长。
第一个想到的是用 window 来做,但由于笔者对 Flink 还在摸索之中,尝试未果,于是改用 ProcessFunction 完成(写完 ProcessFunction 后又回头写了一下 window 的方案,也解决了。汗……)。
代码如下

package operator;

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import pojo.LogBean;

import java.sql.Time;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Date;

/**
 * Created by IntelliJ IDEA.
 * User: fr
 * Time: 2020/2/28 14:26
 *
 * <p>Keyed process function that annotates each log record with the time the user spent on it.
 * For every key it keeps the most recent, not yet emitted record in state. When the next record
 * for the same key arrives, the pending record is emitted with {@code global1} set to the
 * difference (in milliseconds) between the two event times. If no further record arrives before
 * the event-time timer fires, the pending record is emitted with {@code global1} set to the
 * configured delay and the state is cleared.
 *
 * <p>Event-time timers are used, so the job must assign timestamps and watermarks upstream.
 */

public class AddTimeProcessFunction extends KeyedProcessFunction<String,Tuple2<String, LogBean>, Tuple2<String, LogBean>> {

    /** Default timeout, in milliseconds of event time. */
    private static final long DEFAULT_DELAY_MS = 500;

    /** Format of {@code LogBean#getOperTime()}; the formatter is thread-safe and reused for every element. */
    private static final DateTimeFormatter OPER_TIME_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Offset used to convert the zone-less log time to an epoch value. */
    private static final ZoneOffset OPER_TIME_OFFSET = ZoneOffset.of("+8");

    /**
     * State maintained by the process function: the pending record of the current key.
     */
    private transient ValueState<CountWithTimestamp> state;

    /**
     * Timeout in milliseconds of event time after which a pending record is emitted
     * with this value as its duration.
     */
    private final long delay;

    /** Creates the function with the default timeout of 500 ms. */
    public AddTimeProcessFunction() {
        this(DEFAULT_DELAY_MS);
    }

    /**
     * Creates the function with a custom timeout.
     *
     * @param delay timeout in milliseconds of event time
     */
    public AddTimeProcessFunction(long delay) {
        this.delay = delay;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        state = getRuntimeContext().getState(new ValueStateDescriptor<>("myState", CountWithTimestamp.class));
    }

    /**
     * Emits the previously pending record (if any) with its computed duration and stores the
     * incoming record as the new pending one.
     *
     * @param value (key, log record) pair; the record's {@code operTime} must match
     *              {@code yyyy-MM-dd HH:mm:ss}
     * @param ctx   context used to register the timeout timer
     * @param out   collector receiving the completed records
     * @throws Exception if state access fails or {@code operTime} cannot be parsed
     */
    @Override
    public void processElement(Tuple2<String, LogBean> value, Context ctx, Collector<Tuple2<String, LogBean>> out) throws Exception {
        long nowEventTime = parseEventTimeMillis(value.f1.getOperTime());

        // State of the current key; null when no record is pending.
        CountWithTimestamp current = state.value();

        if (current == null || current.flag == 0) {
            // First record of a session: nothing to emit yet.
            current = new CountWithTimestamp();
            current.key = value.f0;
        } else {
            // The pending record lasted until this record's event time.
            current.logbean.setGlobal1(String.valueOf(nowEventTime - current.lastModified));
            out.collect(new Tuple2<>(current.key, current.logbean));
        }

        // The incoming record becomes the pending one. lastModified must follow it,
        // otherwise later durations would be measured from the session's first record.
        current.logbean = value.f1;
        current.flag = 1;
        current.lastModified = nowEventTime;

        // Schedule the timeout relative to the record that is now pending. This is done for
        // the first record as well, so a key with a single record is still emitted on timeout.
        ctx.timerService().registerEventTimeTimer(current.lastModified + delay);

        state.update(current);
    }

    /**
     * Emits the pending record with the timeout value as its duration when the timer belongs
     * to the latest record of the key; stale timers are ignored.
     *
     * @param timestamp event time the firing timer was registered for
     * @param ctx       timer context
     * @param out       collector receiving the timed-out record
     * @throws Exception if state access fails
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<String, LogBean>> out) throws Exception {
        // State of the key that scheduled the timer.
        CountWithTimestamp result = state.value();

        // A timer registered for an earlier record can fire after the state was already
        // cleared by a previous timeout; there is nothing left to emit in that case.
        if (result == null) {
            return;
        }

        // Only the timer of the latest record counts; timers of older records fire too early.
        if (timestamp >= result.lastModified + delay) {
            result.flag = 0;
            result.logbean.setGlobal1(String.valueOf(delay));

            // Debug output retained from the original implementation.
            System.err.println(result);
            // Emit the pending record on timeout.
            out.collect(new Tuple2<>(result.key, result.logbean));
            // Drop the state of this key.
            state.clear();
        }
    }

    /** Converts a {@code yyyy-MM-dd HH:mm:ss} log time (UTC+8) to epoch milliseconds. */
    private static long parseEventTimeMillis(String operTime) {
        // The log time has second precision; Flink timestamps are in milliseconds.
        return LocalDateTime.parse(operTime, OPER_TIME_FORMAT).toEpochSecond(OPER_TIME_OFFSET) * 1000;
    }

}
/**
 * Mutable holder for the per-key state stored by the process function: the key, a log record,
 * a timestamp and a flag. Fields are public and also exposed through getters and setters.
 */
class CountWithTimestamp {

    /** Key the state belongs to. */
    public String key;
    /** Log record kept in state. */
    public LogBean logbean;
    /** Timestamp recorded alongside the log record. */
    public long lastModified;
    /** Marker flag; defaults to 0. */
    public int flag;

    /** Creates an empty instance with all fields at their defaults. */
    public CountWithTimestamp() {
    }

    /** Creates an instance with every field supplied. */
    public CountWithTimestamp(String key, LogBean logbean, long lastModified, int flag) {
        this.flag = flag;
        this.lastModified = lastModified;
        this.logbean = logbean;
        this.key = key;
    }

    /** @return the key */
    public String getKey() {
        return this.key;
    }

    /** @param key the key to store */
    public void setKey(String key) {
        this.key = key;
    }

    /** @return the stored log record */
    public LogBean getLogbean() {
        return this.logbean;
    }

    /** @param logbean the log record to store */
    public void setLogbean(LogBean logbean) {
        this.logbean = logbean;
    }

    /** @return the stored timestamp */
    public long getLastModified() {
        return this.lastModified;
    }

    /** @param lastModified the timestamp to store */
    public void setLastModified(long lastModified) {
        this.lastModified = lastModified;
    }

    /** @return the flag value */
    public int getFlag() {
        return this.flag;
    }

    /** @param flag the flag value to store */
    public void setFlag(int flag) {
        this.flag = flag;
    }

    /** Renders all four fields in a single-line, brace-delimited form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CountWithTimestamp{");
        text.append("key='").append(key).append('\'');
        text.append(", logbean=").append(logbean);
        text.append(", lastModified=").append(lastModified);
        text.append(", flag=").append(flag);
        text.append('}');
        return text.toString();
    }
}

值得一提的是:
由于定时器是通过 ctx.timerService().registerEventTimeTimer 注册的,属于事件时间(event time)定时器,只有水位线(watermark)推进时才会触发,所以在主代码中必须声明使用 EventTime,并为数据流指定时间戳和水位线:

// Declare that the job runs on event time.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// Extract the event time from each record and convert it to a long value.
// NOTE(review): this snippet ends without a semicolon and does not use the returned stream;
// presumably the call chain continues in the real job — confirm that downstream operators
// consume the stream returned by assignTimestampsAndWatermarks.
data.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<LogBean>() {
            @Override
            public long extractAscendingTimestamp(LogBean element) {
                // Parse the "yyyy-MM-dd HH:mm:ss" log time and convert it to epoch seconds at offset +8.
                LocalDateTime parse = LocalDateTime.parse(element.getOperTime(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
                long eventTime = parse.toEpochSecond(ZoneOffset.of("+8"));
                // Flink timestamps are in milliseconds, while the log time was only converted to seconds, so multiply by 1000.
                return eventTime * 1000;
            }
        })
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值