Kafka Streams (Part 2)

This post shows how to perform stream operations with Apache Kafka Streams: forwarding data from the Kafka topic 'event_attendees_row' to the topic 'event_attendees_1', transforming 'user_friends' into 'userfriends', and working with the different window types (Hopping Time Window, Tumbling Window, Session Window, Sliding Window).


Passing data from one Kafka topic directly to another
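In the simplest case no transformation is needed at all; as a minimal sketch (the topic names here are placeholders), a pass-through topology is just a source stream wired to a sink. The two examples below go one step further and reshape every record with flatMap before writing it out.

StreamsBuilder builder = new StreamsBuilder();
// forward every record from topic1 to topic2 unchanged
builder.stream("topic1").to("topic2");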

Example 1

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;

import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

public class EventAttendStream {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG,"eventattend");// one application ID per consumer group
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.232.211:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,Serdes.String().getClass());
        prop.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG,300);// commit interval in ms
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");// disable auto commit
        //earliest latest none
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");

        StreamsBuilder builder = new StreamsBuilder();
        KStream<Object, Object> source = builder.stream("event_attendees_row");
        source.flatMap((key,value)->{
            //value -> event,yes,maybe,invited,no
            String[] fields = value.toString().split(",");
            ArrayList<KeyValue<String,String>> list = new ArrayList<>();
            if (fields.length>=2 && fields[1].trim().length()>0) {
                String[] yes = fields[1].split(" ");
                for (String s : yes) {
                    System.out.println(fields[0] + "," + s + ",yes");
                    KeyValue<String, String> yesKeyValue = new KeyValue<>(null, fields[0] + "," + s + ",yes");
                    list.add(yesKeyValue);
                }
            }
            if (fields.length>=3 && fields[2].trim().length()>0){
                String[] maybe = fields[2].split(" ");
                for (String s : maybe) {
                    System.out.println(fields[0] + "," + s + ",maybe");
                    KeyValue<String, String> maybeKeyValue = new KeyValue<>(null, fields[0] + "," + s + ",maybe");
                    list.add(maybeKeyValue);
                }
            }
            if (fields.length>=4 && fields[3].trim().length()>0){
                String[] invited = fields[3].split(" ");
                for (String s : invited) {
                    System.out.println(fields[0] + "," + s + ",invited");
                    KeyValue<String, String> invitedKeyValue = new KeyValue<>(null, fields[0] + "," + s + ",invited");
                    list.add(invitedKeyValue);
                }
            }
            if (fields.length>=5 && fields[4].trim().length()>0){
                String[] no = fields[4].split(" ");
                for (String s : no) {
                    System.out.println(fields[0] + "," + s + ",no");
                    KeyValue<String, String> noKeyValue = new KeyValue<>(null, fields[0] + "," + s + ",no");
                    list.add(noKeyValue);
                }
            }
            return list;
        }).to("event_attendees_1");
        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo,prop);
        CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream"){
            public void run(){
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }
}
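For an illustrative input line such as 123,456 789,111,222,333 (the event id followed by the yes/maybe/invited/no fields, each a space-separated list of user ids), this topology emits 123,456,yes, 123,789,yes, 123,111,maybe, 123,222,invited and 123,333,no to event_attendees_1.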

Example 2

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;

import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;


public class UserFriendStream {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG,"userfriend1");// one application ID per consumer group
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.232.211:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,Serdes.String().getClass());
        prop.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG,300);// commit interval in ms
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");// disable auto commit
        //earliest latest none
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");

        StreamsBuilder builder = new StreamsBuilder();
        //user,friends=> 3238005,47949549 68056805
        KStream<Object, Object> source = builder.stream("user_friends");
        KStream<String, String> user_friends = source.flatMap((key, value) -> {
            ArrayList<KeyValue<String, String>> list = new ArrayList<>();
            String[] fields = value.toString().split(",");
            if (fields.length == 2) {
                String[] friends = fields[1].split("\\s+");
                String user = fields[0];
                if (user.trim().length() > 0) {
                    for (String friend : friends) {
                        System.out.println(user + "," + friend);
                        KeyValue<String, String> keyValue = new KeyValue<>(null, user + "," + friend);
                        list.add(keyValue);
                    }

                }
            }
            return list;
        });
        user_friends.to("userfriends");
        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo,prop);
        CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream"){
            public void run(){
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }
}
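Given the sample record from the comment above, 3238005,47949549 68056805, this topology writes 3238005,47949549 and 3238005,68056805 to userfriends: one (user, friend) pair per output record.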

Kafka Streams windows

Streaming data is unbounded in time, while aggregations can only operate on a specific, bounded data set. We therefore need some well-defined semantics for carving bounded data sets out of the unbounded stream. Windows are the most common way to set these computation boundaries. Different stream-processing systems support similar, but not identical, window types.

Hopping Time Window

A hopping time window has two properties: the window size and the advance interval. The window size determines how much data each computation covers, while the advance interval determines how often a new window starts. The application's start time is the start of the first window; a new window is created after every advance interval, and every window spans size units of time, so consecutive windows overlap whenever the advance interval is smaller than the size.
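In code, a hopping window is declared with TimeWindows plus advanceBy, exactly as in the commented-out lines of the demo at the end of this post (the 15 s size and 5 s advance are arbitrary sample values):

TimeWindows.of(Duration.ofSeconds(15).toMillis())      // window size: 15 s
        .advanceBy(Duration.ofSeconds(5).toMillis());  // a new window starts every 5 s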

Tumbling Time Window

A tumbling time window is a special case of the hopping time window: when a hopping window's size equals its advance interval, it becomes a tumbling window.
A tumbling window has a single parameter, size; the end of one window is the start of the next, so the windows neither overlap nor leave gaps.
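Accordingly, a tumbling window is declared with the size alone:

TimeWindows.of(Duration.ofSeconds(15).toMillis()); // size == advance interval: no gaps, no overlap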

Session Window

Session windows are used for aggregations over grouped keys. Records are first grouped by key, and a window's start and end points are then derived from the data itself according to the business logic. A typical use case is measuring how long a user stays on a website: for a given user (the key), the window opens on a login event and closes on a logout event or after a period of inactivity. When the window closes, the user's visit duration or click count can be computed.
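In the API used by the demo below, a session window is declared with the inactivity gap that closes a session:

SessionWindows.with(Duration.ofSeconds(15).toMillis()); // a session ends after 15 s without records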

Sliding Window

Sliding windows are used only when joining two KStreams. The window size defines the maximum time difference under which records from the two sides are considered to belong to the same window and can therefore be joined. If the window size is 5 seconds, records from the two KStreams whose timestamps differ by less than 5 seconds fall into the same window and participate in the join.
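A sketch of such a join (left, right and the value joiner are hypothetical placeholders; JoinWindows.of takes the maximum allowed timestamp difference):

KStream<String, String> joined = left.join(right,
        (v1, v2) -> v1 + "," + v2,                         // combine the two matching values
        JoinWindows.of(Duration.ofSeconds(5).toMillis())); // records at most 5 s apart are joined

The SessionDemo class below puts the session window to work, counting occurrences per word; the tumbling and hopping variants are kept as commented-out alternatives.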

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.*;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

public class SessionDemo {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG,"windowdemo3");// one application ID per consumer group
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.232.211:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,Serdes.String().getClass());
        prop.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG,300);// commit interval in ms
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");// disable auto commit
        //earliest latest none
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");

        StreamsBuilder builder = new StreamsBuilder();
        SessionWindowedKStream<String, String> windowdemo1 = builder.stream("windowdemo")
                // split each input line into words and emit one record per word
                .flatMap((key, value) -> {
                    ArrayList<KeyValue<String, String>> keyValues = new ArrayList<>();
                    for (String word : value.toString().split("\\s+")) {
                        keyValues.add(new KeyValue<>(null, word));
                    }
                    return keyValues;
                })
                // re-key by word so counts are grouped per word
                .map((key, value) -> new KeyValue<String, String>(value, "1"))
                .groupByKey()
//                .windowedBy(TimeWindows.of(Duration.ofSeconds(15).toMillis()));// tumbling
//                .windowedBy(TimeWindows.of(Duration.ofSeconds(15).toMillis())
//                        .advanceBy(Duration.ofSeconds(5).toMillis()));// hopping
                .windowedBy(SessionWindows.with(Duration.ofSeconds(15).toMillis()));// session window

        KStream<Windowed<String>, Long> windowedLongKStream = windowdemo1.count().toStream();
        windowedLongKStream.foreach((key, value) -> {
            System.out.println("key:" + key + ",value:" + value);
        });
        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo,prop);
        CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream"){
            public void run(){
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
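To try it out, send a few whitespace-separated lines to the windowdemo topic, for example with the console producer that ships with Kafka (assuming the same broker address as above): kafka-console-producer.sh --broker-list 192.168.232.211:9092 --topic windowdemo. Words arriving within 15 seconds of each other are counted in the same session; after 15 seconds of silence the next word opens a new one.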



