package com.hao;
import java.util.Random;
public class Main {
    /**
     * Entry point for the mock ad-click log generator.
     * Starts a background thread that appends CSV records to the given file.
     *
     * @param args args[0] = path of the log file to write (e.g. ./adclick.log)
     * @throws Exception if the output file cannot be opened
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: java com.hao.Main <output-log-path>");
            return;
        }
        FileThread fileThread = new FileThread(args[0]);
        Thread thread = new Thread(fileThread);
        thread.start();
    }
}
package com.hao;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Random;
public class FileThread implements Runnable {

    /** Random source for user ids, ad ids, and province/city picks. */
    private final Random random;
    /** province name -> array of its cities; populated once in the constructor. */
    public HashMap<String, String[]> provinces = null;
    private FileOutputStream fileOutputStream = null;
    private BufferedOutputStream bufferedOutputStream = null;
    /** Province keys cached once so we don't rebuild an Object[] on every loop pass. */
    private String[] provinceNames;

    /**
     * Opens the output file and builds the mock lookup tables.
     *
     * @param path file the generated ad-click lines are written to
     * @throws Exception if the output file cannot be opened
     */
    public FileThread(String path) throws Exception {
        random = new Random();
        initProvinces();
        fileOutputStream = new FileOutputStream(path);
        bufferedOutputStream = new BufferedOutputStream(fileOutputStream, 1024);
    }

    /** Fills the province -> cities map used when generating records. */
    private void initProvinces() {
        provinces = new HashMap<>();
        provinces.put("beijing", new String[]{"dongcheng", "xichang", "chaoyang"
                , "haidian", "fengtai", "daxing"});
        provinces.put("hebei", new String[]{"xingtai", "handan", "langfang", "zhangjiakou", "dangshan"});
        provinces.put("shanxia", new String[]{"taiyuan", "datong", "hongtong", "linfen"});
        provinces.put("henan", new String[]{"zhengzhou", "sanmenxia", "anyang", "nanyang", "zhumadian"});
        provinces.put("shandong", new String[]{"jinan", "qingdao", "yantai", "rizhao", "liaocheng", "linqing"});
        provinceNames = provinces.keySet().toArray(new String[0]);
    }

    /**
     * Emits one CSV record roughly every 300 ms in the form
     * {@code userId,adId,province,city,timestampMillis}
     * (e.g. {@code 543462,1715,beijing,beijing,1511658000}),
     * until the thread is interrupted or a write fails.
     */
    @Override
    public void run() {
        try {
            while (!Thread.currentThread().isInterrupted()) {
                StringBuilder line = new StringBuilder();
                line.append(random.nextInt(10000)).append(',');          // userId in [0, 10000)
                line.append(random.nextInt(30)).append(',');             // adId in [0, 30)
                String province = provinceNames[random.nextInt(provinceNames.length)];
                line.append(province).append(',');
                String[] cities = provinces.get(province);
                line.append(cities[random.nextInt(cities.length)]).append(',');
                line.append(System.currentTimeMillis());
                line.append(System.lineSeparator());                     // platform line terminator
                bufferedOutputStream.write(line.toString().getBytes(StandardCharsets.UTF_8));
                // Flush every record so `tail -F` (the Flume exec source) sees it
                // immediately instead of waiting for the 1024-byte buffer to fill.
                bufferedOutputStream.flush();
                Thread.sleep(300);
            }
        } catch (InterruptedException ex) {
            // Restore the interrupt flag so callers can observe the stop request.
            Thread.currentThread().interrupt();
        } catch (IOException ex) {
            // Fail loudly instead of silently swallowing write errors.
            ex.printStackTrace();
        } finally {
            close();
        }
    }

    /**
     * Closes the streams when the generator loop exits.
     * (finalize() is deprecated and never runs for a live looping thread,
     * so cleanup happens here instead.)
     */
    private void close() {
        try {
            // Closing the buffered stream flushes it and closes the wrapped FileOutputStream.
            bufferedOutputStream.close();
        } catch (IOException ignored) {
            // best-effort shutdown; nothing useful to do on failure
        }
    }
}
开启虚拟机
开启环境
在dragon01上创建生产者主题（topic），jar包上传到dragon02
在02下创建flume配置文件
在03消费数据
创建kafka主题（topic）province，3个分区、2个副本
kafka-topics.sh --create --bootstrap-server dragon01:9092 --topic province --partitions 3 --replication-factor 2
启动消费者
kafka-console-consumer.sh --bootstrap-server dragon01:9092 --topic province
切换到flume/job下
vim file-flume-kafka.conf
a1.sources=r1
a1.channels=c1
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /home/flink/adclick.log
a1.channels.c1.type=org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers=dragon01:9092,dragon02:9092
a1.channels.c1.parseAsFlumeEvent=false
a1.channels.c1.kafka.topic=province
a1.sources.r1.channels=c1
在bin目录下启动
bin/flume-ng agent -c conf -f job/file-flume-kafka.conf -n a1
启动jar包文件
java -cp ./mock.jar com.hao.Main ./adclick.log
等待几秒钟
采集到的数据
写flink代码
package hao.day20220606;
import hao.bean.AdClickLog;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Properties;
public class Flume_Kafka_Flink {
    /**
     * Counts ad clicks per (province, city, adId) read from the Kafka topic
     * "province" and continuously prints the running totals.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);

        // Kafka consumer settings: brokers, consumer group, and where to start
        // reading when the group has no committed offset yet.
        Properties consumerProps = new Properties();
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "dragon01:9092,dragon03:9092");
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "adclick");
        consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("province", new SimpleStringSchema(), consumerProps);
        DataStreamSource<String> rawLines = env.<String>addSource(consumer);

        // Parse each CSV line and emit ("province-city-adId", 1) pairs.
        SingleOutputStreamOperator<Tuple2<String, Integer>> clickPairs = rawLines
                .map(record -> {
                    AdClickLog click = new AdClickLog(record);
                    String compositeKey =
                            click.getProvince() + "-" + click.getCity() + "-" + click.getAdId();
                    return Tuple2.of(compositeKey, 1);
                })
                .returns(Types.TUPLE(Types.STRING, Types.INT));

        // Keyed running sum of clicks, printed to stdout.
        clickPairs.keyBy(pair -> pair.f0).sum(1).print();
        env.execute();
    }
}
成功!!!!!!!!!!!!!