Part 1: Flink
Official API docs: https://nightlies.apache.org/flink/flink-docs-release-1.13/zh/docs/connectors/datastream/streamfile_sink/
// Imports needed by this snippet
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import java.util.concurrent.TimeUnit;

// Rolling policy: roll the part file once it reaches 1 GB, has been
// inactive for 5 minutes, or has been open for 15 minutes
DefaultRollingPolicy<String, String> rollingPolicy = DefaultRollingPolicy.builder()
        .withMaxPartSize(1024 * 1024 * 1024)
        .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
        .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
        .build();
// Write the files to the target path FILE_SAVE_PATH
StreamingFileSink<String> fileSink = StreamingFileSink
        .forRowFormat(new Path(FILE_SAVE_PATH), new SimpleStringEncoder<String>("UTF-8"))
        // Bucket (folder) name per day; the Hive external table points at this path
        .withBucketAssigner(new DateTimeBucketAssigner<>("yyyyMMdd"))
        .withRollingPolicy(rollingPolicy)
        .build();
// Attach the file sink
dataStream.addSink(fileSink).name("file sink to hdfs").uid("sink to hdfs")
        .setParallelism(10);
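A minimal sketch of the surrounding job, with a socket source standing in for whatever actually produces the String records (source, port, and job name are illustrative assumptions, not from the original):

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Checkpointing is required here: StreamingFileSink commits part files to the
// finished state only when a checkpoint completes, so without it every file
// stays in-progress forever
env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
DataStream<String> dataStream = env.socketTextStream("localhost", 9999); // stand-in source
dataStream.addSink(fileSink); // fileSink built as above
env.execute("flink-to-hdfs");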
Files saved to HDFS show up in two forms:
1: in-progress files that are still being written; by default these get hidden names like .part-0-0.inprogress.<uuid>
2: finished files that have been committed and are safe for downstream readers (part-0-0, part-0-1, ...)
Hive SQL then reads the HDFS files; the yyyyMMdd bucket folder name is what lets a query target a specific day's data.
create external table if not exists tmp.table_name_${dt} (
    column1 string,
    column2 string,
    column3 string,
    column4 string,
    column5 string,
    column6 string
)
row format delimited fields terminated by '|' -- field delimiter in the data
stored as textfile -- the sink writes plain UTF-8 rows, so textfile, not a binary format such as orc
location '/user/hive/warehouse/flink/dir_name/${dt}/';
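For the '|' delimiter above to line up, the stream feeding the sink has to emit pipe-delimited rows. A minimal sketch of that formatting step, assuming a hypothetical Event POJO with six string getters (none of this is in the original):

// Serialize each Event as one '|'-delimited line matching the DDL above;
// the result feeds the sink in place of dataStream
DataStream<String> rows = events.map(e -> String.join("|",
        e.getColumn1(), e.getColumn2(), e.getColumn3(),
        e.getColumn4(), e.getColumn5(), e.getColumn6()));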
Part 2: Spring Boot
1: First write the data to a local file
private void saveFile(List<String> list, String di) {
    try {
        File folder = new File(fileConfig.getLocalPath()); // target folder
        if (!folder.exists()) {
            folder.mkdirs();
        }
        // One file per minute in this example; pick a coarser or finer time
        // bucket to control how large each file grows
        String localFile = fileConfig.formatLocalPath(di);
        File file = new File(localFile);
        if (!file.exists()) {
            file.createNewFile();
        }
        // try-with-resources closes the writer even if a write fails
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(file, true))) { // append mode
            for (String data : list) {
                bufferedWriter.write(data);
                bufferedWriter.newLine();
            }
            bufferedWriter.flush();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
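The fileConfig helper is not shown in the original; a hypothetical version (property name, path layout, and file suffix are all invented for illustration) could look like this. The point is that each file name starts with a yyyyMMddHHmm timestamp, which the upload job parses back out later:

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

// Hypothetical config holder for the local staging directory
@Component
public class FileConfig {
    @Value("${file.local-path}") // e.g. /data/flink-upload/
    private String localPath;

    private static final DateTimeFormatter MINUTE_FMT =
            DateTimeFormatter.ofPattern("yyyyMMddHHmm");

    public String getLocalPath() {
        return localPath;
    }

    // One file per minute, named yyyyMMddHHmm_<di>.txt
    public String formatLocalPath(String di) {
        return localPath + LocalDateTime.now().format(MINUTE_FMT) + "_" + di + ".txt";
    }
}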
2: Local files accumulate under localPath, one per minute, as written above
3: Upload to HDFS
// Runs on a schedule, e.g. .withSchedule(CronScheduleBuilder.cronSchedule("10 */1 * * * ?")); see the Quartz sketch after this method
// Walk the local folder and upload every finished file
public void process() {
    try {
        File folder = new File(localPath);
        File[] files = folder.listFiles();
        if (null != files && files.length > 0) {
            for (File file : files) {
                String fileName = file.getName();
                String fileDt = fileName.substring(0, 8); // yyyyMMdd -> HDFS folder name
                // Only ship files created more than 2 minutes ago, so a file
                // that is still being appended to is never uploaded half-written
                if (checkFileDone(fileName.substring(0, 12))) { // yyyyMMddHHmm prefix
                    uploadFile(fileDt, file);
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
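The cron expression above suggests Quartz; here is a minimal wiring sketch (class and identity names are made up, and how the job reaches the uploader bean depends on the application):

import org.quartz.CronScheduleBuilder;
import org.quartz.Job;
import org.quartz.JobBuilder;
import org.quartz.JobDetail;
import org.quartz.JobExecutionContext;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
import org.quartz.Trigger;
import org.quartz.TriggerBuilder;
import org.quartz.impl.StdSchedulerFactory;

public class UploadScheduler {
    public static void start() throws SchedulerException {
        JobDetail job = JobBuilder.newJob(UploadJob.class)
                .withIdentity("hdfsUploadJob")
                .build();
        // Fires at second 10 of every minute, matching the cron above
        Trigger trigger = TriggerBuilder.newTrigger()
                .withIdentity("hdfsUploadTrigger")
                .withSchedule(CronScheduleBuilder.cronSchedule("10 */1 * * * ?"))
                .build();
        Scheduler scheduler = StdSchedulerFactory.getDefaultScheduler();
        scheduler.scheduleJob(job, trigger);
        scheduler.start();
    }

    // Thin Job wrapper that delegates to the process() method shown above
    public static class UploadJob implements Job {
        @Override
        public void execute(JobExecutionContext context) {
            // look up the uploader bean and call process(); wiring is app-specific
        }
    }
}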
// Upload a single file
private void uploadFile(String dt, File file) {
    try {
        log.info("uploading file: [{}]", file.getName());
        Configuration configuration = new Configuration();
        // "hive" is a user with write permission on the target path
        try (FileSystem fs = FileSystem.get(new URI(hdfsPath), configuration, "hive")) {
            // Create the per-day folder on demand
            fs.mkdirs(new Path(hdfsPath + dt));
            // First argument true: delete the local file once the upload to HDFS succeeds
            fs.copyFromLocalFile(true, new Path(file.getPath()), new Path(hdfsPath + dt));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
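Note that hdfsPath is handed to FileSystem.get as a URI, so it must be a full address along the lines of hdfs://<namenode-host>:<port>/user/hive/warehouse/flink/dir_name/ (host and port are placeholders here); and because it is also concatenated with dt to build the target folder, it should end with a trailing slash.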
// A file counts as done once it is more than 2 minutes old
private final static DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyyMMddHHmm");

private boolean checkFileDone(String dateStr) {
    try {
        LocalDateTime target = LocalDateTime.parse(dateStr, dtf);
        LocalDateTime current = LocalDateTime.now();
        Duration duration = Duration.between(target, current);
        return duration.toMinutes() > 2;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return false;
}
4: Hive reads the uploaded files exactly as in Part 1.