本文关键词:
- 实时ETL
- IP解析
- Blink–【阿里实时计算平台 Based on Flink】
前提:
之前的文章介绍了大数据离线平台的IP实时匹配,在那篇文章中我说了将IP合并内容存储在ODPS【Hi一起来了解下大数据平台的IP解析吧】,是因为离线数据量一般很大,数据量增加之后离线任务启动的实例就很多,相当于全部的实例在同一时间去存储了IP合并内容的组件拉取数据,这个是危险的,所以不建议将IP内容存储在其他地方,而是就地存储ODPS,依赖于ODPS的内在资源分发机制来保证IP的解析工作正常运行。
这里再次使用了IP内容表,但是是实时的ETL使用的,实时的数据量自然就没有离线那么大,所以运行的实例自然可以少一点,这样的话IP的存储组件就有了其他的不同选择。
这里我使用了OSS来进行存储IP数据。然后通过实时的UDF函数去解析【这里为什么使用UDF我在之前的文章中解释过,有兴趣的可以去看下,链接–>Hi一起来了解下大数据平台的IP解析吧】
IP信息上传OSS:
- 打开OSS-Browser【阿里云官网可以下载 链接–>OSS的使用】
- 将本地的IP合并内容直接拖拽上传即可
好了,离线文章基本已经解释了其他的相关信息,这里就直接上代码了
实现代码:
基于存储到内存:
package com.streamCompute.udx.IP.readOss.version02;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
import java.util.concurrent.locks.ReentrantLock;
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.model.GetObjectRequest;
import com.aliyun.oss.model.OSSObject;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.ScalarFunction;
public class UDFGetIPCityInfo extends ScalarFunction {
//OSS配置
private InputStream in = null;
private static String END_POINT = "XXXXX";
private static String AK = "XXXX";
private static String SK = "XXXX";
private static String BUCKET_NAME = "ip";
private static String OBJECT_NAME = "ipInfo/ip_info.txt";
private static OSS ossClient;
//定义锁
private static ReentrantLock lock = new ReentrantLock();
//存储IP信息数据
private static ArrayList<IpDM> ipList = new ArrayList<IpDM>();
public static void loadIpInfo() {
lock.lock();
while(true) {
ArrayList<IpDM> ipTmp = new ArrayList<IpDM>();
ossClient = new OSSClientBuilder().build(END_POINT, AK, SK);
GetObjectRequest getObjectRequest = new GetObjectRequest(BUCKET_NAME, OBJECT_NAME);
getObjectRequest.setRange(0, 20971520); // 20M
//读取文件
OSSObject ossObject = ossClient.getObject(getObjectRequest);
InputStream stream = ossObject.getObjectContent();
BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
//这里判断 如果ipInfos 已经完整了,那么不需要继续插入 最多299284
if(ipList.size()==299284){
//static变量 同JVM通用 只加载一次即可
break;
}
try{
String line = "";
while ((line = reader.readLine()) != null) {
String[] lines = line.split(",");
//处理数据
ipTmp.add(new IpDM(lines));
}
//Ip信息排序 使用二分查找
Collections.sort(ipTmp, new Comparator<IpDM>() {
@Override
public int compare(IpDM o1, IpDM o2) {
return Long.valueOf(o1.getIp_begin_long()).compareTo(
Long.valueOf(o2.getIp_begin_long()));
}
});
//如果临时存储不为空,那么将数据赋值给ipList
if(ipTmp!=null){
ipList = ipTmp;
}
}catch (Exception e){
e.printStackTrace();
}
System.out.println("The size of this list is " + ipList.size());
ossClient.shutdown(); // close connection
break;
}
lock.unlock();
}
public static IpDM getIpDM(String ip) {
long ipLong = ipDotDec2Long(ip);
if (ipLong < 0L)
return new IpDM();
int begin = 0;
int end = ipList.size() - 1;
while (begin <= end) {
int middle = (begin + end) / 2;
IpDM middleIpDM = ipList.get(middle);
if (middleIpDM == null)
break;
if (middleIpDM.getIp_begin_long() <= ipLong && middleIpDM
.getIp_end_long() >= ipLong)
return middleIpDM;
if (middleIpDM.getIp_begin_long() > ipLong) {
end = middle - 1;
continue;
}
if (middleIpDM.getIp_end_long() < ipLong)
begin = middle + 1;
}
return new IpDM();
}
private static long ipDotDec2Long(String ipDotDec) throws IllegalArgumentException {
ipDotDec = ipDotDec.trim();
String[] sa = ipDotDec.split("\\.");
if (sa.length != 4)
return -1L;
long[] ipLong = new long[4];
ipLong[0] = Long.parseLong(sa[0]);
if (ipLong[0] < 0L || ipLong[0] > 255L)
return -1L;
ipLong[1] = Long.parseLong(sa[1]);
if (ipLong[1] < 0L || ipLong[1] > 255L)
return -1L;
ipLong[2] = Long.parseLong(sa[2]);
if (ipLong[2] < 0L || ipLong[2] > 255L)
return -1L;
ipLong[3] = Long.parseLong(sa[3]);
if (ipLong[3] < 0L || ipLong[3] > 255L)
return -1L;
return (ipLong[0] << 24L) + (ipLong[1] << 16L) + (ipLong[2] << 8L) + ipLong[3];
}
public static class IpDM {
private long ip_begin_long = 0L;
private long ip_end_long = 0L;
private String country_name = "";
private String province_name = "";
public IpDM() {}
private String ipProcess(String src) {
if (src.equals("local"))
return "-1";
if (src.equals(""))
return "";
return src;
}
public IpDM(String[] info) {
this.ip_begin_long = Long.valueOf((info[0]));
this.ip_end_long = Long.valueOf(info[1]);
this.country_name = ipProcess(info[2]);
this.province_name = ipProcess(info[3]);
}
public long getIp_begin_long() {
return this.ip_begin_long;
}
public long getIp_end_long() {
return this.ip_end_long;
}
public String getCountry_name() {
return this.country_name;
}
public String getProvince_name() {
return this.province_name;
}
}
private String convNull(String text) {
if (text == null)
return "";
return text;
}
public void open(FunctionContext context) {
loadIpInfo();
}
public String eval(String ip, String flag) {
if (ip == null || flag == null || "".equals(ip) || "".equals(flag))
return null;
IpDM ipEntry = getIpDM(ip);
if (ipEntry != null) {
if ("country".equals(flag))
return ipEntry.getCountry_name();
if ("province".equals(flag))
return ipEntry.getProvince_name();
if ("all".equals(flag))
return convNull(ipEntry.getCountry_name()) + "," +
convNull(ipEntry.getProvince_name());
if ("range".equals(flag))
return ipEntry.getIp_begin_long() + "-" + ipEntry.getIp_end_long();
return null;
}
return null;
}
public void close() {
ossClient.shutdown();//close connection
}
public static void main(String[] args) {
UDFGetIPCityInfo demo = new UDFGetIPCityInfo();
demo.open(null);
System.out.println(demo.eval("12.12.12.12","all"));
}
}
基于存储在本地:
package com.streamCompute.udx.IP.readOss;
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.model.GetObjectRequest;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.ScalarFunction;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.LinkedList;
import java.util.Collections;
import java.util.concurrent.locks.ReentrantLock;
/**
 * Scalar UDF that resolves a dotted-decimal IPv4 address to
 * {@code country###province} using an IP range table cached on local disk.
 *
 * <p>On first use the table file is downloaded from OSS into the working directory
 * if it is not already there, then parsed, sorted by range start and kept in a static
 * array shared by every instance in the JVM. Lookups use binary search.
 *
 * @author Yun
 * @version 1.0 (2021/12/21)
 */
public class GetIpInfoFromDisk extends ScalarFunction {
    // Serializes loading so that concurrent open() calls do not load the table twice.
    private static final ReentrantLock lock = new ReentrantLock();

    // OSS configuration.
    // NOTE(review): the values below are placeholders; real credentials should be
    // injected from job configuration rather than committed to source control.
    private static final String END_POINT = "XXXX";
    private static final String AK = "XXXX";
    private static final String SK = "XXXX";
    private static final String BUCKET_NAME = "ip";
    private static final String OBJECT_NAME = "ip_info.txt";

    // Name of the local copy of the table inside the working directory.
    private static final String FILE_NAME = "ip_info.txt";

    // Sorted IP range table. An array gives O(1) access by index for the binary
    // search; it is replaced as a whole after loading and never mutated in place.
    private static volatile IPCityEntry[] ipInfos = new IPCityEntry[0];

    /**
     * Loads the IP table unless it is already loaded in this JVM.
     *
     * @throws Exception if the download or reading the local file fails; the lock is
     *         released in that case as well
     */
    @Override
    public void open(FunctionContext context) throws Exception {
        lock.lock();
        try {
            // The table is static, so one successful load serves the whole JVM.
            if (ipInfos.length > 0) {
                return;
            }
            File file = new File(getProjectPath() + "/" + FILE_NAME);
            if (!file.exists()) {
                downloadTo(file);
            }
            LinkedList<IPCityEntry> loaded = readEntries(file);
            System.out.println("The size of this list is " + loaded.size());
            Collections.sort(loaded);
            ipInfos = loaded.toArray(new IPCityEntry[0]);
        } finally {
            lock.unlock();
        }
    }

    // Downloads the table object from OSS into target and always shuts the client down.
    private static void downloadTo(File target) {
        OSS client = new OSSClientBuilder().build(END_POINT, AK, SK);
        try {
            client.getObject(new GetObjectRequest(BUCKET_NAME, OBJECT_NAME), target);
        } finally {
            client.shutdown();
        }
    }

    /**
     * Parses the comma-separated rows of {@code file} into entries.
     *
     * <p>Rows with too few columns or non-numeric bounds are skipped and counted.
     * The file is decoded as UTF-8 (NOTE(review): assumes the OSS object is stored
     * as UTF-8 - confirm).
     */
    private static LinkedList<IPCityEntry> readEntries(File file) throws IOException {
        LinkedList<IPCityEntry> entries = new LinkedList<>();
        int skipped = 0;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] infos = line.split(",");
                try {
                    entries.add(new IPCityEntry(
                            Long.parseLong(infos[0]), Long.parseLong(infos[1]), infos[2], infos[3]));
                } catch (RuntimeException malformedRow) {
                    // Drop this row only; one bad line must not fail the whole load.
                    skipped++;
                }
            }
        }
        if (skipped > 0) {
            System.err.println("Skipped " + skipped + " malformed IP rows");
        }
        return entries;
    }

    /**
     * Resolves {@code ip} to {@code country###province}.
     *
     * @return null when {@code ip} is null or empty, "empty" when it is malformed or
     *         not covered by any range, otherwise the matching entry's text
     */
    public String eval(String ip) throws IOException {
        if (ip == null || "".equals(ip)) {
            return null;
        }
        return getInfoByIp(ip);
    }

    /**
     * Returns the canonical path of the current working directory.
     *
     * @return canonical path that the empty relative path resolves to
     * @throws IOException if the canonical path cannot be determined
     */
    public String getProjectPath() throws IOException {
        return new File("").getCanonicalPath();
    }

    /**
     * Binary-searches the loaded table for the range containing {@code ip}.
     *
     * @return the matching entry's {@code country###province} text, or "empty" when
     *         the address is null, malformed or not covered by any range
     */
    public static String getInfoByIp(String ip) {
        if (ip == null) {
            return "empty";
        }
        long convertIP = convertIP(ip);
        if (convertIP == -1L) {
            return "empty";
        }
        // Read the volatile field once so the whole search sees one consistent table.
        IPCityEntry[] table = ipInfos;
        int start = 0;
        int end = table.length - 1;
        while (start <= end) {
            int mid = (start + end) >>> 1;
            IPCityEntry entry = table[mid];
            int range = entry.getIpRange(convertIP);
            if (range == 0) {
                return entry.toString();
            }
            if (range < 0) {
                end = mid - 1;
            } else {
                start = mid + 1;
            }
        }
        return "empty";
    }

    /**
     * Converts a dotted-decimal IPv4 address to its numeric value.
     *
     * @return the value in the range 0..4294967295, or -1 when the text does not have
     *         exactly four parts, a part is not a number, or a part is outside 0..255
     */
    public static long convertIP(String ipStr) {
        String[] octets = ipStr.trim().split("\\.");
        if (octets.length != 4) {
            return -1L;
        }
        long value = 0L;
        for (String octet : octets) {
            long part;
            try {
                part = Long.parseLong(octet);
            } catch (NumberFormatException notANumber) {
                // A dirty record must not fail the job; report it as "no match".
                return -1L;
            }
            if (part < 0L || part > 255L) {
                return -1L;
            }
            value = (value << 8) | part;
        }
        return value;
    }

    /** One row of the IP table: an inclusive numeric IP range and its location. */
    public static class IPCityEntry implements Comparable<IPCityEntry> {
        private long startIP;
        private long endIP;
        private String country;
        private String province;

        private IPCityEntry() {}

        public IPCityEntry(long startIP, long endIP, String country, String province) {
            this.startIP = startIP;
            this.endIP = endIP;
            this.country = country;
            this.province = province;
        }

        /** Orders entries ascending by the start of their range. */
        @Override
        public int compareTo(IPCityEntry o) {
            return Long.compare(this.startIP, o.startIP);
        }

        /**
         * Locates {@code ip} relative to this range.
         *
         * @return 0 when the range contains {@code ip}, 1 when {@code ip} lies above
         *         the range end, -1 otherwise
         */
        public int getIpRange(long ip) {
            if (ip >= this.startIP && ip <= this.endIP) {
                return 0;
            }
            return (ip > this.endIP) ? 1 : -1;
        }

        public long getStartIP() {
            return this.startIP;
        }

        public long getEndIP() {
            return this.endIP;
        }

        public void setStartIP(long startIP) {
            this.startIP = startIP;
        }

        public void setEndIP(long endIP) {
            this.endIP = endIP;
        }

        public String getCountry() {
            return this.country;
        }

        public void setCountry(String country) {
            this.country = country;
        }

        public String getProvince() {
            return this.province;
        }

        public void setProvince(String province) {
            this.province = province;
        }

        /** Returns the country and province joined by "###". */
        @Override
        public String toString() {
            return this.country + "###" + this.province;
        }
    }

    /** Manual smoke test: loads the table and resolves one sample address. */
    public static void main(String[] args) throws Exception {
        GetIpInfoFromDisk demo = new GetIpInfoFromDisk();
        demo.open(null);
        System.out.println(demo.eval("12.12.12.12"));
    }
}
打包上传:
打包就是普通的打包形式,然后上传到Blink界面,查询以及使用基本和离线是一样的。
有兴趣的可以看下文档:链接–>blink资源和函数使用