纯真 IP 地址库解析的 Hive UDF 实现
纯真 IP 地址库(qqwry.dat)的解析代码见:https://github.com/difeng/qqwry
本 Hive UDF 基于上述解析代码实现。在 Hive 中使用该 UDF 函数,可以方便地根据 IP 地址进行数据分析。
在 pom.xml 中添加 Hive、Hadoop 相关依赖:
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.3</version>
</dependency>
package common.udf.qqwry2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.io.*;
import java.net.URI;
public class IPLocation extends UDF {
private static Configuration configuration;
private static FileSystem fileSystem;
private static InputStream in;
private static byte[] data;
private long firstIndexOffset;
private long lastIndexOffset;
private long totalIndexCount;
private static final byte REDIRECT_MODE_1 = 0x01;
private static final byte REDIRECT_MODE_2 = 0x02;
static final long IP_RECORD_LENGTH = 7;
private static Long lastModifyTime = 0L;
public static boolean enableFileWatch = false;
static {
try {
configuration = new Configuration();
fileSystem = FileSystem.get(URI.create("hdfs:///data/qqwry.dat"), configuration);
in = fileSystem.open(new Path("hdfs:///data/qqwry.dat"));
ByteArrayOutputStream out = null;
out = new ByteAr