Impala 中的时间字段采用 INT96 保存。常规时间(如 2017-05-23 11:59:43.345717)要保存为 timestamp 类型,需要先经过转换才能使用:采用 Julian day(儒略日)来表示日期,利用 JDateTime 生成 NanoTime,然后转换为 Binary 保存到 HDFS。NanoTimeUtils 根据 JDateTime 生成 NanoTime。
注意时区:不同时区生成的结果不同
/**
* 创建日期:2017-8-4
* 包路径:org.meter.parquet.NanoTimeUtils.java
* 创建者:meter
* 描述:
* 版权:copyright@2017 by meter !
*/
package org.meter.parquet;
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.TimeZone;
import org.apache.parquet.example.data.simple.NanoTime;
import jodd.datetime.JDateTime;
/**
 * NanoTime helper used to store timestamp-typed fields in Parquet files.
 *
 * <p>Converts between {@link Timestamp} and {@link NanoTime}, i.e. a Julian day
 * number plus the nanoseconds elapsed within that day. Wall-clock fields are
 * always read and written in the fixed {@code Asia/Shanghai} time zone, so the
 * produced values differ from those computed in any other zone.
 *
 * <p>Each thread works on its own cached {@link Calendar} instance.
 *
 * @author meter
 */
public class NanoTimeUtils {
    static final long NANOS_PER_SECOND = 1000000000;
    static final long SECONDS_PER_MINUTE = 60;
    static final long MINUTES_PER_HOUR = 60;

    private static final long NANOS_PER_MINUTE = NANOS_PER_SECOND * SECONDS_PER_MINUTE;
    private static final long NANOS_PER_HOUR = NANOS_PER_MINUTE * MINUTES_PER_HOUR;
    private static final long NANOS_PER_DAY = NANOS_PER_HOUR * 24;

    // Creating a calendar computes the current time needlessly, so one instance
    // is cached per thread. A GregorianCalendar is requested explicitly because
    // Calendar.getInstance may return a non-Gregorian calendar for some default
    // locales, in which case Calendar.YEAR would not be a Gregorian year.
    private static final ThreadLocal<Calendar> parquetTsCalendar = new ThreadLocal<Calendar>() {
        @Override
        protected Calendar initialValue() {
            return new GregorianCalendar(TimeZone.getTimeZone("Asia/Shanghai"));
        }
    };

    /** Returns the calling thread's cached calendar (Asia/Shanghai). */
    private static Calendar getCalendar() {
        return parquetTsCalendar.get();
    }

    /**
     * Converts a timestamp into its Julian-day / nanos-of-day representation.
     *
     * @param ts the timestamp to convert; must not be {@code null}
     * @return the {@link NanoTime} whose day and time-of-day are the
     *         Asia/Shanghai wall-clock date and time of {@code ts}
     */
    public static NanoTime getNanoTime(Timestamp ts) {
        Calendar calendar = getCalendar();
        calendar.setTime(ts);
        JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
                calendar.get(Calendar.MONTH) + 1, // Calendar months are 0-based; JDateTime is given a 1-based month
                calendar.get(Calendar.DAY_OF_MONTH));
        int days = jDateTime.getJulianDayNumber();
        // Sub-second precision comes from the Timestamp itself; the calendar
        // only carries milliseconds.
        long nanosOfDay = ts.getNanos()
                + NANOS_PER_SECOND * calendar.get(Calendar.SECOND)
                + NANOS_PER_MINUTE * calendar.get(Calendar.MINUTE)
                + NANOS_PER_HOUR * calendar.get(Calendar.HOUR_OF_DAY);
        return new NanoTime(days, nanosOfDay);
    }

    /**
     * Parses a JDBC timestamp string and converts it to a {@link NanoTime}.
     *
     * <p>The string is parsed by {@link Timestamp#valueOf(String)}, which
     * interprets it in the JVM default time zone; the resulting instant is then
     * broken down in Asia/Shanghai. The result therefore depends on the JVM
     * default time zone.
     *
     * @param time timestamp text in {@code yyyy-[m]m-[d]d hh:mm:ss[.f...]} format
     * @return the converted value
     * @throws IllegalArgumentException if {@code time} is not in the expected format
     */
    public static NanoTime getNanoTime(String time) {
        return getNanoTime(Timestamp.valueOf(time));
    }

    /**
     * Converts a Julian-day / nanos-of-day value back into a timestamp.
     *
     * <p>A time-of-day outside {@code [0, one day)} is folded into the Julian
     * day before conversion, so negative or overflowing values are accepted.
     *
     * @param nt the value to convert; must not be {@code null}
     * @return the timestamp whose Asia/Shanghai wall-clock date and time match {@code nt}
     */
    public static Timestamp getTimestamp(NanoTime nt) {
        int julianDay = nt.getJulianDay();
        long nanosOfDay = nt.getTimeOfDayNanos();

        // Normalise so that 0 <= nanosOfDay < NANOS_PER_DAY; otherwise a negative
        // sub-second part would make Timestamp.setNanos reject the value.
        julianDay += (int) (nanosOfDay / NANOS_PER_DAY);
        nanosOfDay %= NANOS_PER_DAY;
        if (nanosOfDay < 0) {
            nanosOfDay += NANOS_PER_DAY;
            julianDay--;
        }

        int hour = (int) (nanosOfDay / NANOS_PER_HOUR);
        long remainder = nanosOfDay % NANOS_PER_HOUR;
        int minutes = (int) (remainder / NANOS_PER_MINUTE);
        remainder %= NANOS_PER_MINUTE;
        int seconds = (int) (remainder / NANOS_PER_SECOND);
        int nanos = (int) (remainder % NANOS_PER_SECOND);

        JDateTime jDateTime = new JDateTime((double) julianDay);
        Calendar calendar = getCalendar();
        // The calendar is reused across calls: drop every field left over from
        // a previous conversion (era, milliseconds, ...) before setting new ones.
        calendar.clear();
        calendar.set(jDateTime.getYear(),
                jDateTime.getMonth() - 1, // back to Calendar's 0-based month
                jDateTime.getDay(), hour, minutes, seconds);

        Timestamp ts = new Timestamp(calendar.getTimeInMillis());
        ts.setNanos(nanos);
        return ts;
    }

    /**
     * Manual smoke run: converts one sample timestamp and discards the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        getNanoTime("2017-05-23 11:59:43.345717");
    }
}
测试类:写出 Parquet 格式文件,用于 Impala 操作 timestamp 字段 access_time;Impala 中 timestamp 类型的字段以 INT96 保存。
/**
* 创建日期:2017-8-3
* 包路径:org.meter.parquet.ParquetWriteTimeStampDemo.java
* 创建者:meter
* 描述:
* 版权:copyright@2017 by meter !
*/
package org.meter.parquet;
import java.io.IOException;
import jodd.datetime.JDateTime;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Demo that writes a Parquet file whose {@code access_time} column is declared
 * as {@code int96} and filled through {@link NanoTimeUtils}.
 *
 * @author meter
 */
public class ParquetWriteTimeStampDemo {
    private static final Logger logger = LoggerFactory
            .getLogger(ParquetWriteTimeStampDemo.class);

    /** Output file used when no path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH =
            "C:\\Users\\meir\\Desktop\\linuxtetdir\\logtxt\\testTime0804.parq";

    /** Number of identical sample rows written by {@link #main(String[])}. */
    private static final int ROW_COUNT = 1000;

    private static final String schemaStr = "message schema {"
            + "optional int64 log_id;" + "optional binary idc_id;"
            + "optional int64 house_id;" + "optional int64 src_ip_long;"
            + "optional int64 dest_ip_long;" + "optional int64 src_port;"
            + "optional int64 dest_port;" + "optional int32 protocol_type;"
            + "optional binary url64;" + "optional int96 access_time;}";
    private static final MessageType schema = MessageTypeParser
            .parseMessageType(schemaStr);
    private static final SimpleGroupFactory groupFactory = new SimpleGroupFactory(
            schema);

    /**
     * Builds a writer for the demo schema.
     *
     * <p>The writer is configured with {@code CREATE} write mode, the
     * {@code PARQUET_1_0} writer version and SNAPPY compression.
     *
     * @param path local file system path of the file to create; it is
     *             prefixed with {@code file:///}
     * @return a writer the caller is responsible for closing
     * @throws IOException if the writer cannot be created
     */
    private static ParquetWriter<Group> initWriter(String path) throws IOException {
        Path file = new Path("file:///" + path);
        ExampleParquetWriter.Builder builder = ExampleParquetWriter
                .builder(file)
                .withWriteMode(ParquetFileWriter.Mode.CREATE)
                .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0)
                .withCompressionCodec(CompressionCodecName.SNAPPY)
                .withType(schema);
        return builder.build();
    }

    /**
     * Writes {@code ROW_COUNT} copies of one sample access-log record.
     *
     * @param args optional; {@code args[0]} overrides the default output path
     * @throws IOException if the file cannot be created, written or closed
     */
    public static void main(String[] args) throws IOException {
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
        String[] access_log = { "111111", "22222", "33333", "44444", "55555",
                "666666", "777777", "888888", "999999", "2017-05-23 11:59:43.345717" };

        ParquetWriter<Group> writer = initWriter(outputPath);
        try {
            for (int i = 0; i < ROW_COUNT; i++) {
                writer.write(groupFactory.newGroup()
                        .append("log_id", Long.parseLong(access_log[0]))
                        .append("idc_id", access_log[1])
                        .append("house_id", Long.parseLong(access_log[2]))
                        .append("src_ip_long", Long.parseLong(access_log[3]))
                        .append("dest_ip_long", Long.parseLong(access_log[4]))
                        .append("src_port", Long.parseLong(access_log[5]))
                        .append("dest_port", Long.parseLong(access_log[6]))
                        .append("protocol_type", Integer.parseInt(access_log[7]))
                        .append("url64", access_log[8])
                        // The timestamp string is stored as the INT96 binary form of its NanoTime.
                        .append("access_time", NanoTimeUtils.getNanoTime(access_log[9]).toBinary()));
            }
        } finally {
            // Always release the writer, even when a write fails part-way.
            writer.close();
        }
    }
}