hiveql--建表

本文介绍了如何使用HiveQL创建外部表、分区表以及序列化建表。示例包括创建分区的外部表,使用ORC格式和ZLIB压缩创建表,以及自定义解析类进行日志解析建表。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

创建hive表

1、建外表(分区pdate,\t作为字段分隔符,hdfs路径:path):

-- External table partitioned by pdate; rows are tab-delimited text
-- read from the location 'path'.
CREATE EXTERNAL TABLE tablename (
    字段1 STRING,
    字段2 STRING
)
PARTITIONED BY (pdate STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
LOCATION 'path';

2、创建数据库database_test,创建表table_test(分区pdate、hour,压缩建表):

# Create the target database unless it already exists.
hive -e "create database if not exists database_test;"

# Drop any previous definition so the table is recreated from scratch.
hive -e "DROP TABLE IF EXISTS database_test.table_test";
# DDL for an ORC table compressed with ZLIB, partitioned by pdate and hour.
# The statement is kept in a variable and passed to hive as a single argument.
sql="CREATE TABLE database_test.table_test
(
字段1 string
,字段2 string
)
PARTITIONED BY (
  pdate string,
  hour string)
STORED AS ORC tblproperties ('orc.compress'='ZLIB')";

hive -e "$sql";

3、序列化建表(将日志解析,为各个字段取相应的日志内容):

   首先需要写一个解析类,代码如下:


package net.csdn.hive.table_leiyf;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import net.sf.json.JSONObject;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * Hive deserializer that converts one JSON log line into a row of eight
 * string columns: {@code type, userid, wsid, dt, mem_disk, cpu, netrx, nettx}.
 *
 * <p>The line must be a JSON object with a nested {@code data} object whose
 * {@code type} is {@code "mcn"} or {@code "disk"} (case-insensitive). Every
 * recognised record yields exactly eight values in column order; a value whose
 * JSON key is absent is emitted as an empty string so that later values never
 * shift into the wrong column.
 */
public class TestDeserializer implements Deserializer {

    /** Column names, in the order in which row values are emitted. */
    private static final String[] COLUMN_NAMES = {
        "type", "userid", "wsid", "dt", "mem_disk", "cpu", "netrx", "nettx"
    };

    /** Placeholder emitted for a column whose value is missing or not applicable. */
    private static final String MISSING = "";

    private static final List<String> FIELD_NAMES = new ArrayList<String>();
    private static final List<ObjectInspector> FIELD_INSPECTORS = new ArrayList<ObjectInspector>();

    static {
        // Every column is exposed to Hive as a Java String.
        for (String column : COLUMN_NAMES) {
            FIELD_NAMES.add(column);
            FIELD_INSPECTORS.add(ObjectInspectorFactory.getReflectionObjectInspector(
                    String.class, ObjectInspectorOptions.JAVA));
        }
    }

    /**
     * Parses one log line into a row.
     *
     * @param blob the raw record; only {@code Text} input is handled
     * @return {@code null} when {@code blob} is not a {@code Text}; an empty
     *         list when the line has no {@code data} object or its
     *         {@code type} is neither {@code mcn} nor {@code disk}; otherwise
     *         a list of eight strings in column order
     */
    public Object deserialize(Writable blob) {
        if (!(blob instanceof Text)) {
            return null;
        }
        String line = ((Text) blob).toString();

        List<Object> row = new ArrayList<Object>(COLUMN_NAMES.length);
        JSONObject record = JSONObject.fromObject(line);
        if (record == null || !record.containsKey("data")) {
            return row;
        }

        // Guard the cast: "data" may hold something other than a JSON object.
        Object rawData = record.get("data");
        if (!(rawData instanceof JSONObject)) {
            return row;
        }
        JSONObject data = (JSONObject) rawData;
        if (!data.containsKey("type")) {
            return row;
        }

        String type = data.get("type").toString();
        if (type.equalsIgnoreCase("mcn")) {
            row.add("mcn");
            addCommonColumns(data, row);
            row.add(stringOrMissing(data, "mem"));
            row.add(stringOrMissing(data, "cpu"));
            row.add(stringOrMissing(data, "netrx"));
            row.add(stringOrMissing(data, "nettx"));
        } else if (type.equalsIgnoreCase("disk")) {
            row.add("disk");
            addCommonColumns(data, row);
            row.add(stringOrMissing(data, "disk"));
            // Disk records carry no cpu / netrx / nettx values.
            row.add(MISSING);
            row.add(MISSING);
            row.add(MISSING);
        }
        return row;
    }

    /** Appends the userid, wsid and dt columns, which both record types share. */
    private static void addCommonColumns(JSONObject data, List<Object> row) {
        row.add(stringOrMissing(data, "userId"));
        row.add(stringOrMissing(data, "wsId"));
        row.add(formatDate(data));
    }

    /** Returns the string value stored under {@code key}, or the placeholder when the key is absent. */
    private static String stringOrMissing(JSONObject data, String key) {
        return data.containsKey(key) ? data.getString(key) : MISSING;
    }

    /**
     * Formats the {@code date} value (epoch milliseconds) as
     * {@code yyyy-MM-dd HH:mm:ss} in the JVM default time zone. Returns the
     * placeholder when the key is absent or the value is not a valid long,
     * so a single malformed record does not abort the whole read.
     */
    private static String formatDate(JSONObject data) {
        if (!data.containsKey("date")) {
            return MISSING;
        }
        long timestamp;
        try {
            timestamp = Long.parseLong(data.getString("date"));
        } catch (NumberFormatException e) {
            return MISSING;
        }
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        return new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
                .format(new java.util.Date(timestamp));
    }

    /**
     * Describes the row layout to Hive.
     *
     * @return a struct inspector over the eight string columns
     * @throws SerDeException declared by the interface; not thrown here
     */
    public ObjectInspector getObjectInspector() throws SerDeException {
        return ObjectInspectorFactory.getStandardStructObjectInspector(
                FIELD_NAMES, FIELD_INSPECTORS);
    }

    /**
     * No-op: the column layout is fixed in this class, so neither argument is read.
     *
     * @throws SerDeException declared by the interface; not thrown here
     */
    public void initialize(Configuration arg0, Properties arg1)
            throws SerDeException {
    }

    /**
     * Statistics are not collected by this deserializer.
     *
     * @return always {@code null}
     */
    public SerDeStats getSerDeStats() {
        return null;
    }

}


将上面的解析类打包成 jar,建表时在 ROW FORMAT SERDE 子句中指定这个解析类 TestDeserializer 的全限定类名,语句如下:

-- Register the jar that contains the custom deserializer for this session.
add jar /path/test.jar;

-- No column list is given here; the table names the custom deserializer
-- class as its SerDe. The class name must match the package declared in
-- the Java source (net.csdn.hive.table_leiyf).
drop table if exists file_test;
create table file_test
partitioned by (pdate string)
row format serde 'net.csdn.hive.table_leiyf.TestDeserializer'
LOCATION '/home/debug/test22';



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值