环境:hadoop-2.7.2,hive-1.2.1
RegexSerDe
所需jar包:{HIVE_HOME}/lib/hive-contrib-1.2.1.jar
<1>. 根据数据格式建表
-- Table read through the hive-contrib RegexSerDe: input.regex carries eight
-- capture groups, one per STRING column declared below, in the same order.
-- The brandid, chexingid and modelid groups accept digits only.
CREATE TABLE IF NOT EXISTS dbname.tbl_name (
    chexing     STRING,
    source_name STRING,
    brand       STRING,
    brandid     STRING,
    model       STRING,
    chexingid   STRING,
    make        STRING,
    modelid     STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '\\{"chexing": "(.*?)", "source_name": "(.*?)", "brand": "(.*?)", "brandid": "([0-9]+)", "model": "(.*?)", "chexingid": "([0-9]+)", "make": "(.*?)", "modelid": "([0-9]+)"\\}',
    'output.format.string' = '%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s'
)
STORED AS TEXTFILE;
<2>. load data to hive
-- Load the local file into dbname.tbl_name. OVERWRITE replaces the table's
-- existing contents instead of appending to them.
LOAD DATA LOCAL INPATH './filename.txt' OVERWRITE INTO TABLE dbname.tbl_name;
JsonSerDe
所需jar包:{HIVE_HOME}/lib/json-serde-1.3.8-jar-with-dependencies.jar
<1>. 根据数据格式建表
-- Table whose rows are decoded by the OpenX JsonSerDe.
-- IF NOT EXISTS makes the statement safe to re-run when the table is already there.
-- The `data` column name stays backtick-quoted as in the original definition
-- (presumably to avoid a clash with a Hive keyword - confirm).
CREATE TABLE IF NOT EXISTS dbname.tablename (
    api_name      STRING,
    request_time  STRING,
    response_time STRING,
    elapse_time   STRING,
    status        INT,
    error_code    INT,
    error_msg     STRING,
    `data`        MAP<STRING, ARRAY<STRING>>,
    query_info    MAP<STRING, STRING>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE;
<2>. load data to hive
-- Replace the contents of dbname.tablename with the rows of the local file.
LOAD DATA LOCAL INPATH './filename.txt'
OVERWRITE INTO TABLE dbname.tablename;
Reference: