-- 第一步:创建 apachelog 表(用于存放 Apache 的访问日志 access_log)
-- Table holding raw Apache access-log lines, parsed by Hive's RegexSerDe.
-- Every column is STRING. The seven capture groups of "input.regex" are
-- matched, in order, to the seven columns declared below:
--   host, identity, user_d, time (text between [ ]), request (text between
--   double quotes), status, size ("-" or digits for the last two).
-- The square brackets are written as \\[ and \\] because the HiveQL string
-- literal consumes one level of backslash escaping before the pattern reaches
-- the regex engine; a single backslash would turn [(.*)] into a character
-- class and drop a capture group.
-- NOTE(review): `time` may be a reserved keyword on newer Hive releases --
-- confirm against the target version and backtick-quote it if required.
CREATE TABLE apachelog (
    host     STRING,
    identity STRING,
    user_d   STRING,
    time     STRING,
    request  STRING,
    status   STRING,
    size     STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) \\[(.*)\\] \"(.*)\" (-|[0-9]*) (-|[0-9]*)"
)
STORED AS TEXTFILE;
-- Step 2: insert data -- load the Apache access log into the apachelog table.
-- LOCAL makes Hive read the path from the client's local filesystem; without
-- OVERWRITE the loaded file is added to whatever the table already holds.
LOAD DATA LOCAL INPATH "/usr/local/apache/logs/access_log" INTO TABLE apachelog;