hive 命令

本文详细介绍了使用Hive进行数据仓库建模的过程,包括创建多种类型的表如原始数据表、明细数据表及时间维度表,并通过SQL语句实现数据的清洗和加载。此外,还展示了如何从HDFS导入数据到Hive,以及如何利用LATERAL VIEW进行URL解析。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

create database if not exists dw_weblog;

use dw_weblog;

show tables;

drop table if exists ods_weblog_origin;
create table ods_weblog_origin(
valid string,
remote_addr string,
remote_user string,
time_local string,
request string,
status string,
body_bytes_sent string,
http_referer string,
http_user_agent string)
partitioned by (datestr string)
row format delimited
fields terminated by '\001';

drop table if exists ods_click_pageviews;
create table ods_click_pageviews(
Session string,
remote_addr string,
remote_user string,
time_local string,
request string,
visit_step string,
page_staylong string,
http_referer string,
http_user_agent string,
body_bytes_sent string,
status string)
partitioned by (datestr string)
row format delimited
fields terminated by '\001';


drop table if exist click_stream_visit;
create table click_stream_visit(
session     string,
remote_addr string,
inTime      string,
outTime     string,
inPage      string,
outPage     string,
referal     string,
pageVisits  int)
partitioned by (datestr string);



drop table dim_time if exists dim_time;
create table dim_time(
year string,
month string,
day string,
hour string)
row format delimited
fields terminated by ',';

 

 

从hdfs导入到hive

load data inpath 'hdfs://node5:9000//web_click_log/data/output/' overwrite into table ods_weblog_origin partition(datestr='2013-09-18');

show partitions ods_weblog_origin;
select count(*) from ods_weblog_origin;

 

create table t_ods_tmp_referurl as
SELECT a.*,b.*
FROM ods_weblog_origin a LATERAL VIEW parse_url_tuple(regexp_replace(http_referer, "\"", ""), 'HOST', 'PATH','QUERY', 'QUERY:id') b as host, path, query, query_id;

 

insert into table dw_weblog.ods_weblog_detail partition(datestr='2013-09-18')
select c.valid,c.remote_addr,c.remote_user,c.time_local,
substring(c.time_local,0,10) as daystr,
substring(c.time_local,12) as tmstr,
substring(c.time_local,6,2) as month,
substring(c.time_local,9,2) as day,
substring(c.time_local,11,3) as hour,
c.request,c.status,c.body_bytes_sent,c.http_referer,c.ref_host,c.ref_path,c.ref_query,c.ref_query_id,c.http_user_agent
from
(SELECT
a.valid,a.remote_addr,a.remote_user,a.time_local,
a.request,a.status,a.body_bytes_sent,a.http_referer,a.http_user_agent,b.ref_host,b.ref_path,b.ref_query,b.ref_query_id
FROM dw_weblog.ods_weblog_origin a LATERAL VIEW parse_url_tuple(regexp_replace(http_referer, "\"", ""), 'HOST', 'PATH','QUERY', 'QUERY:id') b as ref_host, ref_path, ref_query, ref_query_id) c

 

load data local inpath '/usr/local/web_click_log/data/other/v_time.dat' into table dim_time;

 

转载于:https://www.cnblogs.com/yaoyao66123/p/9381953.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值