建表
drop table dml.sina_blog_sentiment_result;
create external table dml.sina_blog_sentiment_result(
news_time string comment '文章发布时间',
url string comment '文章url',
author string comment '文章作者',
sentence_id string comment '句子id',
sentence string comment '句子',
is_advertisement string comment '广告识别,1是广告0不是广告',
is_bug string comment '问题识别,1有问题0没有问题',
predict_time string comment '预测时间',
predict_object string comment '预测对象',
predict_result string comment '预测多空',
likenum string comment '文章点赞量',
goldpannum string comment '文章赠金笔',
readnum string comment '文章阅读量',
collectnum string comment '文章收藏量',
reprintnum string comment '文章转载量',
type string comment '文章类型',
updatetime timestamp comment '更新时间'
) comment '博客多空情感判断'
partitioned BY (day string COMMENT '按日分区表字段')
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS TEXTFILE
location '/dw/dml/sina_blog_sentiment_result';
更新
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=150000;
set hive.exec.max.dynamic.partitions=150000;
set hive.exec.max.created.files=150000;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
insert overwrite table dml.sina_blog_sentiment_result partition(day)
select
news_time,
url,
author,
sentence_id,
sentence,
is_advertisement,
is_bug,
predict_time,
predict_object1,
predict_result,
likenum,
goldpannum,
readnum,
collectnum,
reprintnum,
type,
from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss') as updatetime,
regexp_replace(to_date(news_time),'-','') as day
from
(
select
news_time,
url,
author,
sentence_id,
sentence,
is_advertisement,
is_bug,
predict_time,
predict_object,
predict_result,
likenum,
goldpannum,
readnum,
collectnum,
reprintnum,
type
from default.sina_blog_sentiment_result
where is_advertisement='0' and is_bug='0'
group by news_time,url,author,sentence_id, sentence, is_advertisement,
is_bug, predict_time, predict_object, predict_result, likenum, goldpannum,readnum,collectnum,
reprintnum,type
)t
lateral view explode(split(predict_object,'-')) b AS predict_object1
order by 1;