hive sql 语句学习

use liuliangyuan680_ind;
--drop table llid;
create table llid(insurance_no string,group_no int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE; 


load data  inpath '/apps-data/tmp/gbd-ide/hive-liuliangyuan680/file/id.txt'
into table liuliangyuan680_ind.llid;


select * from liuliangyuan680_ind.llid;


select count(insurance_no) from liuliangyuan680_ind.llid;






SELECT * from liuliangyuan680_ind.llid a , liuliangyuan680_ind.rxw_xmsb_djmxypb b where a.insurance_no =b.insurance_no;


select * from liuliangyuan680_ind.rxw_xmsb_djmxypb;










use liuliangyuan680_ind;
--drop table summary3month;
CREATE table summary3month AS
select insurance_no, sum( case when visit_type='1' and out_hosp_date is not null and in_hosp_date is not null then  (datediff(out_hosp_date,in_hosp_date)+1) else 0 end) hosp_days, 
--总频次
COUNT(DISTINCT(case when visit_type='1' then visit_no else null end)) hosp_freq,
COUNT(DISTINCT(case when visit_type='2' then visit_no else null end)) visit_freq,
COUNT(DISTINCT(case when diagnose_code is not null then diagnose_code else null end)) diagnose_num,COUNT(distinct medical_org_code) as org_num,count(DISTINCT bill_no) as bill_num,count(DISTINCT bill_detail_no) as detail_num,
--看医生数,看病月数
count (distinct doctor_code) as doctor_num,
count (distinct month(in_hosp_date)) as month_num,
--住院药品数,诊疗项目数,服务设施数,总金额,总赔付
count (distinct (case when (visit_type="1" and project_type="1") then project_name else null end)) as hosp_medicine_num,
count (distinct (case when (visit_type="1" and project_type="2") then project_name else null end)) as hosp_check_num,
count (distinct (case when (visit_type="1" and project_type="3") then project_name else null end)) as hosp_service_num,
sum(case when visit_type='1' then mx_sum_amount else null end) as hosp_sum_amount,
sum(case when visit_type='1' then mx_apply_pay_amount else null end) as hosp_apply_amount,


--门诊药品数,诊疗项目数,服务设施数,总金额,总赔付
count (distinct (case when (visit_type="2" and project_type="1") then project_name else null end)) as visit_medicine_num,
count (distinct (case when (visit_type="2" and project_type="1") then bill_detail_no else null end)) as visit_medicine_freq,
count (distinct (case when (visit_type="2" and project_type="2") then project_name else null end)) as visit_check_num,
count (distinct (case when (visit_type="2" and project_type="2") then bill_detail_no else null end)) as visit_check_freq,
count (distinct (case when (visit_type="2" and project_type="3") then project_name else null end)) as visit_service_num,
count (distinct (case when (visit_type="2" and project_type="3") then bill_detail_no else null end)) as visit_service_freq,
sum(case when visit_type='2' then mx_sum_amount else null end) as visit_sum_amount,
sum(case when visit_type='2' then mx_apply_pay_amount else null end) as visit_apply_amount


from liuliangyuan680_ind.rxw_xmsb_djmxypb 


where (visit_type = '1' and 
       year(in_hosp_date) ="2014" ) or 
       (visit_type = '2' and year(in_hosp_date) ="2014" ) 
group BY insurance_no;








use liuliangyuan680_ind;
--drop table summary_full_201410;
CREATE table summary_full_201410 AS
select insurance_no, sum( case when visit_type='1' and out_hosp_date is not null and in_hosp_date is not null then  (datediff(out_hosp_date,in_hosp_date)+1) else 0 end) hosp_days,
COUNT(DISTINCT(case when visit_type='1' then visit_no else null end)) hosp_freq,
COUNT(DISTINCT(case when visit_type='2' then visit_no else null end)) visit_freq,
COUNT(DISTINCT(case when diagnose_code is not null then diagnose_code else null end)) diagnose_num,COUNT(distinct medical_org_code) as org_num,count(DISTINCT bill_no) as bill_num,count(DISTINCT bill_detail_no) as detail_num,
--看医生数
count (distinct doctor_code) as doctor_num,


--住院药品数,诊疗项目数,服务设施数,总金额,总赔付
count (distinct (case when (visit_type="1" and project_type="1") then project_name else null end)) as hosp_medicine_num,
count (distinct (case when (visit_type="1" and project_type="2") then project_name else null end)) as hosp_check_num,
count (distinct (case when (visit_type="1" and project_type="3") then project_name else null end)) as hosp_service_num,
sum(case when visit_type='1' then mx_sum_amount else null end) as hosp_sum_amount,
sum(case when visit_type='1' then mx_apply_pay_amount else null end) as hosp_apply_amount,


--门诊药品数,诊疗项目数,服务设施数,总金额,总赔付
count (distinct (case when (visit_type="2" and project_type="1") then project_name else null end)) as visit_medicine_num,
count (distinct case when (visit_type="2" and project_type="1") then bill_detail_no else null end)) as visit_medicine_freq,
count (distinct (case when (visit_type="2" and project_type="2") then project_name else null end)) as visit_check_num,
count (distinct (case when (visit_type="2" and project_type="2") then bill_detail_no else null end)) as visit_check_freq,
count (distinct (case when (visit_type="2" and project_type="3") then project_name else null end)) as visit_service_num,
count (distinct (case when (visit_type="2" and project_type="3") then bill_detail_no else null end)) as visit_service_freq,
sum(case when visit_type='2' then mx_sum_amount else null end) as visit_sum_amount,
sum(case when visit_type='2' then mx_apply_pay_amount else null end) as visit_apply_amount
from liuliangyuan680_ind.rxw_xmsb_djmxypb 
where (visit_type = '1' and 
        year(in_hosp_date) ="2014" and 
        month(in_hosp_date)="11") or 
        (visit_type = '2' and year(in_hosp_date) ="2014" and 
        month(in_hosp_date)="11") 
group BY insurance_no;
















use liuliangyuan680_ind;
--drop table sefinal;
create table sefinal as SELECT
b.insurance_no,
b.medical_org_name,
b.medical_dept_name,
b.doctor_code
from liuliangyuan680_ind.llid a , liuliangyuan680_ind.rxw_xmsb_djmxypb b where a.insurance_no =b.insurance_no;


select * from liuliangyuan680_ind.sefinal;










insert overwrite local directory '/home/wyp/wyp' select * from liuliangyuan680_ind.selfinal; 




















































use liuliangyuan680_ind;
drop table llid;
create table llid(insurance_no string, group_no int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE; 


load data  inpath '/apps-data/tmp/gbd-ide/hive-liuliangyuan680/file/id.txt'
into table liuliangyuan680_ind.llid;


select * from liuliangyuan680_ind.llid;


select count(insurance_no) from liuliangyuan680_ind.llid;






use liuliangyuan680_ind;
--drop table sefinal;
create table sefinal as SELECT
a.group_no,
b.insurance_no,
b.diagnose_code,
b.project_name,
b.medical_org_name,
b.medical_dept_name,
b.doctor_code
from liuliangyuan680_ind.llid a , liuliangyuan680_ind.rxw_xmsb_djmxypb b where a.insurance_no =b.insurance_no;




use liuliangyuan680_ind;
--drop table sefinal;
create table selfinal as SELECT
a.group_no,
b.insurance_no,
b.diagnose_code,
b.project_name,
b.medical_org_name,
b.medical_dept_name,
b.doctor_code
from liuliangyuan680_ind.llid a join liuliangyuan680_ind.rxw_xmsb_djmxypb b on ( a.insurance_no =b.insurance_no);




select * from liuliangyuan680_ind.sefinal;


select count(DISTINCT insurance_no) from liuliangyuan680_ind.sefinal;






---社保号个数1484,群数10
select count(DISTINCT insurance_no) from liuliangyuan680_ind.selfinal;
select count(DISTINCT group_no) from liuliangyuan680_ind.selfinal;




select  medical_org_name,COUNT(medical_org_name) as time1 FROM liuliangyuan680_ind.selfinal a WHERE
a.group_no=1 group by medical_org_name order by time1 ;




select  medical_org_name,COUNT(medical_org_name) FROM liuliangyuan680_ind.selfinal  
group by group_no and medical_org_name;


select  medical_org_name, COUNT(medical_org_name) FROM liuliangyuan680_ind.selfinal
where group_no=1;















1.上传tar包 2.解压 tar -zxvf hive-1.2.1.tar.gz 3.安装mysql数据库 推荐yum 在线安装 4.配置hive (a)配置HIVE_HOME环境变量 vi conf/hive-env.sh 配置其中的$hadoop_home (b)配置元数据库信息 vi hive-site.xml 添加如下内容: javax.jdo.option.ConnectionURL jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true JDBC connect string for a JDBC metastore javax.jdo.option.ConnectionDriverName com.mysql.jdbc.Driver Driver class name for a JDBC metastore javax.jdo.option.ConnectionUserName root username to use against metastore database javax.jdo.option.ConnectionPassword hadoop password to use against metastore database 5.安装hive和mysq完成后,将mysql的连接jar包拷贝到$HIVE_HOME/lib目录下 如果出现没有权限的问题,在mysql授权(在安装mysql的机器上执行) mysql -uroot -p #(执行下面的语句 *.*:所有库下的所有表 %:任何IP地址或主机都可以连接) GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION; FLUSH PRIVILEGES; 6. Jline包版本不一致的问题,需要拷贝hive的lib目录中jline.2.12.jar的jar包替换掉hadoop中的 /home/hadoop/app/hadoop-2.6.4/share/hadoop/yarn/lib/jline-0.9.94.jar 启动hive bin/hive ---------------------------------------------------------------------------------------------------- Hive几种使用方式: 1.Hive交互shell bin/hive 2.Hive JDBC服务(参考java jdbc连接mysql) 3.hive启动为一个服务器,来对外提供服务 bin/hiveserver2 nohup bin/hiveserver2 1>/var/log/hiveserver.log 2>/var/log/hiveserver.err & 启动成功后,可以在别的节点上用beeline去连接 bin/beeline -u jdbc:hive2://mini1:10000 -n root 或者 bin/beeline ! connect jdbc:hive2://mini1:10000 4.Hive命令 hive -e ‘sql’ bin/hive -e 'select * from t_test'
### Hive SQL 语句教程与使用示例 Hive 是基于 Hadoop 的一个数据仓库工具,能够将结构化数据文件映射为一张数据库表,并提供类 SQL 查询功能。Hive SQLHive 提供的查询语言,用于对存储在 Hadoop 中的数据进行查询、分析和处理。以下是一些常见的 Hive SQL 语句使用示例,涵盖表操作、数据加载、查询、修改和删除等场景。 #### 1. 创建表 Hive 中可以使用 `CREATE TABLE` 语句来创建一张新表,也可以使用 `CREATE TABLE ... LIKE` 语法复制已有表的结构。 ```sql -- 创建一张名为 student 的表 CREATE TABLE student ( id INT, name STRING, age INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; -- 创建一张与 student 结构相同的表 student2 CREATE TABLE student2 LIKE student; ``` #### 2. 加载数据到表中 使用 `LOAD DATA` 命令可以将本地或 HDFS 上的数据文件加载到 Hive 表中。 ```sql -- 将本地路径 /data/student.csv 的数据加载到 student 表中 LOAD DATA LOCAL INPATH '/data/student.csv' INTO TABLE student; -- 将 HDFS 路径 /user/hive/input/student.csv 的数据加载到 student 表中 LOAD DATA INPATH '/user/hive/input/student.csv' INTO TABLE student; ``` #### 3. 修改表结构 Hive 支持多种表结构修改操作,包括重命名表、修改列名、调整列顺序和添加新列。 ```sql -- 将 student 表重命名为 student_new ALTER TABLE student RENAME TO student_new; -- 修改字段 id 的名称为 id_new,并将其数据类型设置为 INT ALTER TABLE student_new CHANGE id id_new INT; -- 将字段 id_new 移动到 name 字段之后 ALTER TABLE student_new CHANGE id_new id_new INT AFTER name; -- 添加一个新字段 score,数据类型为 INT ALTER TABLE student_new ADD COLUMNS (score INT COMMENT '1-100'); ``` #### 4. 查询数据 Hive SQL 支持标准的 SQL 查询语法,包括 `SELECT`、`WHERE`、`GROUP BY` 和 `JOIN` 等。 ```sql -- 查询 student_new 表中的所有记录 SELECT * FROM student_new; -- 按照 name 字段进行分组,并统计每个名字出现的次数 SELECT name, COUNT(*) AS count FROM student_new GROUP BY name; -- 查询 score 大于 80 的记录 SELECT * FROM student_new WHERE score > 80; ``` #### 5. 清空表或删除表 可以使用 `TRUNCATE` 或 `DROP` 语句来清空或删除表。 ```sql -- 清空 student_new 表中的所有数据 TRUNCATE TABLE student_new; -- 删除 student_new 表及其在 HDFS 上的数据(如果是内部表) DROP TABLE IF EXISTS student_new; ``` #### 6. 查看表信息 Hive 提供了多种方式来查看表的元数据信息,例如表结构、分区信息等。 ```sql -- 查看 student 表的建表语句 SHOW CREATE TABLE student; -- 查看 student 表的字段信息 DESCRIBE student; ``` #### 7. 复杂数据类型处理 Hive 支持复杂数据类型如 `ARRAY` 和 `MAP`,并提供了相应的函数来解析这些数据。 ```sql -- 解析 ARRAY 类型数据 SELECT explode(array('A', 'B', 'C')) AS element; -- 解析 MAP 类型数据 SELECT explode(map('A', 10, 'B', 20, 'C', 30)) AS (key, value); ``` #### 8. 分区表操作 Hive 支持分区表,可以按照某些字段(如时间)对数据进行分区,提升查询效率。 ```sql -- 创建一个按月份分区的表 CREATE TABLE sales ( product STRING, amount DOUBLE ) PARTITIONED BY (month STRING); -- 加载数据到指定分区 LOAD DATA LOCAL INPATH '/data/sales_oct.csv' INTO TABLE sales PARTITION (month='202310'); -- 查询特定分区的数据 SELECT * FROM sales WHERE month = '202310'; ``` #### 9. 多维度统计分析 Hive 常用于数据仓库中的多维度统计分析,例如按时间维度统计 PV 总量等。 ```sql -- 按时间维度统计 PV 总量 SELECT date, COUNT(*) AS pv_total FROM web_log GROUP BY date; ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值