-- List all databases
show databases;
-- Create a database
create database human_resources;
-- Find all databases whose names start with h
show databases like 'h.*';
-- Create a database and specify its storage location
create database financials
location 'my/dir';
-- Create a database with a descriptive comment
create database financials
comment 'Holds all financials tables';
-- View the description of the financials database
describe database financials;
-- Create a database with key-value properties
create database financials
with DBPROPERTIES ('creator' = 'Hxh', 'date' = '2010-10-10');
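-- Example (a small sketch): the key-value properties set above can be inspected with
-- DESCRIBE DATABASE EXTENDED, which prints the DBPROPERTIES along with the basic description
describe database extended financials;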
-- Use the financials database
use financials;
-- Display the database currently in use (shown in the CLI prompt)
set hive.cli.print.current.db=true;
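-- Example (a small sketch): the same setting can be switched off again when the shorter prompt is preferred
set hive.cli.print.current.db=false;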
-- Drop the database if it exists (by default this fails if the database still contains tables)
drop database if exists financials;
-- Drop the tables in the database first, then the database itself (CASCADE)
drop database if exists financials CASCADE;
-- Modify the database properties (the metadata itself, i.e. the name and location, cannot be changed)
alter database financials set DBPROPERTIES('edited-by'='hugh');
-- Create an employees table
-- (/user/hive/warehouse/ is the default 'data warehouse' path)
-- Hive automatically adds two table properties: last_modified_by and last_modified_time
-- (see the SHOW TBLPROPERTIES example after the table definition)
create table if not exists mydb.employees(
name string comment 'Employee name',
salary float comment 'Employee salary',
subordinates Array<string> comment 'Names of subordinates',
deductions Map<string,float>
comment 'Keys are deductions names,values are percentages',
address struct<street:string,city:string,state:string,zip:int>
comment 'home address')
comment 'Description of the table'
tblproperties ('creator'='me','created_at'='2010-10-10')
location '/user/hive/warehouse/mydb.db/employees';
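-- Example (a small sketch, assuming mydb is the current database): the table properties,
-- including the last_modified_* ones mentioned above, can be listed with SHOW TBLPROPERTIES
-- (available in newer Hive releases)
use mydb;
show tblproperties employees;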
-- Copy a table's schema
create table if not exists mydb.employees2
like mydb.employees;
-- Switch to a database and list its tables (USE applies to databases, not tables)
use mydb;
show tables;
show tables in mydb;
-- Show detailed table schema information
describe extended mydb.employees;
-- Show information about a specific column
describe mydb.employees.salary;
-- Create an external table
create external table if not exists stocks(
exchange string,
symbol string,
ymd string,
price_open float,
price_high float,
price_low float,
price_close float,
volume int,
price_adj_close float)
row format delimited fields terminated by ','
location '/data/stocks';
-- Copy an external table's schema (the data is not copied)
create external table if not exists mydb.employees3
like mydb.employees
location '/path/to/data';
-- Create a partitioned table (partitioning makes queries faster)
create table employees(
name string comment 'Employee name',
salary float comment 'Employee salary',
subordinates Array<string> comment 'Names of subordinates',
deductions Map<string,float>
comment 'Keys are deductions names,values are percentages',
address struct<street:string,city:string,state:string,zip:int>
)
partitioned by (country string, state string);
-- Subdirectories that reflect the partition structure (a sample load follows the listing):
hdfs://master_server/user/hive/warehouse/mydb.db/employees/
.../employees/country=CA/state=AB
.../employees/country=CA/state=BC
...
.../employees/country=US/state=LA
.../employees/country=US/state=AK
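-- Example (a small sketch): loading data into a partition creates the matching subdirectory,
-- e.g. .../employees/country=US/state=CA; the local input path below is only a placeholder
load data local inpath '${env:HOME}/california-employees'
into table employees
partition (country = 'US', state = 'CA');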
-- Query the partitioned table for all employees in the US state of Illinois
select * from employees
where country = 'US' and state = 'IL';
-- Enable strict mode (a query against a partitioned table must include a partition filter in the WHERE clause, otherwise it fails)
set hive.mapred.mode=strict;
-- Switch back to nonstrict mode
set hive.mapred.mode=nonstrict;
-- Show the partitions that exist in the table
show partitions employees;
-- Show specific partitions of the table (filtered by partition key)
show partitions employees partition(country='US');
-- Display the partition keys
describe extended employees;
-- Create an external partitioned table (shared data, faster queries; well suited to log analysis;
-- the data is not deleted even if the table is dropped; a sample add-partition statement follows the CREATE)
-- Clause keywords used below: partitioned by, row format delimited, fields terminated by
create external table if not exists log_messages(
hms int,
severity string,
server string,
process_id int,
message string)
partitioned by (year int,month int,day int)
row format delimited fields terminated by '\t';
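-- Example (a small sketch): the external partitioned table holds no data until partitions are added;
-- each partition can point at its own directory (the hdfs path below is only a placeholder)
alter table log_messages add partition(year = 2012, month = 1, day = 2)
location 'hdfs://master_server/data/log_messages/2012/01/02';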
-- Show the partitions of the external table
show partitions log_messages;
-- Describe the table (the partition keys are also printed)
describe extended log_messages;
-- Show the actual path where a partition's data is stored
describe extended log_messages partition (year=2012, month=1, day=2);
-- Customize the table storage format: stored as textfile (and specify every delimiter explicitly)
create table employees(
name string,
salary float,
subordinates Array<string>,
deductions Map<string,float>,
address struct<street:string,city:string,state:string,zip:int>
)
row format delimited
fields terminated by '\001'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile;
-- Specify the input and output formats (stored as ...)
create table kst
partitioned by (ds string)
row format serde 'com.linkedin.haivvreo.AvroSerDe'
with serdeproperties ('schema.url'='http://schema_provider/kst.avsc')
stored as
inputformat 'com.linkedin.haivvreo.AvroContainerInputFormat'
outputformat 'com.linkedin.haivvreo.AvroContainerOutputFormat';
-- Show the table's input and output formats
describe extended kst;
-- Drop a table (for an external table the metadata is deleted, but the data is not)
drop table if exists employees;
-- Rename a table
alter table log_messages rename to logmsgs;
-- Add partitions to a table (since v0.8.0 multiple partitions can be added in a single statement)
alter table log_messages add if not exists
partition (year = 2011, month = 1,day = 1) location 'logs/2011/01/01'
partition (year = 2011, month = 1,day = 2) location 'logs/2011/01/02'
partition (year = 2011, month = 1,day = 3) location 'logs/2011/01/03'
...;
-- Change a partition's location (only the location pointer changes; existing data is not moved)
alter table log_messages partition(year = 2011, month = 12, day = 2)
set location 's3n://ourbucket/logs/2011/01/02';
-- Drop a specific partition (DROP)
alter table log_messages drop if exists partition(year = 2011, month = 12, day = 2);
-- Modify column information (CHANGE)
alter table log_messages
change column hms hours_minutes_seconds INT
comment 'the hours,minutes,and seconds part of the timestamp'
after severity;
-- Add columns
alter table log_messages add columns (
app_name string comment 'Application name',
session_id bigint comment 'The current session id');
-- Remove or replace columns (REPLACE)
alter table log_messages replace columns (
hours_mins_secs int
);
-- Modify table properties (properties can only be added or changed, not deleted)
alter table log_messages set tblproperties (
'notes' = 'The process id is no longer captured ; this column is always NULL'
);
-- Change a storage property (switch the file format to sequencefile)
alter table log_messages
partition(year = 2012, month = 1, day = 1)
set fileformat sequencefile;
-- TOUCH: triggers pre/post execution hooks for a table or partition (e.g. after its files are changed outside of Hive); it does not create or modify any data itself
alter table log_messages touch
partition(year = 2012, month = 1, day = 1);
-- Reference: <<Hive权威指南>>