-- List all databases
show databases;
-- Create a database
create database human_resources;
-- Find all databases whose names start with h
show databases like 'h.*';
-- Create a database and specify its storage location
create database financials
location 'my/dir';
-- Create a database with a descriptive comment
create database financials
comment 'Holds all financials tables';
-- View the description of the financials database
describe database financials;
-- Create a database with key-value properties
create database financials
with DBPROPERTIES ('creator' = 'Hxh', 'date' = '2010-10-10');
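-- Example (a small sketch): the key-value properties set above can be inspected with
-- DESCRIBE DATABASE EXTENDED, which prints the DBPROPERTIES along with the basic description
describe database extended financials;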
-- Use the financials database
use financials;
-- Display the database currently in use (shown in the CLI prompt)
set hive.cli.print.current.db=true;
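-- Example (a small sketch): the same setting can be switched off again when the shorter prompt is preferred
set hive.cli.print.current.db=false;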
-- Drop the database if it exists (by default this fails if the database still contains tables)
drop database if exists financials;
-- Drop the tables in the database first, then the database itself (CASCADE)
drop database if exists financials CASCADE;
-- Modify the database properties (the metadata itself, i.e. the name and location, cannot be changed)
alter database financials set DBPROPERTIES('edited-by'='hugh');
-- Create an employees table
-- (/user/hive/warehouse/ is the default 'data warehouse' path)
-- Hive automatically adds two table properties: last_modified_by and last_modified_time
-- (see the SHOW TBLPROPERTIES example after the table definition)
create table if not exists mydb.employees(
name string comment 'Employee name',
salary float comment 'Employee salary',
subordinates Array<string> comment 'Names of subordinates',
deductions Map<string,float>
comment 'Keys are deductions names,values are percentages',
address struct<street:string,city:string,state:string,zip:int>
comment 'home address')
comment 'Description of the table'
tblproperties ('creator'='me','created_at'='2010-10-10')
location '/user/hive/warehouse/mydb.db/employees';
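-- Example (a small sketch, assuming mydb is the current database): the table properties,
-- including the last_modified_* ones mentioned above, can be listed with SHOW TBLPROPERTIES
-- (available in newer Hive releases)
use mydb;
show tblproperties employees;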
-- Copy a table's schema
create table if not exists mydb.employees2
like mydb.employees;
-- Switch to a database and list its tables (USE applies to databases, not tables)
use mydb;
show tables;
show tables in mydb;
-- Show detailed table schema information
describe extended mydb.employees;
-- Show information about a specific column
describe mydb.employees.salary;
-- Create an external table
create external table if not exists stocks(
exchange string,
symbol string,
ymd string,
price_open float,
price_high float,
price_low float,
price_close float,
volume int,
price_adj_close float)
row format delimited fields terminated by ','
location '/data/stocks';
-- Copy an external table's schema (the data is not copied)
create external table if not exists mydb.employees3
like mydb.employees
location '/path/to/data';
-- Create a partitioned table (partitioning makes queries faster)
create table employees(
name string comment 'Employee name',
salary float comment 'Employee salary',
subordinates Array<string> comment 'Names of subordinates',
deductions Map<string,float>
comment 'Keys are deductions names,values are percentages',
address struct<street:string,city:string,state:string,zip:int>
)
partitioned by (country string, state string);
-- Subdirectories that reflect the partition structure (a sample load follows the listing):
hdfs://master_server/user/hive/warehouse/mydb.db/employees/
.../employees/country=CA/state=AB
.../employees/country=CA/state=BC
...
.../employees/country=US/state=LA
.../employees/country=US/state=AK
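-- Example (a small sketch): loading data into a partition creates the matching subdirectory,
-- e.g. .../employees/country=US/state=CA; the local input path below is only a placeholder
load data local inpath '${env:HOME}/california-employees'
into table employees
partition (country = 'US', state = 'CA');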
-- Query the partitioned table for all employees in the US state of Illinois
select * from employees
where country = 'US' and state = 'IL';
-- Enable strict mode (a query against a partitioned table must include a partition filter in the WHERE clause, otherwise it fails)
set hive.mapred.mode=strict;
-- Switch back to nonstrict mode
set hive.mapred.mode=nonstrict;
-- Show the partitions that exist in the table
show partitions employees;
-- Show specific partitions of the table (filtered by partition key)
show partitions employees partition(country='US');
-- Display the partition keys
describe extended employees;
-- Create an external partitioned table (shared data, faster queries; well suited to log analysis;
-- the data is not deleted even if the table is dropped; a sample add-partition statement follows the CREATE)
-- Clause keywords used below: partitioned by, row format delimited, fields terminated by
create external table if not exists log_messages(
hms int,
severity string,
server string,
process_id int,
message string)
partitioned by (year int,month int,day int)
row format delimited fields terminated by '\t';
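-- Example (a small sketch): the external partitioned table holds no data until partitions are added;
-- each partition can point at its own directory (the hdfs path below is only a placeholder)
alter table log_messages add partition(year = 2012, month = 1, day = 2)
location 'hdfs://master_server/data/log_messages/2012/01/02';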
-- Show the partitions of the external table
show partitions log_messages;
-- Describe the table (the partition keys are also printed)
describe extended log_messages;
-- Show the actual path where a partition's data is stored
describe extended log_messages partition (year=2012, month=1, day=2);
-- Customize the table storage format: stored as textfile (and specify every delimiter explicitly)
create table employees(
name string,
salary float,
subordinates Array<string>,
deductions Map<string,float>,
address struct<street:string,city:string,state:string,zip:int>
)
row format delimited
fields terminated by '\001'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile;
-- Specify the input and output formats (stored as ...)
create table kst
partitioned by (ds string)
row format serde 'com.linkedin.haivvreo.AvroSerDe'
with serdeproperties ('schema.url'='http://schema_provider/kst.avsc')
stored as
inputformat 'com.linkedin.haivvreo.AvroContainerInputFormat'
outputformat 'com.linkedin.haivvreo.AvroContainerOutputFormat';
-- Show the table's input and output formats
describe extended kst;
-- Drop a table (for an external table the metadata is deleted, but the data is not)
drop table if exists employees;
-- Rename a table
alter table log_messages rename to logmsgs;
-- Add partitions to a table (since v0.8.0 multiple partitions can be added in a single statement)
alter table log_messages add if not exists
partition (year = 2011, month = 1,day = 1) location 'logs/2011/01/01'
partition (year = 2011, month = 1,day = 2) location 'logs/2011/01/02'
partition (year = 2011, month = 1,day = 3) location 'logs/2011/01/03'
...;
-- Change a partition's location (only the location pointer changes; existing data is not moved)
alter table log_messages partition(year = 2011, month = 12, day = 2)
set location 's3n://ourbucket/logs/2011/01/02';
-- Drop a specific partition (DROP)
alter table log_messages drop if exists partition(year = 2011, month = 12, day = 2);
-- Modify column information (CHANGE)
alter table log_messages
change column hms hours_minutes_seconds INT
comment 'the hours,minutes,and seconds part of the timestamp'
after severity;
-- Add columns
alter table log_messages add columns (
app_name string comment 'Application name',
session_id bigint comment 'The current session id');
-- Remove or replace columns (REPLACE)
alter table log_messages replace columns (
hours_mins_secs int
);
-- Modify table properties (properties can only be added or changed, not deleted)
alter table log_messages set tblproperties (
'notes' = 'The process id is no longer captured ; this column is always NULL'
);
-- Change a storage property (switch the file format to sequencefile)
alter table log_messages
partition(year = 2012, month = 1, day = 1)
set fileformat sequencefile;
-- TOUCH: triggers pre/post execution hooks for a table or partition (e.g. after its files are changed outside of Hive); it does not create or modify any data itself
alter table log_messages touch
partition(year = 2012, month = 1, day = 1);
-- Reference: <<Hive权威指南>>