`````````````````
SQL 中连接多个表时,join 之后各个表(及其列)仍然有效,可以继续引用。
``````````````
if(a.user_name is null,b.user_name,a.user_name) 三目运算(条件表达式,等价于 COALESCE(a.user_name, b.user_name))
``````````````
11111111111111111hive实践
hive> load data local inpath '/home/mjiang/program/eclipse/ccms_stat/test/data/edm' overwrite into table test.s_edm;
FAILED: Error in semantic analysis: Need to specify partition columns because the destination table is partitioned
1111111111111111111hive 实践
hive> CREATE DATABASE database_test_ccms COMMENT database for test ccms
> ;
FAILED: Parse Error: line 1:44 mismatched input 'database' expecting StringLiteral near 'COMMENT' in database's comment
hive> CREATE DATABASE database_test_ccms COMMENT 'database for test ccms'
111111111111111111111
code1155:
select user_name,count(1),sum(sms_bill_count),count(distinct uid) from s_sms group by user_name limit 10;
0112:
hive> select sms.user_name,count(1),sum(sms_bill_count),count(distinct uid) from (select * from s_sms)sms group by sms.user_name limit 10;
Total MapReduce jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Defaulting to jobconf value of: 10
02.07:
insert overwrite table r_fx_daily_all
select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count
from
( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
from s_sms sms
group by sms.user_name,sms.stat_time )t_sms_sum
full outer join
( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
from s_edm edm
group by edm.user_name,edm.stat_time )t_edm_sum
on (t_sms_sum.user_name=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time)
sort by _col1 asc;
02.10:
hive> insert overwrite table r_fx_daily_all
> select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count,null,null,null,null,null,null,null,null
> from
> ( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
> from s_sms sms
> group by sms.user_name,sms.stat_time )t_sms_sum
> full outer join
> ( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
> from s_edm edm
> group by edm.user_name,edm.stat_time )t_edm_sum
> on (t_sms_sum.user_name=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time) ;
Total MapReduce jobs = 6
Launching Job 1 out of 6
Launching Job 2 out of 6
Number of reduce tasks not specified. Defaulting to jobconf value of: 10
(
insert overwrite table r_fx_daily_all
select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count,null,null,null,null,null,null,null,null
from
( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
from s_sms sms
group by sms.user_name,sms.stat_time )t_sms_sum
full outer join
( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
from s_edm edm
group by edm.user_name,edm.stat_time )t_edm_sum
on ( t_sms_sum.user_name=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time) ;
)
0447:
insert overwrite table r_fx_daily_all
select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count,null,null,null,null,null,null,null,null
from
( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
from s_sms sms
group by sms.user_name,sms.stat_time )t_sms_sum
full outer join
( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
from s_edm edm
group by edm.user_name,edm.stat_time )t_edm_sum
on ( if (t_sms_sum.stat_time=null, t_sms_sum.user_name )=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time) ;
1111111111111111111111
2张表的完成 是基础,3张表是突破。
3张表要考虑的是 前两张表的合并结果怎么表示
111111111111111111111hive 实践hive
hive> insert table r_fx_daily_all
> select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count
> from
> ( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
> from s_sms sms
> group by sms.user_name,sms_stat_time )t_sms_sum
> full outer join
> ( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
> from s_edm edm
> group by edm.user_name,edm.stat_time )t_edm_sum
> on (t_sms_sum.user_name=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time)
> sort by r_fx_daily_all.dd_date asc;
FAILED: Parse Error: line 1:0 cannot recognize input near 'insert' 'table' 'r_fx_daily_all' in insert clause
11111111111111111111111hive实践
hive> insert overwrite table r_fx_daily_all
> select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count
> from
> ( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
> from s_sms sms
> group by sms.user_name,sms.stat_time )t_sms_sum
> full outer join
> ( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
> from s_edm edm
> group by edm.user_name,edm.stat_time )t_edm_sum
> on (t_sms_sum.user_name=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time)
> sort by r_fx_daily_all.dd_date asc;
FAILED: Error in semantic analysis: Line 12:8 Invalid table alias or column reference 'r_fx_daily_all': (possible column names are: _col0, _col1, _col2, _col3, _col4, _col5, _col6)
1111111111111111hive 实践
hive> insert overwrite table r_fx_daily_all
> select t_sms_sum.*,t_edm_sum.edm_send_count,t_edm_sum.edm_people_count
> from
> ( select sms.user_name,sms.stat_time,count(1),sum(sms_bill_count),count(distinct uid)
> from s_sms sms
> group by sms.user_name,sms.stat_time )t_sms_sum
> full outer join
> ( select edm.user_name ,edm.stat_time, count(1) as edm_send_count,count(distinct uid) as edm_people_count
> from s_edm edm
> group by edm.user_name,edm.stat_time )t_edm_sum
> on (t_sms_sum.user_name=t_edm_sum.user_name and t_sms_sum.stat_time=t_edm_sum.stat_time) ;
FAILED: Error in semantic analysis: Line 1:23 Cannot insert into target table because column number/types are different 'r_fx_daily_all': Table insclause-0 has 15 columns, but query has 7 columns.
11111111111111
alt + ` 同程序切换
1111111111111111
hive
FROM pv_users
INSERT OVERWRITE TABLE pv_gender_sum
SELECT pv_users.gender, count_distinct(pv_users.userid)
GROUP BY pv_users.gender
INSERT OVERWRITE DIRECTORY '/user/data/tmp/pv_age_sum'
SELECT pv_users.age, count_distinct(pv_users.userid)
GROUP BY pv_users.age;
hive 和 union all:
111111111111111111111111111111111111
学习网站:https://cwiki.apache.org/confluence/display/Hive/Tutorial#Tutorial-SimpleQuery
22222222222222
源表用s_为前缀
临时表用t_为前缀
结果表用r_为前缀
跨sql的临时表用m_为前缀
11111111111111111111111111code:
load data inpath '/user/hive/warehouse/s_sms/stat_time=20120613/sms_20120613' overwrite into table s_sms partition(stat_time='20120808');
create table r_fx_daily_all(
user_name string,
dd_date string,
sms_send_count bigint,
sms_bill_count bigint,
sms_people_count bigint,
edm_send_count bigint,
edm_people_count bigint,
wap_send_count bigint,
wap_people_count bigint,
mms_send_count bigint,
mms_people_count bigint,
coupon_send_count bigint,
coupon_people_count bigint,
ump_send_count bigint,
ump_people_count bigint)
partitioned by(stat_time string);
hive实践之:
hive> select ( select sms.user_name,sms.stat_time,count(sms.1),sum(sms.sms_bill_count),count(distinct sms.uid) group by sms.user_name) group by stat_time from s_sms sms limit 10;
FAILED: Parse Error: line 1:9 cannot recognize input near 'select' 'sms' '.' in expression specification
hive> select ( select sms.user_name,sms.stat_time,count(sms.1),sum(sms.sms_bill_count),count(distinct sms.uid) group by sms.user_name) group by sms.stat_time from s_sms sms limit 10;
FAILED: Parse Error: line 1:9 cannot recognize input near 'select' 'sms' '.' in expression specification
hive> select ( select user_name,stat_time,count(1),sum(sms_bill_count),count(distinct uid) group by user_name) group by stat_time from s_sms limit 10;
FAILED: Parse Error: line 1:9 cannot recognize input near 'select' 'user_name' ',' in expression specification
hive> select user_name,stat_time,count(1),sum(sms_bill_count),count(distinct uid) group by user_name from s_sms limit 10;
FAILED: Parse Error: line 1:76 mismatched input 'group' expecting FROM near ')' in from clause
hive> select user_name,stat_time,count(1),sum(sms_bill_count),count(distinct uid) from s_sms group by user_name limit 10;
FAILED: Error in semantic analysis: Line 1:17 Expression not in GROUP BY key 'stat_time'
hive> select user_name,count(1),sum(sms_bill_count),count(distinct uid) from s_sms group by user_name limit 10;
Total MapReduce jobs = 2
Launching Job 1 out of 2
1111111111111111111111111111111long long ago
安装jdk
[root@test11 hadoop]# chmod +x jdk-6u16-linux-i586.bin
[root@test11 hadoop]#./jdk-6u16-linux-i586.bin
nfs 挂载目录
audio | 允许使用声音设备 |
cdrom | 允许直接使用光驱 |
floppy | 允许直接使用软驱 |
games | 允许运行游戏 |
portage | 允许以普通用户的身份执行emerge --pretend |
usb | 允许使用USB设备 |
plugdev | 允许挂载和使用可拔插设备,例如摄像头和U盘 |
video | 允许使用视频采集设备和硬件加速 |
wheel | 允许使用su |
例如,要创建一个名为john的用户,并使其隶属于wheel组,users组和audio组,请先用root用户登录(只有root有权限创建用户),然后执行useradd命令:
代码 1.1:添加一个日常使用的用户 |
Login: root Password: (您的root密码) # useradd -m -G users,wheel,audio -s /bin/bash john # passwd john Password: (输入john用户使用的密码) Re-enter password: (再次输入以便进行校验) |
A=(a b cdef) shell数组
统一资源定位符不但被用作网页地址,数据库终端也使用统一资源定位符服务器连接其服务器。实际上任何终端-服务器程序都可以使用统一资源定位符来连接。
以下是一个数据库的统一资源定位符
jdbc:datadirect:oracle://myserver:1521;sid=testdb
里面放的是该进程的process id. Pid文件
man rsyncd.conf
mars: bash:/home/mjiang/hadoop-1.0.0/sbin/hadoop-daemon.sh: Permission denied
shuffle 洗牌
给师兄解决了个问题无法编译文件 原来是因为权限不够 在su下就可以了
hash函数
对于应届毕业生,最重要的是可塑性而不是经验。
对于hadoop这种东西,在实际项目中是很快就可以掌握的,所以更重要的是要看你的学习能力,基础知识是否扎实。熟练程度应该是社会招聘或者职位紧缺需要人立即过来干活的时候才会看中的。
越大的公司,这一点会越明显。
当然,如果你能了解这个东西,很多公司的招聘条件里会写:优先考虑。
还有一点,是你自己的定位问题。如果你要进的公司根本不用这个玩意,那你就算精通hadoop的每一处细节也没有用;反之,如果你立志就要投身于hadoop研究中,那你最好要用这个东西做过几个项目。
总之最重要的是:你懂hadoop的原理,而不是会使用这个工具