My scripts are stored in /user/local/bin/
Note:
Some of these scripts contain usernames and passwords; remember to change them to your own! Some paths also need adjusting. All of these scripts have been personally tested and run by me.
Big Data Project: E-commerce Data Warehouse (Scripts)
- 1. xsync cluster distribution script
- 2. ZooKeeper cluster start/stop script
- 3. Log generation script
- 4. Time synchronization script (for generating mock data)
- 5. Cluster node status check script
- 6. Log collection Flume start/stop script
- 7. Kafka cluster start/stop script
- 8. Log consumption Flume start/stop script
- 9. Collection channel start/stop script
- 10. MySQL business data import to HDFS via Sqoop script
- 11. Data warehouse build - ODS layer (user behavior data) data loading script
- 12. Data warehouse build - ODS layer (business data) data loading script
- 13. Data warehouse build - DWD layer start-up table data loading script
- 14. Data warehouse build - DWD layer data parsing script
- 15. Data warehouse build - DWD layer event table data loading script
- 16. Data warehouse build - DWD layer data import script
- 17. Data warehouse build - DWS layer data import script
- 18. Data warehouse build - DWT layer data import script
- 19. Data warehouse build - ADS layer member topic data import script
- 20. Data warehouse build - ADS layer data import script
- 21. hdfs_to_mysql transfer script
1. xsync cluster distribution script
Refer to this blog for installation instructions:
https://blog.youkuaiyun.com/yhblog/article/details/84066218
#!/bin/bash
#1. Get the number of input arguments; exit immediately if there are none
pcount=$#
if ((pcount==0)); then
echo no args;
exit;
fi
#2. Get the file name
p1=$1
fname=`basename $p1`
echo fname=$fname
#3. Get the absolute path of the parent directory
pdir=`cd -P $(dirname $p1); pwd`
echo pdir=$pdir
#4. Get the current user name
user=`whoami`
#5. Loop over the cluster hosts and distribute the file with rsync
for ((host=102; host<105;host++));do
echo ----------------hadoop$host------------------
rsync -av $pdir/$fname $user@hadoop$host:$pdir
done
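A hypothetical usage example (the script name xsync and the sample file path are assumptions; adjust to wherever you saved the script and made it executable with chmod +x):
# push a config file to the same absolute path on hadoop102-hadoop104 via rsync
xsync /opt/module/hadoop-2.7.2/etc/hadoop/core-site.xml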
2. ZooKeeper cluster start/stop script
#!/bin/bash
case $1 in
"start"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh start"
done
};;
"stop"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh stop"
done
};;
"status"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh status"
done
};;
esac
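A minimal usage sketch, assuming the script is saved as zk.sh, marked executable, and passwordless SSH to hadoop102-104 is configured:
zk.sh start
zk.sh status
zk.sh stop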
3. Log generation script
#!/bin/bash
for i in hadoop102 hadoop103
do
echo ---------$i generating logs----------
ssh $i "java -jar /opt/module/log-collector-1.0-SNAPSHOT-jar-with-dependencies.jar $1 $2 >/dev/null 2>&1 &"
done
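A hypothetical invocation, assuming the script is saved as lg.sh; the two positional arguments are simply forwarded to the log-collector jar, so their exact meaning depends on that jar:
lg.sh 1000 100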
4. Time synchronization script (for generating mock data)
#!/bin/bash
for i in hadoop102 hadoop103 hadoop104
do
echo ---------$i syncing time----------
ssh -t $i "sudo date -s '$1'"
done
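A usage sketch, assuming the script is saved as dt.sh and the current user can run sudo on every node; quote the date string if it includes a time part:
dt.sh '2020-03-10 00:00:00'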
5. Cluster node status check script
#!/bin/bash
for i in hadoop102 hadoop103 hadoop104
do
echo ---------$i ----------
ssh $i "$*"
done
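A usage sketch, assuming the script is saved as xcall.sh; whatever command you pass is run verbatim on every node, for example checking the running JVM processes:
xcall.sh jps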
6. Log collection Flume start/stop script
#!/bin/bash
case $1 in
"start"){
for i in hadoop102 hadoop103
do
echo " --------启动 $i 采集 flume-------"
ssh $i "nohup /opt/module/flume/bin/flume-ng agent --conf-file /opt/module/flume/conf/file-flume-kafka.conf --name a1 -Dflume.root.logger=INFO,LOGFILE >/opt/module/flume/test1 2>&1 &"
done
};;
"stop"){
for i in hadoop102 hadoop103
do
echo " --------停止 $i 采集 flume-------"
ssh $i "ps -ef | grep file-flume-kafka | grep -v grep |awk '{print \$2}' | xargs kill"
done
};;
esac
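Usage sketch, assuming the script is saved as f1.sh (the name the channel script in section 9 expects):
f1.sh start
f1.sh stop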
7. Kafka cluster start/stop script
#! /bin/bash
case $1 in
"start"){
for i in hadoop102 hadoop103 hadoop104
do
echo " --------启动 $i Kafka-------"
ssh $i "/opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties "
done
};;
"stop"){
for i in hadoop102 hadoop103 hadoop104
do
echo " --------停止 $i Kafka-------"
ssh $i "/opt/module/kafka/bin/kafka-server-stop.sh stop"
done
};;
esac
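Usage sketch, assuming the script is saved as kf.sh; Kafka brokers can take several seconds to shut down, so stop Kafka before stopping ZooKeeper:
kf.sh start
kf.sh stop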
8. Log consumption Flume start/stop script
#! /bin/bash
case $1 in
"start"){
for i in hadoop104
do
echo " --------启动 $i 消费 flume-------"
ssh $i "nohup /opt/module/flume/bin/flume-ng agent --conf-file /opt/module/flume/conf/kafka-flume-hdfs.conf --name a1 -Dflume.root.logger=INFO,LOGFILE >/opt/module/flume/log.txt 2>&1 &"
done
};;
"stop"){
for i in hadoop104
do
echo " --------停止 $i 消费 flume-------"
ssh $i "ps -ef | grep kafka-flume-hdfs | grep -v grep |awk '{print \$2}' | xargs kill"
done
};;
esac
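Usage sketch, assuming the script is saved as f2.sh; it only touches hadoop104, where the HDFS-writing Flume agent runs:
f2.sh start
f2.sh stop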
9. Collection channel start/stop script
#! /bin/bash
case $1 in
"start"){
echo " -------- 启动 集群 -------"
echo " -------- 启动 hadoop 集群 -------"
/opt/module/hadoop-2.7.2/sbin/start-dfs.sh
ssh hadoop103 "/opt/module/hadoop-2.7.2/sbin/start-yarn.sh"
#启动 Zookeeper 集群
zk.sh start
sleep 4s;
#启动 Flume 采集集群
f1.sh start
#启动 Kafka 采集集群
kf.sh start
sleep 6s;
#启动 Flume 消费集群
f2.sh start
};;
"stop"){
echo " -------- 停止 集群 -------"
#停止 Flume 消费集群
f2.sh stop
#停止 Kafka 采集集群
kf.sh stop
sleep 6s;
#停止 Flume 采集集群
f1.sh stop
#停止 Zookeeper 集群
zk.sh stop
echo " -------- 停止 hadoop 集群 -------"
ssh hadoop103 "/opt/module/hadoop-2.7.2/sbin/stop-yarn.sh"
/opt/module/hadoop-2.7.2/sbin/stop-dfs.sh
};;
esac
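Usage sketch, assuming this channel script is saved as cluster.sh and that zk.sh, f1.sh, kf.sh, and f2.sh above are all on the PATH (e.g. in /user/local/bin/):
cluster.sh start
cluster.sh stop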
10. MySQL business data import to HDFS via Sqoop script
#! /bin/bash
sqoop=/opt/module/sqoop/bin/sqoop
do_date=`date -d '-1 day' +%F`
if [[ -n "$2" ]]; then
do_date=$2
fi
import_data(){
$sqoop import \
--connect jdbc:mysql://hadoop102:3306/gmall \
--username root \
--password 123456 \
--target-dir /origin_data/gmall/db/$1/$do_date \
--delete-target-dir \
--query "$2 and \$CONDITIONS" \
--num-mappers 1 \
--fields-terminated-by '\t' \
--compress \
--compression-codec lzop \
--null-string '\\N' \
--null-non-string '\\N'
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar \
com.hadoop.compression.lzo.DistributedLzoIndexer \
/origin_data/gmall/db/$1/$do_date
}
import_order_info(){
import_data order_info "select
id,
final_total_amount,
order_status,
user_id,
out_trade_no,
create_time,
operate_time,
province_id,
benefit_reduce_amount,
original_total_amount,
feight_fee
from order_info
where (date_format(create_time,'%Y-%m-%d')='$do_date'
or date_format(operate_time,'%Y-%m-%d')='$do_date')"
}
import_coupon_use(){
import_data coupon_use "select
id,
coupon_id,
user_id,
order_id,
coupon_status,
get_time,
using_time,
used_time
from coupon_use
where (date_format(get_time,'%Y-%m-%d')='$do_date'
or date_format(using_time,'%Y-%m-%d')='$do_date'
or date_format(used_time,'%Y-%m-%d')='$do_date')"
}
import_order_status_log(){
import_data order_status_log "select
id,
order_id,
order_status,
operate_time
from order_status_log
where
date_format(operate_time,'%Y-%m-%d')='$do_date'"
}
import_activity_order(){
import_data activity_order "select
id,
activity_id,
order_id,
create_time
from activity_order
where
date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_user_info(){
import_data "user_info" "select
id,
name,
birthday,
gender,
email,
user_level,
create_time,
operate_time
from user_info
where (DATE_FORMAT(create_time,'%Y-%m-%d')='$do_date'
or DATE_FORMAT(operate_time,'%Y-%m-%d')='$do_date')"
}
import_order_detail(){
import_data order_detail "select
od.id,
order_id,
user_id,
sku_id,
sku_name,
order_price,
sku_num,
od.create_time
from order_detail od
join order_info oi
on od.order_id=oi.id
where
DATE_FORMAT(od.create_time,'%Y-%m-%d')='$do_date'"
}
import_payment_info(){
import_data "payment_info" "select
id,
out_trade_no,
order_id,
user_id,
alipay_trade_no,
total_amount,
subject,
payment_type,
payment_time
from payment_info
where
DATE_FORMAT(payment_time,'%Y-%m-%d')='$do_date'"
}
import_comment_info(){
import_data comment_info "select
id,
user_id,
sku_id,
spu_id,
order_id,
appraise,
comment_txt,
create_time
from comment_info
where date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_order_refund_info(){
import_data order_refund_info "select
id,
user_id,
order_id,
sku_id,
refund_type,
refund_num,
refund_amount,
refund_reason_type,
create_time
from order_refund_info
where
date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_sku_info(){
import_data sku_info "select
id,
spu_id,
price,
sku_name,
sku_desc,
weight,
tm_id,
category3_id,
create_time
from sku_info
where 1=1"
}
import_base_category1(){
import_data "base_category1" "select
id,
name
from base_category1
where 1=1"
}
import_base_category2(){
import_data "base_category2" "select
id,
name,
category1_id
from base_category2
where 1=1"
}
im