#!/bin/bash
#Installation directory settings (modification point 1); the script can create the directory itself
##A script edited on Windows may fail to run; if so, fix the line endings: 1> vim redisshell.sh 2> :set ff=unix
echo -e "Enter the hadoop installation directory (created automatically if it does not exist, without a trailing /), e.g. /bigdata/install"
read esinstallpath
#Create the installation directory if it does not exist
if [ ! -d $esinstallpath ]; then
mkdir -p $esinstallpath
fi
if [ ! -d $esinstallpath ]; then
echo "Failed to create directory $esinstallpath! Please check that you have permission on it"
exit 1
fi
#Extract the tar package
currentdir=$(cd $(dirname $0); pwd)
ls | grep 'hadoop-.*[gz]$'
if [ $? -ne 0 ]; then
#No hadoop tarball in the current directory
echo "No hadoop-*.tar.gz found under $currentdir, please upload one first!"
exit 1
else
#Extract into the installation directory
tar -zxvf $currentdir/$(ls | grep 'hadoop-.*[gz]$') -C $esinstallpath
fi
esbanben=$(ls $esinstallpath | grep 'hadoop-.*')
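#Sanity check (sketch, not part of the original flow): esbanben is assumed to
#match exactly one extracted hadoop-* directory; everything below relies on that.
echo "Detected Hadoop directory: $esbanben"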
#PATH setup
#Append to the end of the profile
echo "">>~/.bash_profile
echo '#HADOOP'>>~/.bash_profile
echo "export HADOOP_HOME=$esinstallpath/$esbanben">>~/.bash_profile
echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin'>>~/.bash_profile
source ~/.bash_profile
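#Quick check (sketch): after sourcing the profile, HADOOP_HOME should point at
#the directory extracted above.
echo "HADOOP_HOME is set to $HADOOP_HOME"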
hadooppath=$esinstallpath/$esbanben
confpath=$esinstallpath/$esbanben/etc/hadoop
#Hard-code JAVA_HOME into hadoop-env.sh (replaces the default 'export JAVA_HOME=${JAVA_HOME}' line)
javahome=$(echo $JAVA_HOME)
bak_dir='export JAVA_HOME=${JAVA_HOME}'
new_dir='export JAVA_HOME='$javahome
sed -i "s!${bak_dir}!${new_dir}!g" $confpath/hadoop-env.sh
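#Quick check (sketch): the substitution above should have written the local JDK path into hadoop-env.sh.
grep 'export JAVA_HOME' $confpath/hadoop-env.sh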
#Configure core-site.xml
echo -e "Enter the hadoop cluster (nameservice) name, e.g. mycluster"
read mycluster
sed -i '/<\/configuration>/i\<!-- hadoop cluster (nameservice) name -->' $confpath/core-site.xml
sed -i '/<\/configuration>/i\<property>' $confpath/core-site.xml
sed -i '/<\/configuration>/i\ <name>fs.defaultFS</name>' $confpath/core-site.xml
sed -i "/<\/configuration>/i\ <value>hdfs://$mycluster</value>" $confpath/core-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/core-site.xml
echo -e "Enter the directory where hadoop stores its runtime data. It does not need to be created by hand, the cluster creates it; pick a location with plenty of space, e.g. /bigdata/hadoop"
read hadooptmpdir
sed -i '/<\/configuration>/i\<!-- directory for files hadoop generates at runtime -->' $confpath/core-site.xml
sed -i '/<\/configuration>/i\<property>' $confpath/core-site.xml
sed -i '/<\/configuration>/i\ <name>hadoop.tmp.dir</name>' $confpath/core-site.xml
sed -i "/<\/configuration>/i\ <value>$hadooptmpdir</value>" $confpath/core-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/core-site.xml
echo -e "Enter the zookeeper quorum, e.g. cdh01:2181,cdh02:2181,cdh03:2181"
read zkhosts
sed -i '/<\/configuration>/i\<!-- zookeeper quorum used for HA failover -->' $confpath/core-site.xml
sed -i '/<\/configuration>/i\<property>' $confpath/core-site.xml
sed -i '/<\/configuration>/i\ <name>ha.zookeeper.quorum</name>' $confpath/core-site.xml
sed -i "/<\/configuration>/i\ <value>$zkhosts</value>" $confpath/core-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/core-site.xml
echo "core-site.xml now contains:"
cat $confpath/core-site.xml
echo "core-site.xml configuration complete"
sleep 1
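#The repeated "insert before </configuration>" sed calls above (and below) all follow
#one pattern; they could be wrapped in a small helper. This is only a sketch of the
#idea - the name append_property is made up here and the script does not use it:
append_property() {
    local file=$1 name=$2 value=$3
    sed -i "/<\/configuration>/i\<property>" $file
    sed -i "/<\/configuration>/i\ <name>$name</name>" $file
    sed -i "/<\/configuration>/i\ <value>$value</value>" $file
    sed -i "/<\/configuration>/i\</property>" $file
}
#e.g. append_property $confpath/core-site.xml hadoop.tmp.dir $hadooptmpdir would
#produce the same <property> block as the hadoop.tmp.dir lines above.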
#Configure hdfs-site.xml
sed -i '/<\/configuration>/i\<!-- nameservice of the cluster -->' $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\<property>' $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\ <name>dfs.nameservices</name>' $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>$mycluster</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\<!-- the cluster supports append -->' $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\<property>' $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\ <name>dfs.support.append</name>' $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>true</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\<!-- namenode IDs -->' $confpath/hdfs-site.xml
sed -i '/<\/configuration>/i\<property>' $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.ha.namenodes.$mycluster</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>nn1,nn2</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
echo "Enter the two NameNode hosts, separated by a space, e.g. cdh01 cdh02"
read nameNodes
array=($(echo $nameNodes | tr ' ' ' '))
for i in $(seq 0 $((${#array[@]}-1)))
do
sed -i "/<\/configuration>/i\<!-- full RPC listen address of namenode nn$((${i}+1)) -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.namenode.rpc-address.$mycluster.nn$((${i}+1))</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>${array[${i}]}:8020</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- http address of namenode nn$((${i}+1)) -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.namenode.http-address.$mycluster.nn$((${i}+1))</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>${array[${i}]}:50070</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
#Configure the yarn environment for this host
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.hostname.rm$((${i}+1))</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>${array[${i}]}</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.webapp.address.rm$((${i}+1))</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>${array[${i}]}:8088</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
done
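#For example, with mycluster=mycluster and nameNodes="cdh01 cdh02", the loop above
#is expected to have inserted dfs.namenode.rpc-address.mycluster.nn1=cdh01:8020,
#dfs.namenode.rpc-address.mycluster.nn2=cdh02:8020, the matching http-address
#entries on port 50070, and yarn.resourcemanager.hostname/webapp.address entries
#for rm1/rm2 (the exact values depend on what was typed at the prompts).
#Quick check (sketch):
grep -E 'rpc-address|http-address' $confpath/hdfs-site.xml
grep -E 'resourcemanager\.(hostname|webapp)' $confpath/yarn-site.xml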
echo "Enter the journalnode list (all journalnode machines), e.g. cdh01:8485;cdh02:8485;cdh03:8485;cdh04:8485;cdh05:8485"
read journalnode
sed -i "/<\/configuration>/i\<!-- journalnode quorum -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.namenode.shared.edits.dir</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>qjournal://$journalnode/$mycluster</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- client failover proxy provider -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.client.failover.proxy.provider.$mycluster</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- methods used to fence the active NameNode during failover -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.ha.fencing.methods</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ sshfence" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ shell(/bin/true)" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ </value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- private key for passwordless ssh fencing -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.ha.fencing.ssh.private-key-files</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>~/.ssh/id_rsa</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- where the JournalNodes store their files -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.journalnode.edits.dir</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>$hadooptmpdir/journal</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- enable automatic failover -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.ha.automatic-failover.enabled</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>true</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<!-- number of redundant replicas per HDFS block -->" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <name>dfs.replication</name>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\ <value>3</value>" $confpath/hdfs-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/hdfs-site.xml
echo "hdfs-site.xml now contains:"
cat $confpath/hdfs-site.xml
echo "hdfs-site.xml configuration complete"
sleep 1
#Configure mapred-site.xml
#mapred-site.xml is created from the shipped template
cp $confpath/mapred-site.xml.template $confpath/mapred-site.xml
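#Note: mapred-site.xml.template only ships with Hadoop 2.x; a Hadoop 3.x tarball
#already contains mapred-site.xml and the cp above would fail. A more defensive
#variant (sketch only) would be:
#  [ -f $confpath/mapred-site.xml.template ] && cp $confpath/mapred-site.xml.template $confpath/mapred-site.xml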
sed -i "/<\/configuration>/i\<!-- run MapReduce jobs on YARN -->" $confpath/mapred-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/mapred-site.xml
sed -i "/<\/configuration>/i\ <name>mapreduce.framework.name</name>" $confpath/mapred-site.xml
sed -i "/<\/configuration>/i\ <value>yarn</value>" $confpath/mapred-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/mapred-site.xml
echo "mapred-site.xml now contains:"
cat $confpath/mapred-site.xml
echo "mapred-site.xml configuration complete"
sleep 1
#Configure yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.nodemanager.aux-services</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>mapreduce_shuffle</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.ha.enabled</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>true</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.ha.rm-ids</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>rm1,rm2</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.zk-address</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>$zkhosts</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.cluster-id</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>mycluster1</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
#Maximum number of ApplicationMaster attempts per job
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.resourcemanager.am.max-attempts</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>5</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\<property>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <name>yarn.nodemanager.vmem-check-enabled</name>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\ <value>false</value>" $confpath/yarn-site.xml
sed -i "/<\/configuration>/i\</property>" $confpath/yarn-site.xml
echo "yarn-site.xml now contains:"
cat $confpath/yarn-site.xml
echo "yarn-site.xml configuration complete"
sleep 1
#Configure slaves
echo "Enter all datanode hosts, separated by spaces, e.g. cdh01 cdh02 cdh03"
read datanodes
array=($(echo $datanodes | tr ' ' ' '))
#Truncate slaves before writing the datanode list
> $confpath/slaves
for datanode in ${array[@]}
do
echo $datanode >> $confpath/slaves
done
echo "slaves now contains:"
cat $confpath/slaves
echo "slaves configuration complete"
sleep 1
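#Note: "slaves" is the Hadoop 2.x worker-list file; Hadoop 3.x renamed it to
#etc/hadoop/workers. If this script is reused on 3.x, something like the
#following (sketch only) keeps both in sync:
#  [ -f $confpath/workers ] && cp $confpath/slaves $confpath/workers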
#Distribute the hadoop installation
echo "The nodes entered below must already have passwordless ssh configured"
echo "Enter all other hadoop nodes, separated by spaces; do not include the current machine (the current machine is cdh01), e.g. cdh02 cdh03 cdh04 cdh05"
read allnodes
user=$(whoami)
array=($(echo $allnodes | tr ' ' ' '))
for allnode in ${array[@]}
do
echo ======= $allnode =======
ssh $allnode "echo ''>>~/.bash_profile"
ssh $allnode "echo '#HADOOP'>>~/.bash_profile"
ssh $allnode "echo 'export HADOOP_HOME=$hadooppath'>>~/.bash_profile"
ssh $allnode "echo 'export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin'>>~/.bash_profile"
ssh $allnode "source ~/.bash_profile"
ssh $allnode "rm -rf $hadooppath"
ssh $allnode "mkdir -p $hadooppath"
scp -r $hadooppath/* ${user}@$allnode:$hadooppath/
echo ======= $allnode copy finished =======
done
for allnode in ${array[@]}
do
echo ======= On $allnode run 'source ~/.bash_profile' manually, then check the installation with 'hadoop version' =======
done
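#Optional verification (sketch): each ssh command runs in its own session, so the
#'source ~/.bash_profile' above does not put hadoop on the PATH here; call the
#distributed binary by its full path instead. This assumes the JDK path recorded
#in hadoop-env.sh also exists on every node.
for allnode in ${array[@]}
do
ssh $allnode "$hadooppath/bin/hadoop version | head -n 1"
done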
################# Commands for first-time initialization #######
Start the cluster
a. Start the zookeeper cluster first
$>zkServer.sh start
b. Start the JournalNodes
Once all configuration is done, the JournalNode service has to be started first. On every server configured as a JournalNode run:
$>hadoop-daemon.sh start journalnode
c. Format the NameNode
After the JournalNodes are up (check with jps), format the NameNode. On nn1 (cdh01) run:
$>hdfs namenode -format
d. Start the freshly formatted namenode
$>hadoop-daemon.sh start namenode
Then on the other NameNode server, nn2 (cdh02), run:
$>hdfs namenode -bootstrapStandby
// $>hdfs namenode -initializeSharedEdits
e. On either one of the namenodes, initialize the ZKFC state in ZooKeeper
$>hdfs zkfc -formatZK
f. Start the cluster from the nn1 machine
$>start-all.sh
$>stop-all.sh
$>start-all.sh
g. Start the yarn ResourceManager on cdh02
$>yarn-daemon.sh start resourcemanager
Check the cluster status
$>hdfs dfsadmin -report
web ui
http://192.168.8.21:50070/
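To confirm that automatic failover is working, query the HA state of each NameNode (nn1/nn2 are the IDs configured by the script); one should report active and the other standby:
$>hdfs haadmin -getServiceState nn1
$>hdfs haadmin -getServiceState nn2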
################## Commands for normal startup ##########
2 On node1, start the whole cluster
start-all.sh
If a namenode does not come up (usually because of the JournalNodes), start it on its own (hadoop-daemon.sh start namenode)
3 Start the resourcemanager on node2 separately
yarn-daemon.sh start resourcemanager
3.1 Check the hadoop cluster status
hdfs dfsadmin -report
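3.2 Optionally, check the ResourceManager HA state and the registered NodeManagers (rm1/rm2 are the IDs the script configured)
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
yarn node -list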