Component Overview
| Component | Version |
| --- | --- |
| JDK | 1.8.0_211 |
| Scala | 2.12.14 |
| Zookeeper | 3.5.9 |
| Hadoop | 3.2.2 |
| Hive | 3.1.4 |
| HBase | 2.4.9 |
| Kafka | 2.6.3 |
| Spark | 2.4.8 |
| Kudu | 1.14.0 |
| Impala | 3.4.0 |
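Before starting, it is worth confirming that the installed JDK matches the table; a minimal check, assuming java is already on the PATH:
# Verify the JDK version
java -version
# expected: java version "1.8.0_211"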
Zookeeper Deployment
Download apache-zookeeper-3.5.9-bin.tar.gz
https://mirrors.cnnic.cn/apache/zookeeper/zookeeper-3.5.9/apache-zookeeper-3.5.9-bin.tar.gz
Create directories
# Install as the hdfs user
su hdfs
# Installation directory
sudo mkdir /opt/apps/
# Data directory
sudo chmod 755 /data
sudo mkdir /data/zookeeper
sudo chown hdfs:hdfs /data/zookeeper
Extract
tar -zxvf apache-zookeeper-3.5.9-bin.tar.gz -C /opt/apps/
cd /opt/apps
mv apache-zookeeper-3.5.9-bin/ zookeeper-3.5.9
Environment variables
sudo vim /etc/profile.d/hdfs_env.sh
# zookeeper
export ZK_HOME=/opt/apps/zookeeper-3.5.9
export PATH=$PATH:$ZK_HOME/bin
source /etc/profile.d/hdfs_env.sh
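A quick check that the variables took effect (assumes the source command above succeeded):
echo $ZK_HOME           # should print /opt/apps/zookeeper-3.5.9
command -v zkServer.sh  # should resolve under $ZK_HOME/bin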
Configure the server ID
echo "1" > /data/zookeeper/myid
Configuration
cd /opt/apps/zookeeper-3.5.9/conf/
sudo mv zoo_sample.cfg zoo.cfg
sudo vim zoo.cfg
# Change the data storage path
dataDir=/data/zookeeper
# Append
server.1=hadoop-master:2888:3888
server.2=hadoop-slave01:2888:3888
server.3=hadoop-slave02:2888:3888
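For reference, a complete minimal zoo.cfg after these edits looks roughly like the sketch below; tickTime, initLimit, syncLimit, and clientPort are the defaults shipped in zoo_sample.cfg.
tickTime=2000            # base time unit in milliseconds
initLimit=10             # ticks a follower may take to connect and sync with the leader
syncLimit=5              # ticks a follower may lag behind the leader before being dropped
dataDir=/data/zookeeper
clientPort=2181
# server.N=host:follower-port:leader-election-port
server.1=hadoop-master:2888:3888
server.2=hadoop-slave01:2888:3888
server.3=hadoop-slave02:2888:3888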
vim /opt/apps/zookeeper-3.5.9/bin/zkEnv.sh
# Append
export JAVA_HOME=/opt/apps/jdk
Sync
# lsync here is a custom cluster-sync script (e.g. an rsync wrapper); substitute scp/rsync if you do not have one
lsync /opt/apps/zookeeper-3.5.9
# Set myid on the other nodes
[root@hadoop-slave01 /]# echo "2" > /data/zookeeper/myid
[root@hadoop-slave02 /]# echo "3" > /data/zookeeper/myid
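Equivalently, the slave myid files can be written from the master in one loop (a sketch; assumes passwordless ssh, which the cluster script below also relies on):
id=2
for host in hadoop-slave01 hadoop-slave02; do
    ssh $host "echo $id > /data/zookeeper/myid"
    id=$((id + 1))
done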
Startup
Startup script
vim zkCluster.sh
sudo chmod +x zkCluster.sh
Script contents
#!/bin/bash
hosts=(hadoop-master hadoop-slave01 hadoop-slave02)
path=/opt/apps/zookeeper-3.5.9
case $1 in
"start")
    for i in "${hosts[@]}"
    do
        echo "---------- $i zookeeper starting ------------"
        ssh $i "$path/bin/zkServer.sh start"
    done
;;
"stop")
    for i in "${hosts[@]}"
    do
        echo "---------- $i zookeeper stopping ------------"
        ssh $i "$path/bin/zkServer.sh stop"
    done
;;
"status")
    for i in "${hosts[@]}"
    do
        echo "---------- $i zookeeper status ------------"
        ssh $i "$path/bin/zkServer.sh status"
    done
;;
*)
    echo "Usage: $0 {start|stop|status}"
;;
esac
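Usage example once the script is in place (passwordless ssh between the three hosts is assumed):
./zkCluster.sh start
./zkCluster.sh status   # one node should report Mode: leader, the other two Mode: follower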
Hadoop Deployment
Download hadoop-3.2.2.tar.gz
https://archive.apache.org/dist/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz
Extract
tar -zxvf hadoop-3.2.2.tar.gz -C /opt/apps/
Environment variables
sudo vim /etc/profile.d/hdfs_env.sh
# hadoop
export HADOOP_HOME=/opt/apps/hadoop-3.2.2
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:/usr/lib64
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile.d/hdfs_env.sh
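A quick sanity check that the binaries and native libraries are picked up; checknative matters here because core-site.xml below enables io.native.lib.available and the Snappy codec:
hadoop version       # should report Hadoop 3.2.2
hadoop checknative   # zlib/snappy should show true for the codecs configured below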
Create directories
sudo mkdir -p /data/hadoop/tmp
sudo mkdir -p /data/hadoop/nn
sudo mkdir -p /data/hadoop/dn
sudo mkdir -p /data/hadoop/jn
sudo chown hdfs:hdfs -R /data/hadoop
Cluster configuration
cd /opt/apps/hadoop-3.2.2/etc/hadoop/
core-site.xml
https://hadoop.apache.org/docs/r3.2.2/hadoop-project-dist/hadoop-common/core-default.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://nameservice1</value>
</property>
<!-- Storage path for temporary files generated by Hadoop at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hadoop/tmp</value>
</property>
<!-- Enable the trash feature; retention in minutes (4320 = 3 days) -->
<property>
<name>fs.trash.interval</name>
<value>4320</value>
</property>
<!-- ZooKeeper quorum addresses -->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop-master:2181,hadoop-slave01:2181,hadoop-slave02:2181</value>
</property>
<!-- Enable native library support -->
<property>
<name>io.native.lib.available</name>
<value>true</value>
</property>
<!-- Compression codec classes to support -->
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
<!-- Buffer size used when reading and writing SequenceFiles -->
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<!-- Maximum idle time for client connections, in milliseconds (60000 ms = 60 seconds) -->
<property>
<name>ipc.client.connection.maxidletime</name>
<value>60000</value>
</property>
<!-- Hosts from which the hdfs superuser may proxy requests -->
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<!-- Groups whose members the hdfs superuser may impersonate -->
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<!-- Hosts from which root may proxy requests -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<!-- Groups whose members root may impersonate -->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
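Once HDFS is up, individual keys from core-site.xml can be spot-checked with hdfs getconf, for example:
hdfs getconf -confKey fs.defaultFS        # hdfs://nameservice1
hdfs getconf -confKey fs.trash.interval   # 4320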
hdfs-site.xml
https://hadoop.apache.org/docs/r3.2.2/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
<configuration>
<!-- Nameservice (cluster) ID -->
<property>
<name>dfs.nameservices</name>
<value>nameservice1</value>
</property>
<!-- Logical IDs of the two HA NameNodes -->
<property>
<name>dfs.ha.namenodes.nameservice1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.nameservice1.nn1</name>
<value>hadoop-master:8020</value>
</property>
<!-- HDFS service RPC address of nn1 -->
<property>
<name>dfs.namenode.servicerpc-address.nameservice1.nn1</name>
<value>hadoop-master:8022</value>
</property>
<!-- Web UI listen address and port of nn1 -->
<property>
<name>dfs.namenode.http-address.nameservice1.nn1</name>
<value>hadoop-master:9870</value>
</property>
<!-- Secure HTTPS address of nn1 -->
<property>
<name>dfs.namenode.https-address.nameservice1.nn1</name>
<value>hadoop-master:9871</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.nameservice1.nn2</name>
<value>hadoop-slave01:8020</value>
</property>
<!-- HDFS service RPC address of nn2 -->
<property>
<name>dfs.namenode.servicerpc-address.nameservice1.nn2</name>
<value>hadoop-slave01:8022</value>
</property>
<!-- Web UI listen address and port of nn2 -->
<property>
<name>dfs.namenode.http-address.nameservice1.nn2</name>
<value>hadoop-slave01:9870</value>
</property>
<!-- Secure HTTPS address of nn2 -->
<property>
<name>dfs.namenode.https-address.nameservice1.nn2</name>
<value>hadoop-slave01:9871</value>
</property>
<!-- Storage location for the NameNode fsimage and edit logs -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/hadoop/nn</value>
</property>
<!-- Storage location for JournalNode edit logs -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hadoop/jn</value>
</property>