Deploying Apache Big Data Components

Component Overview

Component  Version
JDK 1.8.0_211
Scala 2.12.14
Zookeeper 3.5.9
Hadoop 3.2.2
Hive 3.1.4
HBase 2.4.9
Kafka 2.6.3
Spark 2.4.8
Kudu 1.14.0
Impala 3.4.0

Zookeeper Deployment

Download apache-zookeeper-3.5.9-bin.tar.gz

https://mirrors.cnnic.cn/apache/zookeeper/zookeeper-3.5.9/apache-zookeeper-3.5.9-bin.tar.gz

Create directories

# switch to the hdfs user for the installation
su hdfs

# installation directory
sudo mkdir /opt/apps/

# data directory
sudo chmod 755 /data
sudo mkdir /data/zookeeper
sudo chown hdfs:hdfs /data/zookeeper

Extract

tar -zxvf apache-zookeeper-3.5.9-bin.tar.gz -C /opt/apps/
cd /opt/apps
mv apache-zookeeper-3.5.9-bin/ zookeeper-3.5.9

Environment variables

sudo vim /etc/profile.d/hdfs_env.sh

# zookeeper
export ZK_HOME=/opt/apps/zookeeper-3.5.9
export PATH=$PATH:$ZK_HOME/bin

source /etc/profile.d/hdfs_env.sh
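
A quick check that the new variables are in effect (optional):

echo $ZK_HOME        # expect /opt/apps/zookeeper-3.5.9
which zkServer.sh    # should resolve to $ZK_HOME/bin/zkServer.sh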

Configure the server ID (myid)

echo "1" > /data/zookeeper/myid

Configuration

cd /opt/apps/zookeeper-3.5.9/conf/

sudo mv zoo_sample.cfg zoo.cfg 

sudo vim zoo.cfg

# change the data storage path
dataDir=/data/zookeeper

# add the cluster members
server.1=hadoop-master:2888:3888
server.2=hadoop-slave01:2888:3888
server.3=hadoop-slave02:2888:3888
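
For reference, the relevant part of the resulting zoo.cfg would look roughly as follows, assuming the remaining defaults from zoo_sample.cfg (tickTime, initLimit, syncLimit, clientPort) are kept:

tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/zookeeper
clientPort=2181
server.1=hadoop-master:2888:3888
server.2=hadoop-slave01:2888:3888
server.3=hadoop-slave02:2888:3888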

vim /opt/apps/zookeeper-3.5.9/bin/zkEnv.sh

# add
export JAVA_HOME=/opt/apps/jdk

Sync to the other nodes

lsync /opt/apps/zookeeper-3.5.9

# set myid on the other nodes
[root@hadoop-slave01 /]$ echo "2" > /data/zookeeper/myid
[root@hadoop-slave02 /]$ echo "3" > /data/zookeeper/myid
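
To confirm all three IDs in one pass, a quick (hypothetical) check from any node, assuming passwordless ssh between the nodes is already configured (it is also required by the start script below):

for h in hadoop-master hadoop-slave01 hadoop-slave02; do ssh $h "hostname; cat /data/zookeeper/myid"; done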

Start

Startup script

vim zkCluster.sh
sudo chmod +x zkCluster.sh 

Script contents

#!/bin/bash

hosts=(hadoop-master hadoop-slave01 hadoop-slave02)
path=/opt/apps/zookeeper-3.5.9

case $1 in
"start")
	for i in ${hosts[@]}
	do
		echo ---------- $i zookeeper starting ------------
		ssh $i "$path/bin/zkServer.sh start"
	done
	;;
"stop")
	for i in ${hosts[@]}
	do
		echo ---------- $i zookeeper stopping ------------
		ssh $i "$path/bin/zkServer.sh stop"
	done
	;;
"status")
	for i in ${hosts[@]}
	do
		echo ---------- $i zookeeper status ------------
		ssh $i "$path/bin/zkServer.sh status"
	done
	;;
esac
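
Usage is then simply:

./zkCluster.sh start
./zkCluster.sh status   # one node should report Mode: leader, the other two Mode: follower
./zkCluster.sh stop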

Hadoop Deployment

Download hadoop-3.2.2.tar.gz

https://archive.apache.org/dist/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz

Extract

tar -zxvf hadoop-3.2.2.tar.gz -C /opt/apps/

Environment variables

sudo vim /etc/profile.d/hdfs_env.sh

# hadoop
export HADOOP_HOME=/opt/apps/hadoop-3.2.2
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:/usr/lib64
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

source /etc/profile.d/hdfs_env.sh
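
Optionally verify that the variables are picked up and the native libraries load:

hadoop version          # should report Hadoop 3.2.2
hadoop checknative -a   # checks whether the native (and Snappy) libraries are found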

Create directories

sudo mkdir -p /data/hadoop/tmp
sudo mkdir -p /data/hadoop/nn
sudo mkdir -p /data/hadoop/dn
sudo mkdir -p /data/hadoop/jn
sudo chown hdfs:hdfs -R /data/hadoop
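
The same data directories are needed on every node; a sketch for creating them on the other two hosts, assuming the hdfs user can run sudo there (ssh -t allows an interactive sudo password prompt):

for h in hadoop-slave01 hadoop-slave02; do
  ssh -t $h "sudo mkdir -p /data/hadoop/{tmp,nn,dn,jn} && sudo chown -R hdfs:hdfs /data/hadoop"
done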

Cluster configuration

cd /opt/apps/hadoop-3.2.2/etc/hadoop/

core-site.xml

https://hadoop.apache.org/docs/r3.1.4/hadoop-project-dist/hadoop-common/core-default.xml

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://nameservice1</value>
  </property>
  <!-- directory for temporary files generated by Hadoop at runtime -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/hadoop/tmp</value>
  </property>
  <!-- enable the trash feature; retention in minutes (4320 = 3 days) -->
  <property>
    <name>fs.trash.interval</name>
    <value>4320</value>
  </property>
  <!-- zookeeper quorum addresses -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop-master:2181,hadoop-slave01:2181,hadoop-slave02:2181</value>
  </property>
  <!-- enable native library support -->
  <property>
    <name>io.native.lib.available</name>
    <value>true</value>
  </property>
  <!-- supported compression codec classes -->
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <!-- buffer size used when reading/writing SequenceFiles -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!-- maximum idle time before a client connection is dropped, in ms (60000 = 60 seconds) -->
  <property>
    <name>ipc.client.connection.maxidletime</name>
    <value>60000</value>
  </property>
  <!-- hosts from which the hdfs superuser may act as a proxy -->
  <property>
    <name>hadoop.proxyuser.hdfs.hosts</name>
    <value>*</value>
  </property>
  <!-- groups the hdfs superuser may impersonate -->
  <property>
    <name>hadoop.proxyuser.hdfs.groups</name>
    <value>*</value>
  </property>
  <!-- hosts from which root may act as a proxy -->
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <!-- groups root may impersonate -->
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
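
Once the file is in place, the effective values can be double-checked with hdfs getconf (optional):

hdfs getconf -confKey fs.defaultFS            # expect hdfs://nameservice1
hdfs getconf -confKey ha.zookeeper.quorum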

hdfs-site.xml

https://hadoop.apache.org/docs/r3.1.4/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml

<configuration>
  <!-- logical name of the HDFS nameservice -->
  <property>
    <name>dfs.nameservices</name>
    <value>nameservice1</value>
  </property>
  <!-- logical IDs of the two HA NameNodes -->
  <property>
    <name>dfs.ha.namenodes.nameservice1</name>
    <value>nn1,nn2</value>
  </property>
  <!-- RPC address of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.nameservice1.nn1</name>
    <value>hadoop-master:8020</value>
  </property>
  <!-- service RPC address of nn1 -->
  <property>
    <name>dfs.namenode.servicerpc-address.nameservice1.nn1</name>
    <value>hadoop-master:8022</value>
  </property>
  <!-- web UI address and port of nn1 -->
  <property>
    <name>dfs.namenode.http-address.nameservice1.nn1</name>
    <value>hadoop-master:9870</value>
  </property>
  <!-- HTTPS address of nn1 -->
  <property>
    <name>dfs.namenode.https-address.nameservice1.nn1</name>
    <value>hadoop-master:9871</value>
  </property>
  <!-- RPC address of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.nameservice1.nn2</name>
    <value>hadoop-slave01:8020</value>
  </property>
  <!-- service RPC address of nn2 -->
  <property>
    <name>dfs.namenode.servicerpc-address.nameservice1.nn2</name>
    <value>hadoop-slave01:8022</value>
  </property>
  <!-- web UI address and port of nn2 -->
  <property>
    <name>dfs.namenode.http-address.nameservice1.nn2</name>
    <value>hadoop-slave01:9870</value>
  </property>
  <!-- HTTPS address of nn2 -->
  <property>
    <name>dfs.namenode.https-address.nameservice1.nn2</name>
    <value>hadoop-slave01:9871</value>
  </property>
  <!-- NameNode fsimage storage location -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/hadoop/nn</value>
  </property>
  <!-- JournalNode edit log storage location -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/hadoop/jn</value>
  </property>