Hadoop Installation (3.3.1)
1. Set the hostname
hostname hadoop01 # set a temporary hostname
vi /etc/hostname # set the permanent hostname
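# on systemd distributions (e.g. CentOS 7), hostnamectl does both in one step:
hostnamectl set-hostname hadoop01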
2. Configure /etc/hosts
vi /etc/hosts
192.168.136.103 hadoop01
192.168.136.104 hadoop02
192.168.136.105 hadoop03
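# repeat on all three nodes, then confirm name resolution works:
ping -c 1 hadoop02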
3. Disable the firewall
systemctl stop firewalld # stop the firewall now
systemctl disable firewalld # keep it from starting on boot
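# verify it is off:
systemctl is-active firewalld # should print "inactive"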
4. Passwordless SSH login
# on the master node
ssh-keygen -t rsa # press Enter at every prompt
# append the generated public key to the authorized keys
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# copy the public key to the two worker nodes
scp ~/.ssh/id_rsa.pub root@hadoop02:~/
scp ~/.ssh/id_rsa.pub root@hadoop03:~/
# on each worker node, append the master's public key to the authorized keys
cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
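# from the master, verify passwordless login; each command should print the
# remote hostname without asking for a password:
ssh root@hadoop02 hostname
ssh root@hadoop03 hostname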
5. Time synchronization with ntpdate
yum install ntpdate
# sync the clock once
ntpdate -u ntp.sjtu.edu.cn
# scheduled sync: run at the top of every hour
vi /etc/crontab
0 */1 * * * root ntpdate -u ntp.sjtu.edu.cn
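# with passwordless ssh in place (step 4), spot-check that the clocks agree:
for h in hadoop01 hadoop02 hadoop03; do ssh root@$h date; done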
6. Download Hadoop
# download
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/stable/hadoop-3.3.1.tar.gz
# if the stable/ symlink no longer points at 3.3.1, the release is kept at
# https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
# extract
tar -zxvf hadoop-3.3.1.tar.gz
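# hadoop-env.sh in step 7 sets HADOOP_HOME=/home/hadoop3.3.1/hadoop-3.3.1;
# if the archive was extracted elsewhere, move the directory into place
# (path assumption taken from step 7):
mkdir -p /home/hadoop3.3.1
mv hadoop-3.3.1 /home/hadoop3.3.1/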
7. Edit etc/hadoop/hadoop-env.sh
cd hadoop-3.3.1/etc/hadoop/
vi hadoop-env.sh
# add the following
export JAVA_HOME=/home/jdk1.8/jdk1.8.0_191
export HADOOP_HOME=/home/hadoop3.3.1/hadoop-3.3.1
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
export HADOOP_PID_DIR=${HADOOP_HOME}/pid
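# quick sanity check that the scripts pick up JAVA_HOME; this should print
# the "Hadoop 3.3.1" version banner:
cd /home/hadoop3.3.1/hadoop-3.3.1
bin/hadoop version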
8. Edit etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop01:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop_repo</value>
  </property>
</configuration>
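# fs.defaultFS above is the URI HDFS clients connect to; once the cluster is
# up (step 15), it can be exercised explicitly (run from HADOOP_HOME):
bin/hdfs dfs -ls hdfs://hadoop01:9000/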
9. Edit etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop01:9868</value>
  </property>
</configuration>
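# confirm the values Hadoop actually picks up from hdfs-site.xml:
bin/hdfs getconf -confKey dfs.replication
bin/hdfs getconf -confKey dfs.namenode.secondary.http-address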
10. Edit etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
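# once the cluster is running (step 15), verify MapReduce-on-YARN with the
# example job bundled in the tarball (jar path assumes the stock 3.3.1 layout):
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar pi 2 4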
11. Edit etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop01</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://hadoop01:19888/jobhistory/logs/</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>
  <!-- minimum memory allocated to each container -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
</configuration>
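# after startup, confirm both NodeManagers registered with the ResourceManager:
bin/yarn node -list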
12. Edit etc/hadoop/workers (the nodes listed here run the DataNode and NodeManager daemons)
hadoop02
hadoop03
13. Edit the start/stop scripts (add the variables below at the top of each file)
cd sbin
vi start-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
vi stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
14. Format HDFS
# copy the configured hadoop directory to the two worker nodes first, for example:
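# (assumes the /home/hadoop3.3.1 layout from step 7 exists on every node)
scp -r /home/hadoop3.3.1 root@hadoop02:/home/
scp -r /home/hadoop3.3.1 root@hadoop03:/home/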
# format the NameNode (run once, on the master, from HADOOP_HOME)
bin/hdfs namenode -format
# the following output indicates the format succeeded
common.Storage: Storage directory /home/hadoop_repo/dfs/name has been successfully formatted.
15. Start the cluster
sbin/start-all.sh # start the cluster
sbin/stop-all.sh # stop the cluster
bin/mapred --daemon start historyserver # start the JobHistoryServer on hadoop01 (yarn.log.server.url in step 11 points there)
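# optional smoke test once everything is up (run from HADOOP_HOME):
bin/hdfs dfs -mkdir -p /tmp/test
bin/hdfs dfs -put etc/hadoop/core-site.xml /tmp/test/
bin/hdfs dfs -ls /tmp/test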
16. Verify
# jps on the master node should show
NameNode
SecondaryNameNode
ResourceManager
# jps on the worker nodes should show
DataNode
NodeManager
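# a cluster-wide view of live DataNodes and capacity is also available via:
bin/hdfs dfsadmin -report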
17. View cluster info
# YARN ResourceManager web UI (cluster resources and applications)
http://192.168.136.103:8088/
# HDFS NameNode web UI (storage and DataNode status)
http://192.168.136.103:9870/
18. View logs from the command line
# fetch the aggregated logs of a YARN application
bin/yarn logs -applicationId application_1639553331593_0001
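# if the application id is unknown, list applications first:
bin/yarn application -list -appStates ALL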