1. Unify server hostnames
To make the Hadoop cluster easier to manage, give every server a consistent hostname. For example, with 3 servers (the minimum cluster size), add the following entries to /etc/hosts on every machine:
192.168.0.1 hadoop-master-01
192.168.0.2 hadoop-slave-01
192.168.0.3 hadoop-slave-02
Then set the hostname, running the matching command on each machine:
hostnamectl set-hostname hadoop-master-01
hostnamectl set-hostname hadoop-slave-01
hostnamectl set-hostname hadoop-slave-02
Finally, run the reboot command so the new hostname takes effect everywhere.
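A quick sanity check (optional): on the master, confirm the hostname took effect and that the /etc/hosts entries resolve:
hostname
ping -c 1 hadoop-slave-01
ping -c 1 hadoop-slave-02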
2. Passwordless SSH login
Generate a key pair on hadoop-master-01 (RSA is used here because DSA keys are disabled by default in OpenSSH 7.0 and later):
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
Distribute the public key to the two slaves. On both hadoop-slave-01 and hadoop-slave-02, run:
scp root@hadoop-master-01:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub
cat ~/.ssh/master_rsa.pub >> ~/.ssh/authorized_keys
From hadoop-master-01, test the connections to hadoop-slave-01 and hadoop-slave-02:
ssh hadoop-slave-01
logout
ssh hadoop-slave-02
logout
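As an alternative to the manual scp step, on systems that ship ssh-copy-id the key can be pushed from the master directly (you are prompted for each slave's password once):
ssh-copy-id root@hadoop-slave-01
ssh-copy-id root@hadoop-slave-02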
3. Configure the Java environment
Append the following to /etc/profile (adjust JAVA_HOME to your JDK install path):
export JAVA_HOME=/usr/local/java/jdk1.8.0_291
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:$CLASSPATH
export JAVA_PATH=${JAVA_HOME}/bin:${JRE_HOME}/bin
export PATH=$PATH:${JAVA_PATH}
Save the file, then apply and verify:
source /etc/profile
java -version
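If the environment is correct, the output should begin with something like (the exact build string depends on your JDK):
java version "1.8.0_291"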
4. Install Hadoop
On hadoop-master-01, download hadoop-3.3.1 into /home/bigdata (the install prefix assumed throughout this guide):
cd /home/bigdata
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
tar -zxf hadoop-3.3.1.tar.gz
mv hadoop-3.3.1 hadoop
Configure the Hadoop environment:
vim /etc/profile
Append the following:
export HADOOP_HOME=/home/bigdata/hadoop
export PATH=$PATH:${JAVA_PATH}:${HADOOP_HOME}/bin
Save, then apply the changes:
source /etc/profile
Set JAVA_HOME in Hadoop's environment script:
vim /home/bigdata/hadoop/etc/hadoop/hadoop-env.sh
Append:
export JAVA_HOME=/usr/local/java/jdk1.8.0_291
source /home/bigdata/hadoop/etc/hadoop/hadoop-env.sh
Then check the installation:
hadoop version
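The first line of the output should report the release:
Hadoop 3.3.1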
Create the working directories under the Hadoop install directory (these paths are referenced by the configuration below):
cd /home/bigdata/hadoop
mkdir -p tmp hdfs/name hdfs/data
vim /home/bigdata/hadoop/etc/hadoop/core-site.xml
Add the following inside the <configuration> element (fs.defaultFS replaces the fs.default.name key deprecated in Hadoop 3):
<property>
  <name>hadoop.tmp.dir</name>
  <value>/home/bigdata/hadoop/tmp</value>
  <final>true</final>
  <description>A base for other temporary directories.</description>
</property>
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://hadoop-master-01:9000</value>
  <final>true</final>
</property>
<property>
  <name>io.file.buffer.size</name>
  <value>131072</value>
</property>
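fs.defaultFS is the NameNode RPC endpoint that all clients and DataNodes connect to. Once HADOOP_HOME is on the PATH, the effective value can be double-checked with:
hdfs getconf -confKey fs.defaultFS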
vim /home/bigdata/hadoop/etc/hadoop/hdfs-site.xml
Add the following inside <configuration>. With 3 servers we run 1 NameNode and 2 DataNodes, so the replication factor is 2. The property names below are the Hadoop 3 forms (dfs.namenode.name.dir, dfs.datanode.data.dir, dfs.permissions.enabled, and dfs.namenode.http-address replace their deprecated Hadoop 2 equivalents):
<property>
  <name>dfs.replication</name>
  <value>2</value>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/home/bigdata/hadoop/hdfs/name</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/home/bigdata/hadoop/hdfs/data</value>
</property>
<property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>hadoop-master-01:9001</value>
</property>
<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>
<property>
  <name>dfs.permissions.enabled</name>
  <value>false</value>
</property>
<property>
  <name>dfs.namenode.http-address</name>
  <value>hadoop-master-01:50070</value>
</property>
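The replication factor can be verified the same way; note it cannot usefully exceed the number of DataNodes (2 here):
hdfs getconf -confKey dfs.replication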
vim /home/bigdata/hadoop/etc/hadoop/mapred-site.xml
Add the following inside <configuration>:
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
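The three HADOOP_MAPRED_HOME entries matter on Hadoop 3.x: without them, MapReduce jobs commonly fail with an error like:
Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster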
vim /home/bigdata/hadoop/etc/hadoop/yarn-site.xml
Add the following inside <configuration>:
<property>
  <name>yarn.resourcemanager.address</name>
  <value>hadoop-master-01:18088</value>
</property>
<property>
  <name>yarn.resourcemanager.scheduler.address</name>
  <value>hadoop-master-01:18030</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.address</name>
  <value>hadoop-master-01:18040</value>
</property>
<property>
  <name>yarn.resourcemanager.resource-tracker.address</name>
  <value>hadoop-master-01:18025</value>
</property>
<property>
  <name>yarn.resourcemanager.admin.address</name>
  <value>hadoop-master-01:18141</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
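After the cluster is started (see below), the ResourceManager web UI should answer on the webapp address configured above, e.g.:
curl -s http://hadoop-master-01:18040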
vim /home/bigdata/hadoop/etc/hadoop/yarn-env.sh
Append:
export JAVA_HOME=/usr/local/java/jdk1.8.0_291
vim /home/bigdata/hadoop/etc/hadoop/workers
List the worker nodes:
hadoop-slave-01
hadoop-slave-02
vim /home/bigdata/hadoop/sbin/start-dfs.sh
vim /home/bigdata/hadoop/sbin/stop-dfs.sh
Add the following at the top of both files (the daemons run as root here for simplicity; a dedicated hadoop user is preferable in production):
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim /home/bigdata/hadoop/sbin/start-yarn.sh
vim /home/bigdata/hadoop/sbin/stop-yarn.sh
Add the following at the top of both files:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
Distribute the environment and the Hadoop directory to hadoop-slave-01 and hadoop-slave-02 (this assumes the same JDK is already installed at /usr/local/java/jdk1.8.0_291 on both slaves):
scp -rp /etc/profile root@hadoop-slave-01:/etc/profile
scp -rp /etc/profile root@hadoop-slave-02:/etc/profile
scp -rp /home/bigdata/hadoop root@hadoop-slave-01:/home/bigdata/hadoop
scp -rp /home/bigdata/hadoop root@hadoop-slave-02:/home/bigdata/hadoop
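After the copy, load the new environment variables on each slave (or simply log in again):
source /etc/profile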
Start the Hadoop system. On hadoop-master-01, format the NameNode (first run only; reformatting wipes HDFS metadata):
cd /home/bigdata/hadoop/bin
hdfs namenode -format
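A successful format ends with a log line similar to:
Storage directory /home/bigdata/hadoop/hdfs/name has been successfully formatted.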
Start all cluster daemons from the master, HDFS first, then YARN:
cd /home/bigdata/hadoop/sbin
./start-dfs.sh
./start-yarn.sh
Use jps to verify the daemons started. On hadoop-master-01 you should see:
ResourceManager
NameNode
SecondaryNameNode
On hadoop-slave-01 and hadoop-slave-02:
DataNode
NodeManager
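To confirm that both DataNodes registered with the NameNode, run the following on the master; the report should include a line like "Live datanodes (2):":
hdfs dfsadmin -report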
5. MapReduce test on the Hadoop cluster
Verify that Hadoop works end to end. First, a basic HDFS smoke test:
hadoop fs -mkdir -p /test
hadoop fs -ls /
touch words.txt
echo "hello world" >> words.txt
hadoop fs -put words.txt /test
Then prepare input for the wordcount example:
hadoop fs -mkdir -p /test/input
touch words-input.txt
echo 'hello world' >> words-input.txt
echo 'hello java' >> words-input.txt
echo 'hello hadoop' >> words-input.txt
hadoop fs -put words-input.txt /test/input
hadoop fs -ls /test/input
Run the bundled wordcount example:
cd /home/bigdata/hadoop/share/hadoop/mapreduce/
hadoop jar hadoop-mapreduce-examples-3.3.1.jar wordcount /test/input /test/output
hadoop fs -ls /test/output
hadoop fs -cat /test/output/part-r-00000
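For the three input lines written above, the output should be the per-word counts (tab-separated):
hadoop	1
hello	3
java	1
world	1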