1. 解压缩(bigdata用户)
tar -zvxf hadoop-2.7.2.tar.gz
2. 环境变量配置
vi .bash_profile
HADOOP_HOME=/home/bigdata/hadoop-2.7.2
PATH=$PATH:$HADOOP_HOME/bin
export HADOOP_HOME
# 保存退出
source .bash_profile  # 刷新环境变量
3. Hadoop配置
-core-site.xml
<configuration>
<!-- Hadoop文件系统依赖的基础配置 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/bigdata/hadoop/data</value>
</property>
<!-- NameNode结点的URI(包括协议、主机名称、端口号) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://HADOOP01:8020</value>
</property>
<!-- 开启回收站机制,可以设置文件彻底删除的时间,单位为分钟;默认为0,表示不启用回收站
相关详解(https://blog.csdn.net/mn_kw/article/details/72586578)-->
<property>
<name>fs.trash.interval</name>
<value>60</value>
</property>
</configuration>
-hdfs-site.xml
<configuration>
<!-- secondarynamenode的http服务器地址和端口 -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>HADOOP01:50090</value>
</property>
<!-- 默认的数据块副本数 -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- 关闭权限校验 -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!-- namenode的http服务器地址和端口 -->
<property>
<name>dfs.namenode.http-address</name>
<value>HADOOP01:50070</value>
</property>
<!-- datanode结点被指定要存储数据的本地文件系统路径 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///home/bigdata/hadoop/data/dfs/dn</value>
</property>
</configuration>
-mapred-site.xml(重命名mapred-site.xml.template)
<configuration>
<!-- MapReduce JobHistory进程通信主机、端口 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>HADOOP01:10020</value>
</property>
<!-- MapReduce JobHistory的web界面主机、端口 -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>HADOOP01:19888</value>
</property>
<!-- 以yarn方式运行MapReduce -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
-yarn-site.xml
<configuration>
<!-- resourcemanager的主机名 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>HADOOP01</value>
</property>
<!-- resourcemanager提供给nodemanager的地址 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>HADOOP01:8031</value>
</property>
<!-- resourcemanager中应用程序管理器接口的地址 -->
<property>
<name>yarn.resourcemanager.address</name>
<value>HADOOP01:8032</value>
</property>
<!-- 调度器接口的地址 -->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>HADOOP01:8030</value>
</property>
<!-- 分配给容器的物理内存量(75%) -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>1536</value>
</property>
<!-- NodeManager上运行的附属服务,配置成mapreduce_shuffle才可运行MR -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
-hadoop-env.sh
export JAVA_HOME=/home/bigdata/jdk1.8.0_171
-slaves
HADOOP01
HADOOP02
HADOOP03
-使用ssh拷贝文件
可将以下命令以脚本方式执行,执行前确保SSH配置正确
==tip==:可用touch创建文件并在里面写入如下命令,也可逐条单独执行,但切勿从win环境下复制过来(会导致所有文件都复制到一个文件夹中)
scp -r /home/bigdata/jdk1.8.0_171 bigdata@HADOOP02:/home/bigdata
scp -r /home/bigdata/jdk1.8.0_171 bigdata@HADOOP03:/home/bigdata
scp -r /home/bigdata/hadoop-2.7.2 bigdata@HADOOP02:/home/bigdata
scp -r /home/bigdata/hadoop-2.7.2 bigdata@HADOOP03:/home/bigdata
scp /home/bigdata/.bash_profile bigdata@HADOOP02:/home/bigdata
scp /home/bigdata/.bash_profile bigdata@HADOOP03:/home/bigdata
4. HADOOP启动/停止
hdfs namenode -format  # 格式化namenode节点
cd /home/bigdata/hadoop-2.7.2/sbin  # 切换目录
./start-all.sh  # 启动HADOOP
./stop-all.sh  # 停止HADOOP