Hadoop cluster dual-NameNode (HA) configuration

hadoop-env.sh
export JAVA_HOME=/etc/alternatives/jre_1.8.0_openjdk
export HADOOP_HOME=/usr/local/hadoop/
export HDFS_NAMENODE_USER=hadoop
export HDFS_DATANODE_USER=hadoop
export HDFS_SECONDARYNAMENODE_USER=hadoop
export YARN_RESOURCEMANAGER_USER=hadoop
export YARN_NODEMANAGER_USER=hadoop
export HADOOP_USER_NAME=hadoop
export HADOOP_SHELL_EXECNAME=hadoop
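
A quick sanity check that these settings take effect is to run the client tools as the `hadoop` user; a sketch, assuming the `HADOOP_HOME` set above:

```bash
# Run as the hadoop user; uses the HADOOP_HOME configured above.
/usr/local/hadoop/bin/hadoop version     # prints the Hadoop build info
/usr/local/hadoop/bin/hadoop classpath   # shows the classpath resolved from HADOOP_HOME
```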

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->



<configuration>
    <!--指定hdfs的nameservice为myha01,需要和core-site.xml中的保持一致
                 dfs.ha.namenodes.[nameservice id]为在nameservice中的每一个NameNode设置唯一标示符。
        配置一个逗号分隔的NameNode ID列表。这将是被DataNode识别为所有的NameNode。
        例如,如果使用"myha01"作为nameservice ID,并且使用"nn1"和"nn2"作为NameNodes标示符
    -->
    <property>
        <name>dfs.nameservices</name>
        <value>myha01</value>
    </property>

    <!-- myha01下面有两个NameNode,分别是nn1,nn2 -->
    <property>
        <name>dfs.ha.namenodes.myha01</name>
        <value>nn1,nn2</value>
    </property>

    <!-- nn1的RPC通信地址 -->
    <property>
        <name>dfs.namenode.rpc-address.myha01.nn1</name>
        <value>hadoop01:9000</value>
    </property>

    <!-- nn1的http通信地址 -->
    <property>
        <name>dfs.namenode.http-address.myha01.nn1</name>
        <value>hadoop01:50070</value>
    </property>

    <!-- nn2的RPC通信地址 -->
    <property>
        <name>dfs.namenode.rpc-address.myha01.nn2</name>
        <value>hadoop04:9000</value>
    </property>

    <!-- nn2的http通信地址 -->
    <property>
        <name>dfs.namenode.http-address.myha01.nn2</name>
        <value>hadoop04:50070</value>
    </property>
  
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/usr/local/hadoop/hdfs/name</value>
    </property>
 
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/local/hadoop/hdfs/data</value>
  </property>
  <!-- 指定副本数 -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
    <!-- 启用webhdfs -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <!-- 指定NameNode的edits元数据的共享存储位置。也就是JournalNode列表
                 该url的配置格式:qjournal://host1:port1;host2:port2;host3:port3/journalId
        journalId推荐使用nameservice,默认端口号是:8485 -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://hadoop01:8485;hadoop04:8485;hadoop02:8485/myha01</value>
    </property>

    <!-- 指定JournalNode在本地磁盘存放数据的位置 -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/usr/local/hadoop/hdfs/journaldata</value>
    </property>

    <!-- 开启NameNode失败自动切换 -->
    <property>
        <name>dfs.ha.automatic-failover.enabled.myha01</name>
        <value>true</value>
    </property>

    <!-- 配置失败自动切换实现方式 -->
    <property>
        <name>dfs.client.failover.proxy.provider.myha01</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- 配置隔离机制方法,多个机制用换行分割,即每个机制暂用一行 -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>

    <!-- 使用sshfence隔离机制时需要ssh免登陆-->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property> 

    <!-- 配置sshfence隔离机制超时时间 -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>

    <property>
        <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
        <value>60000</value>
    </property>
</configuration>

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
	<property>
    <name>fs.defaultFS</name>
    	<value>hdfs://myha01</value>
	</property>

	<property>
	    <name>io.file.buffer.size</name>
	    <value>131072</value>
	</property>

	<property>
	    <name>hadoop.tmp.dir</name>
	    <value>/usr/local/hadoop/hdfs/tmp</value>
	</property>
    <!-- 指定zookeeper地址 -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
    </property>

    <!-- hadoop链接zookeeper的超时时长设置 -->
    <property>
        <name>ha.zookeeper.session-timeout.ms</name>
        <value>1000</value>
        <description>ms</description>
    </property>
</configuration>

yarn-site.xml

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <!-- 开启RM高可用 -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <!-- 指定RM的cluster id -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yrc</value>
    </property>

    <!-- 指定RM的名字 -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>

    <!-- 分别指定RM的地址 -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>hadoop02</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>hadoop03</value>
    </property>

    <!-- 指定zk集群地址 -->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
    </property>

    <!-- 启用自动恢复 -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>

    <!-- 制定resourcemanager的状态信息存储在zookeeper集群上 -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
</configuration>

yarn-env.sh
export JAVA_HOME=/etc/alternatives/jre_1.8.0_openjdk

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>
  <!-- 指定mapreduce jobhistory地址 -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop01:10020</value>
    </property>

    <!-- 任务历史服务器的web地址 -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop01:19888</value>
    </property>
</configuration>

The files above are the complete configuration. All of the operations below should be executed as the hadoop user.

1. Start ZooKeeper and confirm every node is healthy: bin/zkServer.sh start
2. hdfs zkfc -formatZK (run on the primary NameNode)
3. hdfs --daemon start journalnode (run on every JournalNode)
4. hdfs namenode -format myha01 (run on the primary NameNode to format it)
5. hdfs --daemon start namenode (run on the primary NameNode)
6. hdfs namenode -bootstrapStandby (run on the standby NameNode; copies the primary's metadata)
7. hdfs --daemon start namenode (run on the standby NameNode)
8. hdfs --daemon start zkfc (run on both the primary and the standby)
9. hdfs --daemon start datanode (run on every DataNode)
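
After step 9, a quick sanity check is to look at the Java processes on each node and query the HA state; a sketch, using the hostnames and NameNode IDs from the configs above:

```bash
# On the primary NameNode (hadoop01), expect processes such as
# NameNode, JournalNode, DFSZKFailoverController, and QuorumPeerMain.
jps

# One NameNode should report "active" and the other "standby".
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
```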

Routine maintenance
sbin/start-dfs.sh
sbin/stop-dfs.sh
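
`start-dfs.sh`/`stop-dfs.sh` only manage the HDFS daemons (NameNodes, DataNodes, JournalNodes, and ZKFCs). In this layout the ResourceManagers live on hadoop02 and hadoop03, so YARN is managed by its own pair of scripts:

```bash
sbin/start-yarn.sh   # run on a ResourceManager host (hadoop02 or hadoop03)
sbin/stop-yarn.sh
```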

### Fixing a missing NameNode in a Hadoop cluster

If the NameNode is missing after the cluster starts, the following steps can resolve the problem. Make sure everything is done as the `hadoop` user, not as `root`.

#### Clean the environment

Leftover data and temporary files can interfere with a restart, so clear them out first to rule them out as a factor. Specifically:

- Stop the running HDFS services:

  ```bash
  sbin/stop-dfs.sh
  ```

- Likewise stop the YARN ResourceManager:

  ```bash
  sbin/stop-yarn.sh
  ```

- Then remove the contents of the data (`data`) and log (`logs`) directories on each node, and recreate the required directory structure:

  ```bash
  rm -rf /path/to/hadoop/data/
  rm -rf /path/to/hadoop/logs/
  mkdir /path/to/hadoop/logs/
  ```

  Replace the paths above with the corresponding subdirectories of your actual installation.

#### Initialize the NameNode

The directory `/opt/hadoop-3.2.1/tmp/dfs/name` is created by formatting the NameNode, so if that path does not exist, the NameNode has never been initialized. In that case, run the following command to perform the initial setup (or to reset the namespace):

```bash
bin/hdfs namenode -format
```

This step wipes any pre-existing metadata and creates a fresh filesystem image.

#### Verify the configuration

Carefully check the parameters in the core configuration files such as `core-site.xml` and `hdfs-site.xml`, especially the settings that specify storage paths, and confirm they are correct before continuing.

#### Restart the cluster components

Finally, bring the components back up in the correct order:

- On the master node, restart the distributed filesystem (HDFS):

  ```bash
  sbin/start-dfs.sh
  ```

- On the machine responsible for YARN, start the scheduling framework:

  ```bash
  sbin/start-yarn.sh
  ```

After working through this procedure, check the process list again with `jps` and confirm that the expected NameNode instance is present; the web UI can also be used to verify the cluster's health.
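
For the web UI check mentioned above, the HTTP addresses configured in hdfs-site.xml can also be probed from the shell; a minimal sketch with curl:

```bash
# Ports follow dfs.namenode.http-address.* in hdfs-site.xml; expect HTTP 200.
curl -s -o /dev/null -w "%{http_code}\n" http://hadoop01:50070/
curl -s -o /dev/null -w "%{http_code}\n" http://hadoop04:50070/
```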