hadoop ha 分布式高可用集群的爬坑之旅

本文详细介绍了在Hadoop集群中配置高可用HA的过程,包括HDFS和YARN的HA配置步骤,解决了配置过程中遇到的常见问题,并分享了配置成功后的启动与验证经验。

看到好多关于hadoop ha的介绍,挺诱人,中间耽搁了一段时间,现在终于把它做了,我在原先的集群之上,花了5个多小时根据官网把hadoop ha 配置了,以下是这次的总结:

配置
启动
查看

HDFS ha 的配置

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Logical nameservice ID; the property names below reuse it as a suffix, so keep them in sync. -->
  <property>
    <name>dfs.nameservices</name>
    <value>mztt</value>
  </property>

  <!-- IDs of the NameNodes in this nameservice (the author notes Hadoop 3.x is said to allow more than two). -->
  <property>
    <name>dfs.ha.namenodes.mztt</name>
    <value>nn1,nn2</value>
  </property>

  <!-- RPC address of each NameNode. -->
  <property>
    <name>dfs.namenode.rpc-address.mztt.nn1</name>
    <value>mztt1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mztt.nn2</name>
    <value>mztt2:8020</value>
  </property>

  <!-- Web UI address of each NameNode. -->
  <property>
    <name>dfs.namenode.http-address.mztt.nn1</name>
    <value>mztt1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mztt.nn2</name>
    <value>mztt2:50070</value>
  </property>

  <!-- Location of the shared edit log: the JournalNode quorum, with the nameservice ID as journal ID. -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://mztt1:8485;mztt2:8485;mztt3:8485/mztt</value>
  </property>

  <!-- Failover proxy provider class configured for clients of the mztt nameservice. -->
  <property>
    <name>dfs.client.failover.proxy.provider.mztt</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

  <!-- Fencing method. NOTE: in this walkthrough automatic failover stayed stuck until the
       psmisc package (which provides fuser) was installed on the hosts. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>

  <!-- Private key used by the SSH fencing method. -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>

  <!-- Timeout for the SSH fencing connection (milliseconds according to the Hadoop docs; TODO confirm for your version). -->
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>

  <!-- Local directory where each JournalNode stores its data. -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/data/journal</value>
  </property>

  <!-- Enable automatic failover. -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
</configuration>

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Default filesystem URI prefix; points at the mztt nameservice rather than a single host. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mztt</value>
  </property>

  <!-- ZooKeeper nodes used for HA coordination. -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>mztt1:2181,mztt2:2181,mztt3:2181</value>
  </property>

  <!-- I/O buffer size. -->
  <property>
    <name>io.file.buffer.size</name>
    <value>8192</value>
  </property>

  <!-- Temporary directory. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/data/tmp</value>
  </property>
</configuration>

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Run MapReduce on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- Job history service address. -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>mztt1:10020</value>
  </property>

  <!-- Job history web UI address. -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>mztt1:19888</value>
  </property>
</configuration>

YARN ha 的配置

yarn-site.xml

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Site specific YARN configuration properties -->

  <!-- Auxiliary service run by the NodeManagers. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- Log aggregation settings, currently disabled (kept commented out):
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>106800</value>
  </property>
  -->

  <!-- Enable ResourceManager HA. -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>

  <!-- Cluster ID for this YARN cluster. -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>cluster1</value>
  </property>

  <!-- Logical IDs of the ResourceManagers. -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>

  <!-- Host on which each ResourceManager runs. -->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>mztt1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>mztt2</value>
  </property>

  <!-- Web UI address of each ResourceManager. -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>mztt1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>mztt2:8088</value>
  </property>

  <!-- ZooKeeper address list. -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>mztt1:2181,mztt2:2181,mztt3:2181</value>
  </property>
</configuration>

启动

启动这地方挺坑,第一次最好照这个顺序来。

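  1. 启动zookeeper(mztt1、mztt2、mztt3 上都要执行)
# 已配置环境变量时可直接执行 zkServer.sh start;否则进入 zookeeper 的 bin 目录执行
./zkServer.sh start
  2. 启动 journalnode(mztt1、mztt2、mztt3 上都要执行)
hadoop-daemon.sh start journalnode
  3. 先格式化一个namenode,任意一个都行
hdfs namenode -format
  4. 启动刚格式化的那个namenode
hadoop-daemon.sh start namenode
  5. 在另一个(未格式化的)namenode上同步元数据,同步完成后再用 hadoop-daemon.sh start namenode 启动它
hdfs namenode -bootstrapStandby
  6. 格式化zkfc(在其中一个namenode上执行一次即可)
hdfs zkfc -formatZK
  7. 启动其余进程(zkfc、datanode、yarn);如果第二个resourcemanager没有随 start-yarn.sh 启动,需要到对应节点上手动启动
start-dfs.sh
start-yarn.sh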

查看

jps 看一下进程发现都起来了:

8240 DataNode
14032 Jps
8065 JournalNode
8003 QuorumPeerMain
9238 NodeManager
8166 NameNode
8427 DFSZKFailoverController
8669 ResourceManager

然后去网页查看,2个namenode都能访问,一个是standby,另一个是active。接着kill了一个,试试是不是跟介绍的一样能自动切换。
5分钟过去了,standby还是没有切换。起初我以为是切换需要时间,或者是我的配置有误,
于是把原先集群的历史服务等配置都注释掉了,还是不行。
最后百度了一圈才找到原因:我的系统是CentOS 7.6,装的时候选的是最小安装,缺少fuser命令;而sshfence在隔离时要通过ssh登录到对方节点、用fuser杀掉namenode进程,没有fuser隔离就会失败,也就无法切换。于是开始安装:


yum search fuser


#已加载插件:fastestmirror
Determining fastest mirrors
 * base: mirrors.huaweicloud.com
 * extras: mirrors.huaweicloud.com
 * updates: mirrors.cn99.com
============================================== 匹配:fuser ==============================================
psmisc.x86_64 : Utilities for managing processes on your system

# 发现在psmisc里
yum install psmisc

装完之后(两个namenode节点都要安装psmisc),重启集群,自动切换就正常了。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值