HA环境搭建
zookeeper集群搭建
-
zookeeper由2n+1台服务器组成
-
集群搭建
-
java环境,需要安装jdk
-
修改配置文件
# zoo.cfg dataDir=/opt/data/zookeeper_data server.1=node-01:2888:3888 server.2=node-02:2888:3888 server.3=node-03:2888:3888
-
创建myid文件,
echo 1 > /opt/data/zookeeper_data/myid
-
分发到其他机器,并将zookeeper添加到环境变量中;修改myid为各自的id
-
服务启动,
zkServer.sh start
-
服务状态、角色查询
zkServer.sh status
-
服务关闭
zkServer.sh stop
-
hadoop集群
-
各个节点之间互相免密
-
namenode中需要有zkfc,各个节点需要zk和jn
-
配置文件修改
- hadoop-env.sh
# hadoop-env.sh export JAVA_HOME=/opt/software/jdk1.8.0_311 export HDFS_NAMENODE_USER=root export HDFS_DATANODE_USER=root export HDFS_SECONDARYNAMENODE_USER=root export YARN_RESOURCEMANAGER_USER=root export YARN_NODEMANAGER_USER=root export HDFS_ZKFC_USER=root export HDFS_JOURNALNODE_USER=root
- core-site.xml
<!--core-site.xml--> <configuration> <!--HADOOP集群名称, 需要和hdfs-site.xml文件中的名称保持一致--> <property> <name>fs.defaultFS</name> <value>hdfs://hadoop-cluster</value> <description>集群名称,需要和hdfs-site.xml文件中保持一致</description> </property> <property> <name>hadoop.tmp.dir</name> <value>/opt/data/hadoop</value> <description>临时文件夹,元数据默认文件夹</description> </property> <property> <name>hadoop.http.staticuser.user</name> <value>root</value> <description>http请求用户</description> </property> <property> <name>hadoop.proxyuser.root.hosts</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.root.groups</name> <value>*</value> </property> <!--zookeeper集群地址--> <property> <name>ha.zookeeper.quorum</name> <value>node-01:2181,node-02:2181,node-03:2181</value> <description>zookeeper集群地址</description> </property> </configuration>
- hdfs-site.xml
<!--hdfs-site.xml--> <configuration> <!--集群名称, 和core-site.xml保持一致--> <property> <name>dfs.nameservices</name> <value>hadoop-cluster</value> </property> <!--集群namenode节点别名--> <property> <name>dfs.ha.namenodes.hadoop-cluster</name> <value>namenode-1, namenode-2</value> </property> <!--namenode-1 RPC地址--> <property> <name>dfs.namenode.rpc-address.hadoop-cluster.namenode-1</name> <value>node-01:8020</value> </property> <!--namenode-1 http地址--> <property> <name>dfs.namenode.http-address.hadoop-cluster.namenode-1</name> <value>node-01:9870</value> </property> <!--namenode-2 RPC地址--> <property> <name>dfs.namenode.rpc-address.hadoop-cluster.namenode-2</name> <value>node-02:8020</value> </property> <!--namenode-2 http地址--> <property> <name>dfs.namenode.http-address.hadoop-cluster.namenode-2</name> <value>node-02:9870</value> </property> <!--指定namenode的edits元数据日志在journalNode上的存储位置,hadoop自身的服务--> <property> <name>dfs.namenode.shared.edits.dir</name> <value>qjournal://node-01:8485;node-02:8485;node-03:8485/hadoop-cluster</value> </property> <!--指定journalNode在本地磁盘存放数据的位置, 提前创建--> <property> <name>dfs.journalnode.edits.dir</name> <value>/opt/data/hadoop/journaldata</value> </property> <!--namenode失败自动切换--> <property> <name>dfs.ha.automatic-failover.enabled</name> <value>true</value> </property> <!--指定该集群出故障时,由哪个类负责执行故障切换--> <property> <name>dfs.client.failover.proxy.provider.hadoop-cluster</name> <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> </property> <!--隔离机制方法--> <property> <name>dfs.ha.fencing.methods</name> <value>sshfence</value> </property> <!--免密登录--> <property> <name>dfs.ha.fencing.ssh.private-key-files</name> <value>/root/.ssh/id_rsa</value> </property> <!--隔离机制超时时间--> <property> <name>dfs.ha.fencing.ssh.connect-timeout</name> <value>30000</value> </property> <!--副本数--> <property> <name>dfs.replication</name> <value>3</value> </property> <!--http web访问--> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> 
</property> <!--关闭权限--> <property> <name>dfs.permissions.enabled</name> <value>false</value> </property> <!--数据文件存放目录--> <property> <name>dfs.datanode.data.dir</name> <value>/opt/data/datanode</value> </property> <!-- 安全模式下,standby NameNode也可以变为active --> <property> <name>dfs.ha.nn.not-become-active-in-safemode</name> <value>false</value> </property> </configuration>
- yarn高可用,修改yarn-site.xml
<!-- 启用HA高可用性 --> <property> <name>yarn.resourcemanager.ha.enabled</name> <value>true</value> </property> <property> <name>yarn.resourcemanager.ha.automatic-failover.embedded</name> <value>true</value> <description>ha状态切换为自动切换</description> </property> <!-- 指定resourcemanager的名字 --> <property> <name>yarn.resourcemanager.cluster-id</name> <value>yarn-cluster</value> </property> <!-- 使用了2个resourcemanager,分别指定Resourcemanager的地址 --> <property> <name>yarn.resourcemanager.ha.rm-ids</name> <value>yarn-ha-rm1,yarn-ha-rm2</value> </property> <!-- 指定rm1的地址 --> <property> <name>yarn.resourcemanager.hostname.yarn-ha-rm1</name> <value>node-01</value> </property> <!-- 指定rm2的地址 --> <property> <name>yarn.resourcemanager.hostname.yarn-ha-rm2</name> <value>node-02</value> </property> <!-- 指定当前机器node-01作为yarn-ha-rm1 ,node-02的这一项设置为yarn-ha-rm2, node-03服务器删除该属性--> <property> <name>yarn.resourcemanager.ha.id</name> <value>yarn-ha-rm1</value> </property> <!--出现空指针问题时,需要配置这几个选项--> <property> <name>yarn.resourcemanager.webapp.address.yarn-ha-rm1</name> <value>node-01:8088</value> </property> <property> <name>yarn.resourcemanager.webapp.address.yarn-ha-rm2</name> <value>node-02:8088</value> </property> <property> <name>yarn.resourcemanager.scheduler.address.yarn-ha-rm1</name> <value>node-01:8030</value> </property> <property> <name>yarn.resourcemanager.scheduler.address.yarn-ha-rm2</name> <value>node-02:8030</value> </property> <!-- 指定zookeeper集群 --> <property> <name>yarn.resourcemanager.zk-address</name> <value>node-01:2181,node-02:2181,node-03:2181</value> </property> <!-- NodeManager上运行的附属服务,默认是mapreduce_shuffle --> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <!--加载类库--> <property> <name>yarn.application.classpath</name> 
<value>/opt/server/hadoop/etc/hadoop:/opt/server/hadoop/share/hadoop/common/lib/*:/opt/server/hadoop/share/hadoop/common/*:/opt/server/hadoop/share/hadoop/hdfs:/opt/server/hadoop/share/hadoop/hdfs/lib/*:/opt/server/hadoop/share/hadoop/hdfs/*:/opt/server/hadoop/share/hadoop/mapreduce/lib/*:/opt/server/hadoop/share/hadoop/mapreduce/*:/opt/server/hadoop/share/hadoop/yarn:/opt/server/hadoop/share/hadoop/yarn/lib/*:/opt/server/hadoop/share/hadoop/yarn/*</value> <!-- 执行hadoop classpath获得 --> </property>
- mapred-site.xml
<property> <name>mapreduce.framework.name</name> <value>yarn</value> </property>
-
启动journalnode
hdfs --daemon start journalnode
-
集群初始化
hdfs namenode -format
-
启动namenode
hdfs --daemon start namenode
-
Node-02上同步元数据
hdfs namenode -bootstrapStandby
- 或者执行
scp -r /opt/data/hadoop/dfs root@node-02:/opt/data/hadoop
- 或者执行
-
格式化zkfc,在active服务器上面执行即可
hdfs zkfc -formatZK
-
启动节点服务
start-dfs.sh; start-yarn.sh
-
如果切换失败,
yum install psmisc -y
服务器断电重启集群
-
启动zookeeper集群,
zkServer.sh start
-
启动journalnode,
hdfs --daemon start journalnode
-
启动hdfs, yarn
sh start-dfs.sh; sh start-yarn.sh