Setting Up Local Hadoop HA, HBase, Spark, Flink, and ZooKeeper Clusters with Docker


Kafka Cluster Setup


# Pull the Kafka image and the kafka-manager image
docker pull wurstmeister/kafka:2.12-2.3.1
docker pull sheepkiller/kafka-manager


Edit the docker-compose.yml file:
version: '2'

services:
  broker1:
    image: wurstmeister/kafka:2.12-2.3.1
    restart: always              # restart automatically on failure
    hostname: broker1            # node hostname
    container_name: broker1      # container name
    privileged: true             # allow extended privileges inside the container
    ports:
      - "9091:9092"              # map container port 9092 to host port 9091
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_LISTENERS: PLAINTEXT://broker1:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker1:9092
      KAFKA_ADVERTISED_HOST_NAME: broker1
      KAFKA_ADVERTISED_PORT: 9092
      # the ZooKeeper chroot goes once, at the end of the connect string
      KAFKA_ZOOKEEPER_CONNECT: zoo1:2181,zoo2:2181,zoo3:2181/kafka1
      JMX_PORT: 9988             # JMX port used by kafka-manager
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./broker1:/kafka/kafka-logs-broker1
    external_links:              # containers created outside this compose file
      - zoo1
      - zoo2
      - zoo3
    networks:
      default:
        ipv4_address: 172.25.0.14
  
  broker2:
    image: wurstmeister/kafka:2.12-2.3.1
    restart: always
    hostname: broker2
    container_name: broker2
    privileged: true
    ports:
      - "9092:9092"
    environment:
      KAFKA_BROKER_ID: 2
      KAFKA_LISTENERS: PLAINTEXT://broker2:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker2:9092
      KAFKA_ADVERTISED_HOST_NAME: broker2
      KAFKA_ADVERTISED_PORT: 9092
      KAFKA_ZOOKEEPER_CONNECT: zoo1:2181,zoo2:2181,zoo3:2181/kafka1
      JMX_PORT: 9988
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./broker2:/kafka/kafka-logs-broker2
    external_links:              # containers created outside this compose file
      - zoo1
      - zoo2
      - zoo3
    networks:
      default:
        ipv4_address: 172.25.0.15
  broker3:
    image: wurstmeister/kafka:2.12-2.3.1
    restart: always
    hostname: broker3
    container_name: broker3
    privileged: true
    ports:
      - "9093:9092"
    environment:
      KAFKA_BROKER_ID: 3
      KAFKA_LISTENERS: PLAINTEXT://broker3:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker3:9092
      KAFKA_ADVERTISED_HOST_NAME: broker3
      KAFKA_ADVERTISED_PORT: 9092
      KAFKA_ZOOKEEPER_CONNECT: zoo1:2181,zoo2:2181,zoo3:2181/kafka1
      JMX_PORT: 9988
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./broker3:/kafka/kafka-logs-broker3
    external_links:              # containers created outside this compose file
      - zoo1
      - zoo2
      - zoo3
    networks:
      default:
        ipv4_address: 172.25.0.16
  kafka-manager:
    image: sheepkiller/kafka-manager:latest
    restart: always
    container_name: kafka-manager
    hostname: kafka-manager
    ports:
      - "9000:9000"
    links:                       # containers created by this compose file
      - broker1
      - broker2
      - broker3
    external_links:              # containers created outside this compose file
      - zoo1
      - zoo2
      - zoo3
    environment:
      ZK_HOSTS: zoo1:2181,zoo2:2181,zoo3:2181/kafka1
      KAFKA_BROKERS: broker1:9092,broker2:9092,broker3:9092
      APPLICATION_SECRET: letmein
      KM_ARGS: -Djava.net.preferIPv4Stack=true
    networks:
      default:
        ipv4_address: 172.25.0.10

networks:
  default:
    external:                    # reuse an already-created network
      name: bigdata
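The compose file assumes an external Docker network named bigdata already exists (if it wasn't created back in the ZooKeeper section). A minimal sketch to create it, with the subnet inferred from the 172.25.0.x addresses used throughout this article:

# Create the shared bridge network used by all clusters
# (subnet is an inference from the static ipv4_address values above)
docker network create --driver bridge --subnet 172.25.0.0/16 bigdata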

# Bring up the cluster
docker-compose up -d
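Once the brokers are up, here is a quick smoke test, assuming the zoo1–zoo3 ZooKeeper containers are already running on the same network; the topic name test is arbitrary, and kafka-topics.sh is on PATH in the wurstmeister image:

# List the running broker containers
docker ps --filter name=broker
# Create and list a test topic from inside broker1. JMX_PORT is cleared so the
# CLI tool doesn't collide with the broker's own JMX listener on 9988.
docker exec -e JMX_PORT= broker1 kafka-topics.sh --create \
  --zookeeper zoo1:2181/kafka1 --replication-factor 3 --partitions 3 --topic test
docker exec -e JMX_PORT= broker1 kafka-topics.sh --list --zookeeper zoo1:2181/kafka1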


**Check the host: local port 9000 is indeed up, serving the kafka-manager UI.**


Hadoop HA Cluster Setup


Creating the cluster with docker-compose
version: '2'

services:
  master:
    image: hadoop:latest
    restart: always              # restart automatically on failure
    hostname: master             # node hostname
    container_name: master       # container name
    privileged: true             # allow extended privileges inside the container
    networks:
      default:
        ipv4_address: 172.25.0.3

  master_standby:
    image: hadoop:latest
    restart: always
    hostname: master_standby
    container_name: master_standby
    privileged: true
    networks:
      default:
        ipv4_address: 172.25.0.4

  slave01:
    image: hadoop:latest
    restart: always
    hostname: slave01
    container_name: slave01
    privileged: true
    networks:
      default:
        ipv4_address: 172.25.0.5

  slave02:
    image: hadoop:latest
    restart: always
    container_name: slave02
    hostname: slave02
    networks:
      default:
        ipv4_address: 172.25.0.6

  slave03:
    image: hadoop:latest
    restart: always
    container_name: slave03
    hostname: slave03
    networks:
      default:
        ipv4_address: 172.25.0.7

networks:
  default:
    external:                    # reuse the pre-created bigdata network
      name: bigdata
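Bring it up the same way as the Kafka cluster, from the directory holding this file:

# Start the Hadoop containers
docker-compose up -d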

Creating the containers from the command line (an alternative to compose)
# Create the master node
docker run -tid --name master --privileged=true hadoop:latest /usr/sbin/init
# Create the hot-standby master_standby node
docker run -tid --name master_standby --privileged=true hadoop:latest /usr/sbin/init
# Create three slave nodes
docker run -tid --name slave01 --privileged=true hadoop:latest /usr/sbin/init
docker run -tid --name slave02 --privileged=true hadoop:latest /usr/sbin/init
docker run -tid --name slave03 --privileged=true hadoop:latest /usr/sbin/init
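Note that these docker run commands don't attach the containers to the bigdata network, so the static IPs used below won't line up. A hedged variant that pins network and address (the flags are standard Docker; the IP comes from the compose file above):

# Example for the master node; repeat with the matching IP for each container
docker run -tid --name master --privileged=true \
  --network bigdata --ip 172.25.0.3 hadoop:latest /usr/sbin/init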

Configure passwordless SSH login on every node
ssh-keygen -t rsa
# Keep pressing Enter to accept the defaults
# Do this on every machine

Append each node's public key to every machine's authorized_keys.

One small caveat: first check whether passwd is installed; if not, run:

yum install passwd
# Then set the root password
passwd


# Every machine's public key must go into authorized_keys on itself and on
# every other machine; doing this now avoids trouble when installing other
# components later
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
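Copying keys by hand across five nodes is error-prone. A sketch using ssh-copy-id instead, assuming the root password set above and the hostnames configured in /etc/hosts below:

# Run on every node: push the local public key to all nodes, including itself
for host in master master_standby slave01 slave02 slave03; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub root@$host
done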

Edit /etc/hosts

Note: a hostname like master_standby may not be accepted everywhere; on some machines HDFS formatting rejects hostnames containing an underscore, so avoid special characters in these names.
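A sketch of what /etc/hosts should contain, using the static IPs assigned in the compose files above (keep the underscore caveat in mind if you rename the standby):

172.25.0.3   master
172.25.0.4   master_standby
172.25.0.5   slave01
172.25.0.6   slave02
172.25.0.7   slave03
172.25.0.14  broker1
172.25.0.15  broker2
172.25.0.16  broker3
# plus the zoo1-zoo3 entries from the ZooKeeper cluster section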

# Copy /etc/hosts to every node
scp /etc/hosts master_standby:/etc/
scp /etc/hosts slave01:/etc/
scp /etc/hosts slave02:/etc/
scp /etc/hosts slave03:/etc/

Configuring Hadoop
# Unpack the Hadoop tarball
tar -zxvf hadoop-2.8.5.tar.gz


Configure environment variables
# Edit the environment variables
vim ~/.bashrc
# Append the following
export HADOOP_HOME=/usr/local/hadoop-2.8.5
export CLASSPATH=.:$HADOOP_HOME/lib:$CLASSPATH
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_ROOT_LOGGER=INFO,console
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# Copy this file to the other machines
scp ~/.bashrc <hostname>:~/
# Reload and verify with the hadoop command
source ~/.bashrc
hadoop version


Configuration files

hdfs-site.xml

<configuration>

<!-- logical name of the nameservice; must match fs.defaultFS in core-site.xml -->
<property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
</property>
<!-- two NameNodes: nn1 and nn2 -->
<property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
</property>

<!-- RPC address of NameNode nn1 -->
<property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>master:9000</value>
</property>
<!-- HTTP address of NameNode nn1 -->
<property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>master:50070</value>
</property>
<!-- RPC address of NameNode nn2 -->
<property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>master_standby:9000</value>
</property>
<!-- HTTP address of NameNode nn2 -->
<property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>master_standby:50070</value>
</property>

<!-- JournalNodes that store the NameNodes' shared edit log -->
<property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://slave01:8485;slave02:8485;slave03:8485/mycluster</value>
</property>

<!-- where each JournalNode keeps its data on local disk -->
<property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/usr/local/hadoop-2.8.5/journaldata</value>
</property>

<!-- enable automatic failover -->
<property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
</property>

<!-- proxy provider clients use to find the active NameNode -->
<property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- fencing method used to isolate a failed NameNode -->
<property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
</property>
<!-- private key used by sshfence -->
<property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
</property>

<property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
</property>
</configuration>

core-site.xml

<configuration>
<!-- default filesystem points at the HA nameservice -->
<property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
</property>
<!-- base directory for Hadoop temporary files -->
<property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop-2.8.5/tmp</value>
</property>
<!-- ZooKeeper quorum used for automatic failover -->
<property>
        <name>ha.zookeeper.quorum</name>
        <value>zoo1:2181,zoo2:2181,zoo3:2181</value>
</property>
</configuration>

yarn-site.xml

<configuration>

<!-- Site specific YARN configuration properties -->
<!-- enable ResourceManager HA -->
<property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
</property>
<!-- cluster id for the RM pair -->
<property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>mycluster</value>
</property>

<!-- logical ids of the ResourceManagers -->
<property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
</property>

<!-- rm1's host -->
<property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>master</value>
</property>
<!-- rm2's host -->
<property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>master_standby</value>
</property>

<!-- ZooKeeper cluster address -->
<property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>zoo1:2181,zoo2:2181,zoo3:2181</value>
</property>

<!-- shuffle service for MapReduce -->
<property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
</property>

<property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
</property>

<!-- web UI addresses -->
<property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>master:8001</value>
</property>
<property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>master_standby:8001</value>
</property>
<!-- rm1 -->
<property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>master:8030</value>
</property>
<property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>master:8031</value>
</property>
<property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>master:8032</value>
</property>
<property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>master:8033</value>
</property>
<!-- rm2 -->
<property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>master_standby:8030</value>
</property>
<property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>master_standby:8031</value>
</property>
<property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>master_standby:8032</value>
</property>
<property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>master_standby:8033</value>
</property>
<!-- maximum retention time for aggregated YARN cluster logs (this comment was
     reconstructed from garbled text; the property below is an assumption
     based on it, and the value is a placeholder) -->
<property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
</property>
</configuration>
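The scraped article breaks off at this point. For completeness, here is a hedged sketch of the usual first-boot sequence for this HA layout; these are standard Hadoop 2.x commands, not taken from the original, and they assume ZooKeeper is running and the slaves file lists slave01-slave03:

# 0. Distribute the unpacked Hadoop directory (with the configs above) to every node
for host in master_standby slave01 slave02 slave03; do
  scp -r /usr/local/hadoop-2.8.5 $host:/usr/local/
done

# 1. On slave01-slave03: start the JournalNodes
hadoop-daemon.sh start journalnode

# 2. On master: format the active NameNode and start it
hdfs namenode -format
hadoop-daemon.sh start namenode

# 3. On master_standby: copy the formatted metadata from the active NameNode
hdfs namenode -bootstrapStandby

# 4. On master: initialize the failover state in ZooKeeper, then start everything
hdfs zkfc -formatZK
start-dfs.sh
start-yarn.sh

# 5. On master_standby: the standby ResourceManager must be started by hand
yarn-daemon.sh start resourcemanager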
