Docker Installation and Configuration
Install Docker (adapted from the official Docker documentation)
Set up the Docker repository
- Update the apt package index
$ sudo apt-get update
- Install packages that allow apt to use a repository over HTTPS
$ sudo apt-get install \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg-agent \
    software-properties-common
- Add Docker's official GPG key
$ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
- Set up the stable repository
$ sudo add-apt-repository \
    "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
    $(lsb_release -cs) \
    stable"
Install Docker Engine - Community
- Update the package index again
$ sudo apt-get update
- Install the latest version of Docker Engine
$ sudo apt-get install docker-ce docker-ce-cli containerd.io
- Verify that Docker is installed correctly by running the hello-world image
$ sudo docker run hello-world
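Optionally, you can add your user to the docker group so that docker commands no longer need sudo; this is the standard post-installation step from the Docker docs and takes effect after logging out and back in:
$ sudo usermod -aG docker $USER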
Using Docker
Create a virtual network for the Hadoop cluster
$ sudo docker network create --driver=bridge hadoop
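To confirm that the bridge network exists, and later to see which containers have joined it, you can list and inspect it:
$ sudo docker network ls
$ sudo docker network inspect hadoop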
Pull the Ubuntu image
$ sudo docker pull ubuntu
Create a container from the image
$ sudo docker run -it --name ubuntu-hadoop ubuntu
Switch the apt sources to the Aliyun mirror
- Change to the apt configuration directory
$ cd /etc/apt
- Back up the existing sources file
$ cp sources.list sources.list.bak
- Write the mirror entries into the file with echo
$ echo "deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse" > /etc/apt/sources.list
- Update the package index
$ apt-get update
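Note that these entries target Ubuntu 16.04 (xenial). If the pulled ubuntu image is a newer release, substitute its codename, which the container reports in /etc/os-release:
$ grep VERSION_CODENAME /etc/os-release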
Install Vim
$ apt install vim
Install net-tools
$ apt install net-tools
Install Java
$ apt install openjdk-8-jdk
Install the SSH server
$ apt install openssh-server
Install the SSH client
$ apt install openssh-client
Configure SSH
- Change to the home directory
$ cd ~
- Generate a key pair (just press Enter at the prompt; -P "" sets an empty passphrase)
$ ssh-keygen -t rsa -P ""
- Append the public key to the authorized_keys file
$ cat .ssh/id_rsa.pub >> .ssh/authorized_keys
- Start the SSH service
$ service ssh start
- Confirm passwordless login to the local host
$ ssh 127.0.0.1
- Edit ~/.bashrc
$ vim ~/.bashrc
Append service ssh start as the last line of the file, so sshd starts automatically whenever the container opens a shell.
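If you prefer not to open an editor, the same line can be appended directly:
$ echo 'service ssh start' >> ~/.bashrc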
Install Hadoop
- Download the release tarball
$ wget http://mirrors.hust.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz
- Extract it under /usr/local and rename the directory
$ tar -zxvf hadoop-3.2.1.tar.gz -C /usr/local/
$ cd /usr/local/
$ mv hadoop-3.2.1 hadoop
- Edit /etc/profile and add the following environment variables
$ vim /etc/profile
# Add the following:
#java
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
#hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_NAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# Make the changes take effect
$ source /etc/profile
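The Hadoop and Java binaries should now be on the PATH; a quick sanity check:
$ hadoop version
$ java -version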
Configure Hadoop
- Change to the configuration directory
$ cd /usr/local/hadoop/etc/hadoop
- Edit hadoop-env.sh and append the following at the end of the file
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
- Edit core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/hadoop3/hadoop/tmp</value>
    </property>
</configuration>
- Edit hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/home/hadoop3/hadoop/hdfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/home/hadoop3/hadoop/hdfs/data</value>
    </property>
</configuration>
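Hadoop creates the tmp, name, and data directories on format/startup, but you can also create them up front; the paths below simply mirror the values configured above:
$ mkdir -p /home/hadoop3/hadoop/tmp /home/hadoop3/hadoop/hdfs/name /home/hadoop3/hadoop/hdfs/data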
- Edit mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /usr/local/hadoop/etc/hadoop,
            /usr/local/hadoop/share/hadoop/common/*,
            /usr/local/hadoop/share/hadoop/common/lib/*,
            /usr/local/hadoop/share/hadoop/hdfs/*,
            /usr/local/hadoop/share/hadoop/hdfs/lib/*,
            /usr/local/hadoop/share/hadoop/mapreduce/*,
            /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
            /usr/local/hadoop/share/hadoop/yarn/*,
            /usr/local/hadoop/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
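Instead of hard-coding these jar directories, Hadoop can print a suitable classpath itself; its output can be pasted into the value above:
$ /usr/local/hadoop/bin/hadoop classpath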
- Edit yarn-site.xml
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
- Edit the workers file, listing one cluster hostname per line
master
slave1
slave2
Starting the Cluster in Docker
- Commit the current container as an image
# Look up your own container ID with docker ps -a and use it in place of the one below
$ sudo docker commit -a "damonchunglm" -m "my hadoop" b9336744997d myhadoop
- Create the master node from the committed image, publishing the NameNode (9870) and ResourceManager (8088) web UI ports
$ sudo docker run -itd --network hadoop -h "master" --name "master" -p 9870:9870 -p 8088:8088 myhadoop /bin/bash
- Create the slave1 and slave2 nodes the same way
$ sudo docker run -itd --network hadoop -h "slave1" --name "slave1" myhadoop /bin/bash
$ sudo docker run -itd --network hadoop -h "slave2" --name "slave2" myhadoop /bin/bash
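Since all three containers share the hadoop bridge network, they can resolve one another by hostname. An optional connectivity check from the host, assuming sshd was started via .bashrc as configured earlier:
$ sudo docker exec master ssh -o StrictHostKeyChecking=no slave1 hostname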
- Attach to the master node and format HDFS
$ sudo docker attach master
$ cd /usr/local/hadoop/bin
$ ./hdfs namenode -format
- Start all services
$ cd ../sbin
$ ./start-all.sh
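If everything came up, jps on master should list NameNode, SecondaryNameNode, and ResourceManager (plus DataNode and NodeManager, since master also appears in workers), and the HDFS report should show all three nodes:
$ jps
$ hdfs dfsadmin -report
# Web UIs, published to the host through the -p mappings above:
# NameNode:        http://localhost:9870
# ResourceManager: http://localhost:8088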