Big Data Software Installation Tutorial
CentOS 7.7
1. Virtual machine setup
1.1 Configure a static IP at first boot
vi /etc/sysconfig/network-scripts/ifcfg-ens33
BOOTPROTO="static" # use a static IP address (the default is dhcp)
IPADDR="192.168.23.101" # the static IP address
NETMASK="255.255.255.0" # subnet mask
GATEWAY="192.168.23.2" # gateway address
DNS1=8.8.8.8 # DNS servers
DNS2=8.8.4.4
DNS3=114.114.114.114
ONBOOT="yes" # bring the interface up at boot
1.2 Restart the network service
systemctl restart network
1.3 Change the hostname
hostnamectl set-hostname cdh01
1.4 Check the local IP, test connectivity, and reboot
ip addr
Ping an external site and the host machine's IP:
ping www.baidu.com
ping 10.31.156.48 # the host machine's IP address
reboot
1.5 Install extra tools
yum -y install ifconfig # fails: ifconfig is a command, not a package name
yum provides ifconfig # shows the command is supplied by net-tools
# install the providing package directly:
yum -y install net-tools.x86_64
yum -y install vim
1.6 Install the NTP client and server
yum -y install ntpdate
yum -y install ntp
# one-off clock sync with ntpdate:
ntpdate -u time.windows.com
systemctl start ntpd
systemctl enable ntpd
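Because ntpdate cannot sync while ntpd holds the NTP port, some setups skip ntpd and run ntpdate from cron instead. A minimal sketch (the 30-minute interval and time server are illustrative choices, not from the original setup):
crontab -e
# add this line:
*/30 * * * * /usr/sbin/ntpdate -u time.windows.com > /dev/null 2>&1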
1.7 Disable the VM firewall + configure the hosts file
Stop: systemctl stop firewalld
Disable at boot: systemctl disable firewalld
Re-enable at boot (if needed): systemctl enable firewalld
Check status: systemctl status firewalld
# Disable NetworkManager (it can override the static IP settings)
Common management commands:
systemctl status NetworkManager
systemctl start NetworkManager
systemctl stop NetworkManager
systemctl disable NetworkManager
systemctl enable NetworkManager
vi /etc/hosts
192.168.23.101 cdh01
192.168.23.102 cdh02
192.168.23.103 cdh03
1.8 Clone the VM, then change each clone's IP and hostname (sketched below)
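A sketch of the per-clone changes, assuming the second node follows the addressing pattern above:
hostnamectl set-hostname cdh02
vim /etc/sysconfig/network-scripts/ifcfg-ens33 # change IPADDR to 192.168.23.102
systemctl restart network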
2. Install Hadoop and the JDK
Pseudo-distributed installation
1. Extract and install the JDK and Hadoop, and set environment variables (a sketch follows)
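A sketch of this step; the archive names are assumptions chosen to match the jdk1.8.0_261 and hadoop-2.8.1 paths used in the config below:
tar -zxvf jdk-8u261-linux-x64.tar.gz -C /opt/apps
tar -zxvf hadoop-2.8.1.tar.gz -C /opt/apps
vi /etc/profile # add JAVA_HOME and HADOOP_HOME exports, as in section 2.1 below
source /etc/profile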
2. Set up passwordless SSH login
ssh-keygen -t rsa
ssh-copy-id root@localhost
ssh localhost
3. Configuration files
3.1 hadoop-env.sh
export JAVA_HOME=/opt/apps/jdk1.8.0_261
3.2 core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/apps/hadoop-2.8.1/hdpdata</value>
</property>
3.3 hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/apps/hadoop-2.8.1/hdpdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/apps/hadoop-2.8.1/hdpdata/hdfs/data</value>
</property>
3.4 mapred-site.xml
cp mapred-site.xml.template mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
3.5 yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
3.6 Create the data directories
mkdir -p /opt/apps/hadoop-2.8.1/hdpdata/hdfs/name
mkdir -p /opt/apps/hadoop-2.8.1/hdpdata/hdfs/data
3.7 Format the NameNode
hdfs namenode -format
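After a successful format, start the daemons and verify; a minimal sketch, assuming $HADOOP_HOME/sbin is on the PATH:
start-dfs.sh
start-yarn.sh
jps # expect NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager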
Fully distributed installation
2.1 Extract and install Hadoop and the JDK
tar -zxvf hadoop-2.6.0-cdh5.7.6.tar.gz -C /opt/apps
tar -zxvf jdk-8u45-linux-x64.tar.gz -C /opt/apps
Configure environment variables
vi /etc/profile
source /etc/profile
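The exact lines added to /etc/profile are not shown above; a typical sketch for these install paths would be:
export JAVA_HOME=/opt/apps/jdk1.8.0_45
export HADOOP_HOME=/opt/apps/hadoop-2.6.0-cdh5.7.6
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin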
Check the version
java -version
2.2 Configure hadoop-env.sh
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/hadoop-env.sh
The key change is the line below:
# The java implementation to use.
export JAVA_HOME=/opt/apps/jdk1.8.0_45
The full hadoop-env.sh, for reference:
# (Apache license header omitted)
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use.
export JAVA_HOME=/opt/apps/jdk1.8.0_45
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata
export HADOOP_SECURE_DN_PID_DIR=/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
2.3 Configure hdfs-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/data</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/sname</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata/hdfs/sname</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- WebHDFS (REST API) in Namenodes and Datanodes. -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
2.4 Configure core-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://cdh01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/apps/hadoop-2.6.0-cdh5.7.6/hdpdata</value>
</property>
<!-- allow the hue user to impersonate any user from any host (used if Hue is installed later) -->
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
</configuration>
2.5 Configure mapred-site.xml
mv mapred-site.xml.template mapred-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>cdh01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>cdh01:19888</value>
</property>
</configuration>
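Note that start-yarn.sh does not start the JobHistory server configured above; in Hadoop 2.x it is launched separately, e.g.:
mr-jobhistory-daemon.sh start historyserver
# its web UI should then answer at http://cdh01:19888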
2.6 Configure yarn-site.xml
vi /opt/apps/hadoop-2.6.0-cdh5.7.6/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>cdh01</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>cdh01:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>cdh01:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>cdh01:8032</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>cdh01:8088</value>
</property>
</configuration>
2.7 Configure the slaves file (the start scripts launch worker daemons on every host listed here)
vi slaves
cdh01
cdh02
cdh03
2.8 Clone the VMs, change each clone's IP + hostname, and set up passwordless SSH
On each of the three machines, generate a key and copy it to all nodes:
ssh-keygen -t rsa
ssh-copy-id cdh01
ssh-copy-id cdh02
ssh-copy-id cdh03
ssh cdh01
ssh 0.0.0.0 # accept this host key too; start-dfs.sh connects to 0.0.0.0 to start the SecondaryNameNode by default
2.9 Format the NameNode
hdfs namenode -format
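After formatting, start the cluster from cdh01 and verify; a sketch using the Hadoop 2.x defaults:
start-dfs.sh
start-yarn.sh
jps # run on every node and check the expected daemons
# NameNode web UI: http://cdh01:50070, YARN web UI: http://cdh01:8088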
3. Install Hive
3.1 Extract and install
tar -zxvf hive-1.1.0-cdh5.7.6.tar.gz -C /opt/apps
Configure environment variables
vi /etc/profile
...
source /etc/profile
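The profile additions are elided above; a typical sketch for this layout would be:
export HIVE_HOME=/opt/apps/hive-1.1.0-cdh5.7.6
export PATH=$PATH:$HIVE_HOME/bin
hive --version # verify the installation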