1. Machine Environment
1.1 Configuration:
#CentOS; memory: 31G; CPU: 24 cores; storage: 2T
#Three machines:
A:10.90.3.81(H-namenode)
B:10.90.3.82(H-datanode)
C:10.90.3.83(H-datanode)
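#The configuration files later in this guide refer to these machines by hostname (carcloud81/82/83). Assuming those names map to the IPs above, every node's /etc/hosts should contain:
10.90.3.81 carcloud81
10.90.3.82 carcloud82
10.90.3.83 carcloud83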
#Create the account (create the group first, since useradd -g requires it to exist):
groupadd hadoop
useradd hadoop -g hadoop
#Directory layout:
/data/cloud/(hadoop:hadoop)
hadoop -> /data/cloud/hadoop-2.6.0-cdh5.4.8
hadoop-2.6.0-cdh5.4.8
hive -> /data/cloud/hive-1.1.0-cdh5.4.8
hive-1.1.0-cdh5.4.8
spark -> /data/cloud/spark-1.3.0-cdh5.4.8-bin
spark-1.3.0-cdh5.4.8-bin
zookeeper -> /data/cloud/zookeeper-3.4.5-cdh5.4.8
zookeeper-3.4.5-cdh5.4.8
#Passwordless SSH:
A -> B & C
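#A minimal sketch of wiring this up as the hadoop user on A (assumes password auth is still enabled so ssh-copy-id can push the key):
su - hadoop
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
ssh-copy-id hadoop@10.90.3.82
ssh-copy-id hadoop@10.90.3.83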
1.2 Software:
#Install the build dependencies via yum:
yum install -y snappy snappy-devel \
    autoconf automake libtool git \
    gcc gcc-c++ make cmake \
    openssl-devel ncurses-devel bzip2-devel
rpm -qa | grep -E 'snappy|autoconf|make|libtool|git|gcc|openssl|ncurses|bzip2'
#jdk7(/usr/local)
http://download.oracle.com/otn-pub/java/jdk/7u79-b15/jdk-7u79-linux-x64.tar.gz
java -version
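#A minimal install sketch; the 7u79 tarball unpacks to jdk1.7.0_79, and the symlink matches the JAVA_HOME=/usr/local/java used in section 2.2:
cd /usr/local
tar xvfz jdk-7u79-linux-x64.tar.gz
ln -s /usr/local/jdk1.7.0_79 /usr/local/java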
#maven:(/usr/local/maven)
http://ftp.cuhk.edu.hk/pub/packages/apache.org/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
mvn -version
#ant(/usr/local/ant)
http://apache.01link.hk//ant/binaries/apache-ant-1.9.6-bin.tar.gz
ant -version
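#Maven and Ant install the same way; a sketch assuming the archives unpack to their versioned directory names:
cd /usr/local
tar xvfz apache-maven-3.3.9-bin.tar.gz && ln -s apache-maven-3.3.9 maven
tar xvfz apache-ant-1.9.6-bin.tar.gz && ln -s apache-ant-1.9.6 ant
export PATH=/usr/local/maven/bin:/usr/local/ant/bin:$PATH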
#protobuf(/usr/local/protobuf)
https://code.google.com/p/protobuf/downloads/list
protoc --version
Reference: http://blog.youkuaiyun.com/huguoping830623/article/details/45482725
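#Hadoop 2.6 builds against protoc 2.5.0 specifically, so fetch that release; a build sketch from the extracted source:
cd protobuf-2.5.0
./configure --prefix=/usr/local/protobuf
make && make install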
#snappy(/usr/local/snappy)
https://github.com/google/snappy/releases/download/1.1.3/snappy-1.1.3.tar.gz
Reference: http://google.github.io/snappy/
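#Snappy builds the same way; a sketch for the 1.1.3 tarball above:
tar xvfz snappy-1.1.3.tar.gz && cd snappy-1.1.3
./configure --prefix=/usr/local/snappy
make && make install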
2. Hadoop Installation
2.1 Hadoop build:
#hadoop(/data/src/hadoop-2.6.0-cdh5.4.8)
http://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.4.8-src.tar.gz
mvn clean package -DskipTests -Pdist,native -Dtar -Dsnappy.lib=/usr/local/snappy/lib/ -Dbundle.snappy
/data/src/hadoop-2.6.0-cdh5.4.8/src/hadoop-dist/target/hadoop-2.6.0-cdh5.4.8.tar.gz
cp /data/src/hadoop-2.6.0-cdh5.4.8/src/hadoop-dist/target/hadoop-2.6.0-cdh5.4.8.tar.gz /data/cloud/
cd /data/cloud/
tar xvfz hadoop-2.6.0-cdh5.4.8.tar.gz
ln -s /data/cloud/hadoop-2.6.0-cdh5.4.8 /data/cloud/hadoop
chown -R hadoop:hadoop /data/cloud/
#hadoop-snappy(/data/src/hadoop-snappy)
https://github.com/electrum/hadoop-snappy
mvn package -Dsnappy.prefix=/usr/local/snappy
tar xvfz hadoop-snappy-0.0.1-SNAPSHOT.tar.gz
cp -r ./hadoop-snappy-0.0.1-SNAPSHOT/lib/* $HADOOP_HOME/lib
ll hadoop-snappy-master/target/hadoop-snappy-0.0.1-SNAPSHOT-tar/hadoop-snappy-0.0.1-SNAPSHOT/lib/
hadoop-snappy-0.0.1-SNAPSHOT.jar
#/data/cloud/hadoop/bin/hadoop checknative -a (verify the native .so files)
ll /data/cloud/hadoop/lib/native/
ll /data/cloud/hadoop/lib/native/Linux-amd64-64/
2.2 Environment configuration (/etc/profile.d/ice.sh)
#java
export JAVA_HOME=/usr/local/java
export CLASSPATH=$JAVA_HOME/lib:$JAVA_HOME/jre/lib:.
#protobuf
export PROTOBUF_HOME=/usr/local/protobuf
#hadoop
export HADOOP_HOME=/data/cloud/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
#yarn
export YARN_HOME=$HADOOP_HOME
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
#spark
export SPARK_HOME=/data/cloud/spark
export SPARK_JAR=$SPARK_HOME/spark-assembly-1.3.0-cdh5.4.8-hadoop2.6.0-cdh5.4.8.jar
#scala
export SCALA_HOME=/usr/local/scala
export PATH=$JAVA_HOME/bin:$PROTOBUF_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
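#After writing the script, load it and confirm the tool chain resolves:
source /etc/profile.d/ice.sh
java -version
hadoop version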
2.3 System configuration:
#core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://carcloud81:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/cloud/hadoop/tmp</value>
</property>
</configuration>
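#Once the file is in place, a quick way to confirm the setting is picked up:
hdfs getconf -confKey fs.defaultFS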
#hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data/cloud/hadoop/dfs/nnode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data/cloud/hadoop/dfs/dnode</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
#mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobtracker.address</name>
<value>carcloud81:8021</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>carcloud81:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>carcloud81:19888</value>
</property>
<!-- Note: the two per-node task limits below are MR1-era settings and have no effect under YARN;
     container concurrency is governed by the memory settings in yarn-site.xml instead. -->
<property>
<name>mapred.max.maps.per.node</name>
<value>4</value>
</property>
<property>
<name>mapred.max.reduces.per.node</name>
<value>2</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1408</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1126M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>2816</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2252M</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
</property>
</configuration>
#master
carcloud81
#slaves
carcloud82
carcloud83
#yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.address</name>
<value>carcloud81:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>carcloud81:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>carcloud81:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>carcloud81:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>carcloud81:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>$HADOOP_CONF_DIR
,$HADOOP_COMMON_HOME/share/hadoop/common/*
,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*
,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*
,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*
,$YARN_HOME/share/hadoop/yarn/*</value>
</property>
<!-- Configurations for NodeManager -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>5632</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1408</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>5632</value>
</property>
</configuration>
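#The memory figures above follow a simple sizing rule; worked out as a sanity check:
5632 = 4 x 1408        (NodeManager memory = four minimum-size containers per node)
1408 x 0.8 ≈ 1126      (map JVM heap inside its 1408 MB container)
2816 x 0.8 ≈ 2252      (reduce JVM heap inside its 2816 MB, i.e. two-container, allocation)
#Keeping the heap at roughly 0.8 of the container leaves headroom for non-heap JVM memory, so YARN does not kill tasks for exceeding their allocation.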
2.4 Start and stop
#Sync the same software environment and the Hadoop package to 82 and 83 and install them the same way.
#Initialize the service on 81:
>hdfs namenode -format
#Start the services on 81:
>start-dfs.sh
>start-yarn.sh
#jps on 81 should list:
SecondaryNameNode
NameNode
ResourceManager
Master  (Spark standalone; shows up only after Spark itself is started)
Worker  (Spark standalone; shows up only after Spark itself is started)
#jps on 82 and 83 should list:
DataNode
NodeManager
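#A quick smoke test once everything is up; the examples jar name below is assumed from the CDH version string, adjust if the actual file differs:
>hdfs dfs -mkdir -p /tmp/smoke
>hdfs dfs -put /etc/hosts /tmp/smoke/
>hdfs dfs -ls /tmp/smoke
>hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.4.8.jar pi 2 10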
#Stop the services on 81:
>stop-yarn.sh
>stop-dfs.sh