1 As root, create the hadoop user
useradd -g root hadoop
2 Build Hadoop (the official 2.2.0 binaries ship 32-bit native libraries, so a 64-bit build has to be compiled from source)
2.1 Download the JDK, Maven, and the Hadoop source tarball
2.2 Configure environment variables
sudo vim /etc/profile
JAVA_HOME=/usr/local/jdk1.7.0_25
JRE_HOME=$JAVA_HOME/jre
CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
MAVEN_HOME=/usr/local/apache-maven-3.2.2
PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$MAVEN_HOME/bin:$PATH
export JAVA_HOME JRE_HOME CLASSPATH MAVEN_HOME PATH
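After saving /etc/profile, reload it and check that the tools resolve (a quick sanity check; the version output depends on what was actually installed):
source /etc/profile
java -version
mvn -version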
2.3 Maven configuration (settings.xml)
<?xml version="1.0" encoding="utf-8"?>
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
<pluginGroups></pluginGroups>
<proxies></proxies>
<servers></servers>
<mirrors>
<mirror>
<id>nexus-osc</id>
<mirrorOf>*</mirrorOf>
<name>Nexus osc</name>
<url>http://maven.oschina.net/content/groups/public/</url>
</mirror>
</mirrors>
<profiles>
<profile>
<id>jdk-1.7</id>
<activation>
<jdk>1.7</jdk>
</activation>
<repositories>
<repository>
<id>nexus</id>
<name>local private nexus</name>
<url>http://maven.oschina.net/content/groups/public/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>nexus</id>
<name>local private nexus</name>
<url>http://maven.oschina.net/content/groups/public/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</pluginRepository>
</pluginRepositories>
</profile>
</profiles>
</settings>
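Maven reads this settings.xml either from the global conf directory of the Maven install or from the building user's home directory; assuming the install path from 2.2, either location works:
/usr/local/apache-maven-3.2.2/conf/settings.xml
~/.m2/settings.xml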
2.4 Install build dependencies
yum install gcc
yum install gcc-c++
yum install make
wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz
tar -zxvf protobuf-2.5.0.tar.gz
cd protobuf-2.5.0
./configure --prefix=/usr/local/protoc/
make && make install
Add /usr/local/protoc/bin to the PATH.
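One way to do that, following the /etc/profile approach from 2.2 (the version check is just a sanity test):
export PATH=/usr/local/protoc/bin:$PATH
protoc --version    # should report libprotoc 2.5.0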
yum install cmake
yum install openssl-devel
yum install ncurses-devel
2.5 Build
cd hadoop-2.2.0-src
mvn package -Pdist,native -DskipTests -Dtar
The built distribution ends up in: hadoop-2.2.0-src/hadoop-dist/target/hadoop-2.2.0
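To confirm the native libraries really came out 64-bit, a common check is to run file on libhadoop.so in the generated distribution (path taken from the build output above):
file hadoop-dist/target/hadoop-2.2.0/lib/native/libhadoop.so.1.0.0
# expected: ELF 64-bit LSB shared object, x86-64 ...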
3 As root, disable the firewall
chkconfig iptables off    # permanent, takes effect after reboot
service iptables stop     # immediate, but lost after reboot
service iptables status   # check firewall status
4 As root, configure /etc/hosts
192.168.101.128 master
192.168.101.129 slave1
192.168.101.130 slave2
192.168.101.131 slave3
ifconfig eth2 192.168.101.130 netmask 255.255.255.0    # set the IP address
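Note that an address set with ifconfig is lost on reboot; on CentOS/RHEL a persistent address is normally configured in the interface file, roughly like this (device name and addresses are this cluster's, adjust to your own):
# /etc/sysconfig/network-scripts/ifcfg-eth2
DEVICE=eth2
BOOTPROTO=static
IPADDR=192.168.101.130
NETMASK=255.255.255.0
ONBOOT=yes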
5 As the hadoop user, set up passwordless SSH
ssh-keygen -t rsa
cd .ssh
cat id_rsa.pub > authorized_keys
# If authorized_keys was just created, change its permissions to 600:
chmod 600 authorized_keys
# If the other machines are clones of this one, the following copies are not needed:
scp .ssh/authorized_keys hadoop@192.168.101.129:/home/hadoop/.ssh/
scp .ssh/authorized_keys hadoop@192.168.101.130:/home/hadoop/.ssh/
scp .ssh/authorized_keys hadoop@192.168.101.131:/home/hadoop/.ssh/
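To confirm passwordless login works, each of these should print the remote date without asking for a password (hostnames from the /etc/hosts entries above):
ssh slave1 date
ssh slave2 date
ssh slave3 date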
6 Hadoop configuration
6.1 Create three directories to hold the NameNode metadata, the DataNode blocks, and temporary files
mkdir -p ~/dfs/name
mkdir -p ~/dfs/data
mkdir -p ~/temp
6.2 Set JAVA_HOME in hadoop-env.sh and yarn-env.sh
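Both files live under etc/hadoop/ in the Hadoop distribution; the change is just pointing JAVA_HOME at the JDK from 2.2, for example:
# etc/hadoop/hadoop-env.sh and etc/hadoop/yarn-env.sh
export JAVA_HOME=/usr/local/jdk1.7.0_25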
6.3 core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/hadoop/temp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
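As with the other configuration files below, these <property> elements go inside the <configuration> element of etc/hadoop/core-site.xml; a minimal skeleton (showing only the first property) looks like:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <!-- remaining properties as listed above -->
</configuration>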
6.4 hdfs-site.xml
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9001</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
6.5 mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
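The 2.2.0 distribution normally ships only a template for this file, so mapred-site.xml usually has to be created first:
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml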
6.6 yarn-site.xml
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>8192</value>
</property>
6.7 slaves
slave1
slave2
slave3
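The configured Hadoop directory must exist on every node. If the slaves were not cloned from master, one way is to copy it over (the /usr/local/hadoop-2.2.0 install path is an assumption; the hadoop user also needs write access to the target directory, otherwise copy into its home directory and move it as root):
scp -r /usr/local/hadoop-2.2.0 hadoop@slave1:/usr/local/
scp -r /usr/local/hadoop-2.2.0 hadoop@slave2:/usr/local/
scp -r /usr/local/hadoop-2.2.0 hadoop@slave3:/usr/local/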
7 Start HDFS and YARN
bin/hdfs namenode -format
sbin/start-dfs.sh
sbin/start-yarn.sh
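If startup succeeded, jps should show roughly the following daemons (process IDs will differ):
# on master
jps    # NameNode, SecondaryNameNode, ResourceManager
# on each slave
jps    # DataNode, NodeManager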
8 Web UIs
HDFS (NameNode): http://master:50070
YARN (ResourceManager): http://master:8088
9 Basic commands
hadoop fs -mkdir /test
hadoop fs -put /usr/local/hadoop-2.2.0/bin/hdfs /test
hadoop fs -get /test/hdfs /usr/local/
hadoop fs -rm /test/hdfs
hadoop fs -rm -r /test
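A simple end-to-end check of MapReduce on YARN is the bundled pi example (the jar path is relative to the Hadoop install directory and specific to 2.2.0):
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 2 10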