Linux安装Hadoop
一、jdk安装
1、下载
[root@VM-0-8-centos ~]# wget --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/8u141-b15/336fa29ff2bb4ef291e347e091f7f4a7/jdk-8u141-linux-x64.tar.gz"
2、解压
[root@VM-0-8-centos ~]# tar -zxvf jdk-8u141-linux-x64.tar.gz -C /usr/java/
3、权限
[root@VM-0-8-centos ~]# cd /usr/java/
[root@VM-0-8-centos java]# ll -a
total 12
drwxr-xr-x 3 root root 4096 Nov 24 18:07 .
drwxr-xr-x. 15 root root 4096 Nov 24 16:20 ..
drwxr-xr-x 8 10 143 4096 Jul 12 2017 jdk1.8.0_141
[root@VM-0-8-centos java]# chown -R root:root /usr/java/*
[root@VM-0-8-centos java]# ll -a
total 12
drwxr-xr-x 3 root root 4096 Nov 24 18:07 .
drwxr-xr-x. 15 root root 4096 Nov 24 16:20 ..
drwxr-xr-x 8 root root 4096 Jul 12 2017 jdk1.8.0_141
4、配置全局环境变量
[root@VM-0-8-centos ~]# vi /etc/profile
末尾插入
export JAVA_HOME=/usr/java/jdk1.8.0_141
export PATH=$JAVA_HOME/bin:$PATH
5、配置生效,安装成功
[root@VM-0-8-centos ~]# source /etc/profile
[root@VM-0-8-centos ~]# which java
/usr/java/jdk1.8.0_141/bin/java 表示安装成功
[root@VM-0-8-centos ~]# java -version
java version "1.8.0_141"
Java(TM) SE Runtime Environment (build 1.8.0_141-b15)
Java HotSpot(TM) 64-Bit Server VM (build 25.141-b15, mixed mode)
二、安装Hadoop
1、建用户和文件夹:
[root@VM-0-8-centos ~]# useradd xr
[root@VM-0-8-centos ~]# id xr
uid=1000(xr) gid=1001(xr) groups=1001(xr)
[root@VM-0-8-centos ~]# su - xr
Last login: Fri Nov 26 13:50:28 CST 2021 on pts/1
[xr@VM-0-8-centos ~]$ mkdir sourcecode software app log data lib tmp
[xr@VM-0-8-centos ~]$ ll
total 28
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 app
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 data
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 lib
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 log
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 software
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 sourcecode
drwxrwxr-x 2 xr xr 4096 Nov 25 17:22 tmp
为什么要自己建一个 tmp 目录,而不使用系统自带的 /tmp?因为系统自带 /tmp 中的文件如果长期不被访问,按当前版本的特性,默认 30 天后会被自动清除;使用自建的 tmp 目录可以避免文件被清除。
当前用户登出
[xr@VM-0-8-centos ~]$ exit
logout
[root@VM-0-8-centos ~]#
2、解压安装
[root@VM-0-8-centos ~]# ll
total 386220
drwxr-xr-x 2 root root 4096 Nov 15 17:25 app
drwxr-xr-x 2 root root 4096 Nov 15 17:25 data
drwxr-xr-x 2 root root 4096 Nov 15 17:25 doc
-rw-r--r-- 1 root root 395448622 Nov 25 21:12 hadoop-3.2.2.tar.gz
drwxr-xr-x 2 root root 4096 Nov 15 17:25 lib
drwxr-xr-x 2 root root 4096 Nov 15 17:25 log
drwxr-xr-x 2 root root 4096 Nov 15 17:25 script
drwxr-xr-x 2 root root 4096 Nov 15 17:25 software
drwxr-xr-x 2 root root 4096 Nov 15 17:25 source
drwxr-xr-x 2 root root 4096 Nov 15 17:25 tmp
移动&赋权限
[root@VM-0-8-centos ~]# mv hadoop-3.2.2.tar.gz /home/xr/software/
[root@VM-0-8-centos ~]# chown xr:xr /home/xr/software/*
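解压(先切换到 xr 用户,并进入 software 目录)
[root@VM-0-8-centos ~]# su - xr
[xr@VM-0-8-centos ~]$ cd software
[xr@VM-0-8-centos software]$ tar -xzvf hadoop-3.2.2.tar.gz -C ../app/
[xr@VM-0-8-centos software]$
[xr@VM-0-8-centos software]$ cd ../app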
[xr@VM-0-8-centos app]$ ll
total 4
drwxr-xr-x 9 xr xr 4096 Jan 3 2021 hadoop-3.2.2
[xr@VM-0-8-centos app]$ cd hadoop-3.2.2
[xr@VM-0-8-centos hadoop-3.2.2]$ ll
total 204
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 bin 命令执行脚本
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 etc 配置文件
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 include
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 lib
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 libexec
-rw-rw-r-- 1 xr xr 150569 Dec 5 2020 LICENSE.txt
-rw-rw-r-- 1 xr xr 21943 Dec 5 2020 NOTICE.txt
-rw-rw-r-- 1 xr xr 1361 Dec 5 2020 README.txt
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 sbin 启动停止脚本
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 share
3、ssh: 远程登录到目标地址(执行命令)
[root@VM-0-8-centos ~]# which ssh
/usr/bin/ssh
ssh 用户@机器地址
[root@VM-0-8-centos ~]# ssh root@VM-0-8-centos date
The authenticity of host 'vm-0-8-centos (::1)' can't be established.
ECDSA key fingerprint is SHA256:xjZ9+fhbUNUIPvjAGGxVkUsrzF4X9VRmbcOOM4xYCW4.
ECDSA key fingerprint is MD5:b6:a3:ab:8e:4f:9f:e7:bb:6d:ba:c7:bd:41:c4:83:48.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'vm-0-8-centos' (ECDSA) to the list of known hosts.
root@vm-0-8-centos's password:
Fri Nov 26 15:10:13 CST 2021
从 A 机器远程登录 B 机器:ssh root@ruozedata001 ===》会话进入 B 机器
从 A 机器远程登录 B 机器并执行命令:ssh root@ruozedata001 date 思考题:命令执行完后,当前会话的光标是在 A 机器还是 B 机器?
[root@VM-0-8-centos ~]# su - xr
Last login: Fri Nov 26 14:51:12 CST 2021 on pts/1
[xr@VM-0-8-centos ~]$ ssh root@VM-0-8-centos date
The authenticity of host 'vm-0-8-centos (::1)' can't be established.
ECDSA key fingerprint is SHA256:xjZ9+fhbUNUIPvjAGGxVkUsrzF4X9VRmbcOOM4xYCW4.
ECDSA key fingerprint is MD5:b6:a3:ab:8e:4f:9f:e7:bb:6d:ba:c7:bd:41:c4:83:48.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'vm-0-8-centos' (ECDSA) to the list of known hosts.
root@vm-0-8-centos's password:
Fri Nov 26 15:17:34 CST 2021
[xr@VM-0-8-centos ~]$ pwd
/home/xr
4、配置JAVA_HOME(注意:此处已将解压得到的 hadoop-3.2.2 目录重命名为 hadoop,例如在 /home/xr/app 下执行 mv hadoop-3.2.2 hadoop)
[root@VM-0-8-centos hadoop]# pwd
/home/xr/app/hadoop/etc/hadoop
[root@VM-0-8-centos hadoop]# vi hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_141
5、单机模式:无进程
[root@VM-0-8-centos hadoop]# cd /home/xr/app/hadoop
[root@VM-0-8-centos hadoop]# pwd
/home/xr/app/hadoop
[root@VM-0-8-centos hadoop]# ll
total 204
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 bin
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 etc
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 include
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 lib
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 libexec
-rw-rw-r-- 1 xr xr 150569 Dec 5 2020 LICENSE.txt
-rw-rw-r-- 1 xr xr 21943 Dec 5 2020 NOTICE.txt
-rw-rw-r-- 1 xr xr 1361 Dec 5 2020 README.txt
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 sbin
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 share
[root@VM-0-8-centos hadoop]# mkdir input
[root@VM-0-8-centos hadoop]# ll
total 208
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 bin
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 etc
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 include
drwxr-xr-x 2 root root 4096 Nov 26 15:37 input
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 lib
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 libexec
-rw-rw-r-- 1 xr xr 150569 Dec 5 2020 LICENSE.txt
-rw-rw-r-- 1 xr xr 21943 Dec 5 2020 NOTICE.txt
-rw-rw-r-- 1 xr xr 1361 Dec 5 2020 README.txt
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 sbin
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 share
[root@VM-0-8-centos hadoop]# cp etc/hadoop/*.xml input
[root@VM-0-8-centos hadoop]# cd input
[root@VM-0-8-centos input]# ll
total 52
-rw-r--r-- 1 root root 9213 Nov 26 15:37 capacity-scheduler.xml
-rw-r--r-- 1 root root 774 Nov 26 15:37 core-site.xml
-rw-r--r-- 1 root root 11392 Nov 26 15:37 hadoop-policy.xml
-rw-r--r-- 1 root root 775 Nov 26 15:37 hdfs-site.xml
-rw-r--r-- 1 root root 620 Nov 26 15:37 httpfs-site.xml
-rw-r--r-- 1 root root 3518 Nov 26 15:37 kms-acls.xml
-rw-r--r-- 1 root root 682 Nov 26 15:37 kms-site.xml
-rw-r--r-- 1 root root 758 Nov 26 15:37 mapred-site.xml
-rw-r--r-- 1 root root 690 Nov 26 15:37 yarn-site.xml
[root@VM-0-8-centos input]# cd ..
[root@VM-0-8-centos hadoop]# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.2.jar grep input output 'dfs[a-z.]+'
正则表达式参考:https://www.runoob.com/java/java-regular-expressions.html
[root@VM-0-8-centos hadoop]# ll
total 212
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 bin
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 etc
drwxr-xr-x 2 xr xr 4096 Jan 3 2021 include
drwxr-xr-x 2 root root 4096 Nov 26 15:37 input
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 lib
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 libexec
-rw-rw-r-- 1 xr xr 150569 Dec 5 2020 LICENSE.txt
-rw-rw-r-- 1 xr xr 21943 Dec 5 2020 NOTICE.txt
drwxr-xr-x 2 root root 4096 Nov 26 15:41 output
-rw-rw-r-- 1 xr xr 1361 Dec 5 2020 README.txt
drwxr-xr-x 3 xr xr 4096 Jan 3 2021 sbin
drwxr-xr-x 4 xr xr 4096 Jan 3 2021 share
[root@VM-0-8-centos hadoop]# cd output/
[root@VM-0-8-centos output]# ll
total 4
-rw-r--r-- 1 root root 11 Nov 26 15:41 part-r-00000
-rw-r--r-- 1 root root 0 Nov 26 15:41 _SUCCESS
[root@VM-0-8-centos output]# cat part-r-00000
1 dfsadmin
[root@VM-0-8-centos output]#
成功
6、伪分布式部署:
配置文件
[root@VM-0-8-centos hadoop]# vi etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://VM-0-8-centos:9000</value>
</property>
</configuration>
[root@VM-0-8-centos hadoop]# vi etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
修正用户 用户组
[root@VM-0-8-centos app]# ll
total 4
drwxr-xr-x 11 xr xr 4096 Nov 26 15:41 hadoop
[root@VM-0-8-centos app]# chown -R xr:xr hadoop/*
[root@VM-0-8-centos app]# ll
total 4
drwxr-xr-x 11 xr xr 4096 Nov 26 15:41 hadoop
[root@VM-0-8-centos app]# su - xr
Last login: Fri Nov 26 15:16:47 CST 2021 on pts/1
单向登陆的操作过程:
1、登录A机器
2、ssh-keygen -t [rsa|dsa],将会生成私钥文件和公钥文件:id_rsa、id_rsa.pub 或 id_dsa、id_dsa.pub
[xr@VM-0-8-centos ~]$ ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/home/xr/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/xr/.ssh/id_rsa.
Your public key has been saved in /home/xr/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:beTJnM7L9aHHUNzvV8gy+sDSSRRM+ZNolYF7FXz/VWw xr@VM-0-8-centos
The key's randomart image is:
+---[RSA 2048]----+
| ooo.+.o |
| +.o o E|
| o= + +o|
| B+o= o +|
| S.X. + .+|
| B .+ o +|
| . B..=...|
| o.+ oo.o|
| o.o.. .|
+----[SHA256]-----+
[xr@VM-0-8-centos ~]$
3、将 .pub 公钥文件复制到 B 机器的 .ssh 目录,并追加到授权文件:cat id_rsa.pub >> ~/.ssh/authorized_keys(若使用 dsa 密钥,则为 id_dsa.pub;本例中 A、B 为同一台机器)
[xr@VM-0-8-centos ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[xr@VM-0-8-centos ~]$ chmod 0600 ~/.ssh/authorized_keys
4、大功告成,从A机器登录B机器的目标账户,不再需要密码了;(直接运行 #ssh ip地址 )
[xr@VM-0-8-centos ~]$ ssh VM-0-8-centos
Last login: Fri Nov 26 16:18:50 2021
[xr@VM-0-8-centos ~]$ ssh xr@VM-0-8-centos
Last login: Fri Nov 26 16:35:23 2021 from ::1
[xr@VM-0-8-centos ~]$
不需要密码
[xr@VM-0-8-centos ~]$ cd ~/.ssh
[xr@VM-0-8-centos .ssh]$ chmod 600 authorized_keys
[xr@VM-0-8-centos .ssh]$ ssh VM-0-8-centos
Last login: Fri Nov 26 16:35:58 2021 from ::1
[xr@VM-0-8-centos ~]$
格式化
[xr@VM-0-8-centos hadoop]$ pwd
/home/xr/app/hadoop
[xr@VM-0-8-centos hadoop]$ bin/hdfs namenode -format
WARNING: /home/xr/app/hadoop/logs does not exist. Creating.
2021-11-26 16:43:58,071 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG: host = VM-0-8-centos/127.0.0.1
STARTUP_MSG: args = [-format]
STARTUP_MSG: version = 3.2.2
2021-11-26 16:43:58,796 INFO util.GSet: Computing capacity for map NameNodeRetryCache
2021-11-26 16:43:58,796 INFO util.GSet: VM type = 64-bit
2021-11-26 16:43:58,796 INFO util.GSet: 0.029999999329447746% max memory 1.7 GB = 534.2 KB
2021-11-26 16:43:58,796 INFO util.GSet: capacity = 2^16 = 65536 entries
2021-11-26 16:43:58,819 INFO namenode.FSImage: Allocated new BlockPoolId: BP-892934164-127.0.0.1-1637916238812
2021-11-26 16:43:58,838 INFO common.Storage: Storage directory /tmp/hadoop-xr/dfs/name has been successfully formatted.
2021-11-26 16:43:58,863 INFO namenode.FSImageFormatProtobuf: Saving image file /tmp/hadoop-xr/dfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
2021-11-26 16:43:58,954 INFO namenode.FSImageFormatProtobuf: Image file /tmp/hadoop-xr/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 397 bytes saved in 0 seconds .
2021-11-26 16:43:58,969 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
2021-11-26 16:43:58,973 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown.
2021-11-26 16:43:58,974 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at VM-0-8-centos/127.0.0.1
************************************************************/
启动 NameNode 和 DataNode 守护进程:
[xr@VM-0-8-centos hadoop]$ sbin/start-dfs.sh
Starting namenodes on [VM-0-8-centos]
Starting datanodes
localhost: Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
Starting secondary namenodes [VM-0-8-centos]
[xr@VM-0-8-centos hadoop]$
角色类比:老大(NameNode)、老二(SecondaryNameNode,每 1 小时与老大同步一次)、小弟(DataNode)
例如:老大和老二在 11 点同步了一次;
11 点半老大出了问题,无法恢复;
这时由老二来顶替老大,只能恢复到 11 点同步时的状态(11 点之后的变更会丢失)。
用 jps 查看服务有没有启动成功;但 jps 的结果不保险,建议还是以 ps -ef | grep hadoop 的结果为准
[xr@VM-0-8-centos hadoop]$ jps
16545 NameNode
17300 Jps
16696 DataNode
16895 SecondaryNameNode
[xr@VM-0-8-centos hadoop]$
[xr@VM-0-8-centos hadoop]$ ll logs
total 116
-rw-rw-r-- 1 xr xr 30687 Nov 26 16:49 hadoop-xr-datanode-VM-0-8-centos.log
-rw-rw-r-- 1 xr xr 701 Nov 26 16:49 hadoop-xr-datanode-VM-0-8-centos.out
-rw-rw-r-- 1 xr xr 37513 Nov 26 16:50 hadoop-xr-namenode-VM-0-8-centos.log
-rw-rw-r-- 1 xr xr 701 Nov 26 16:49 hadoop-xr-namenode-VM-0-8-centos.out
-rw-rw-r-- 1 xr xr 32362 Nov 26 16:50 hadoop-xr-secondarynamenode-VM-0-8-centos.log
-rw-rw-r-- 1 xr xr 701 Nov 26 16:49 hadoop-xr-secondarynamenode-VM-0-8-centos.out
-rw-rw-r-- 1 xr xr 0 Nov 26 16:43 SecurityAuth-xr.audit
[xr@VM-0-8-centos hadoop]$
登录: http://IP:9870/
hdfs操作命令
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -mkdir /user
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -ls /
Found 1 items
drwxr-xr-x - xr supergroup 0 2021-11-26 16:56 /user
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -mkdir /user/<username>
-bash: syntax error near unexpected token `newline'
(说明:<username> 只是官方文档中的占位符,bash 会把 < 和 > 当作重定向符号,因此报语法错误;需要替换为实际的用户名,例如 xr)
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -mkdir /user/xr
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -mkdir input
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -ls /user/xr/
Found 1 items
drwxr-xr-x - xr supergroup 0 2021-11-26 17:01 /user/xr/input
[xr@VM-0-8-centos hadoop]$
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -put etc/hadoop/*.xml input
[xr@VM-0-8-centos hadoop]$ bin/hdfs dfs -ls /user/xr/input/
Found 9 items
-rw-r--r-- 1 xr supergroup 9213 2021-11-26 17:03 /user/xr/input/capacity-scheduler.xml
-rw-r--r-- 1 xr supergroup 889 2021-11-26 17:03 /user/xr/input/core-site.xml
-rw-r--r-- 1 xr supergroup 11392 2021-11-26 17:03 /user/xr/input/hadoop-policy.xml
-rw-r--r-- 1 xr supergroup 871 2021-11-26 17:03 /user/xr/input/hdfs-site.xml
-rw-r--r-- 1 xr supergroup 620 2021-11-26 17:03 /user/xr/input/httpfs-site.xml
-rw-r--r-- 1 xr supergroup 3518 2021-11-26 17:03 /user/xr/input/kms-acls.xml
-rw-r--r-- 1 xr supergroup 682 2021-11-26 17:03 /user/xr/input/kms-site.xml
-rw-r--r-- 1 xr supergroup 758 2021-11-26 17:03 /user/xr/input/mapred-site.xml
-rw-r--r-- 1 xr supergroup 690 2021-11-26 17:03 /user/xr/input/yarn-site.xml
[xr@VM-0-8-centos hadoop]$