8. Hadoop script analysis
The hadoop scripts live under ../hadoop/sbin; you can also locate a script's path with which cmd.
1. start-all.sh
#!/usr/bin/env bash
echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"
bin=`dirname "${BASH_SOURCE-$0}"` //take the script's path from BASH_SOURCE, falling back to $0 (the file itself) if unset
bin=`cd "$bin"; pwd` //normalize to an absolute path
DEFAULT_LIBEXEC_DIR="$bin"/../libexec //the libexec directory next to sbin
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} //${VAR:-default} substitution: keep VAR if set, otherwise use the default (see the demo after this listing)
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh //source hadoop-config.sh to set up environment variables
# start hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then
"${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR //调用./sbin/start-dfs.sh,启动hdfs
fi
# start yarn daemons if yarn is present
if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then
"${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR //调用./start-yarn.sh,启动yarn
fi
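The two shell idioms above are worth a quick demo. This sketch is illustrative only (MY_DIR is a made-up variable):
#!/usr/bin/env bash
bin=`dirname "${BASH_SOURCE-$0}"`   # directory containing this script
bin=`cd "$bin"; pwd`                # resolved to an absolute path
echo "script dir: $bin"
unset MY_DIR
echo "${MY_DIR:-/tmp/default}"      # prints /tmp/default because MY_DIR is unset
MY_DIR=/opt/custom
echo "${MY_DIR:-/tmp/default}"      # prints /opt/custom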
2. hadoop-config.sh
Sets up the common environment variables (inspection example below), for example:
HADOOP_CONF_DIR
JAVA_HEAP_MAX (-Xmx1000m by default; the size can be overridden via HADOOP_HEAPSIZE)
CLASSPATH
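You can check what it computed by sourcing the script in a normal shell (assuming a standard install layout):
. $HADOOP_HOME/libexec/hadoop-config.sh
echo $HADOOP_CONF_DIR
hadoop classpath    # prints the CLASSPATH the scripts computed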
3. start-dfs.sh --config $HADOOP_CONF_DIR //starts HDFS
1. libexec/hdfs-config.sh //sets environment variables
2. hdfs getconf -namenodes //looks up the namenode hostnames (see the example after this list)
3. "./sbin/hadoop-daemons.sh" \ //starts the namenode(s)
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" start namenode $nameStartOpt
4. "./sbin/hadoop-daemons.sh" \ //starts the datanodes
--config "$HADOOP_CONF_DIR" \
--script "$bin/hdfs" start datanode $dataStartOpt
5. "./sbin/hadoop-daemons.sh" \ //starts the secondary namenode
--config "$HADOOP_CONF_DIR" \
--hostnames "$SECONDARY_NAMENODES" \
--script "$bin/hdfs" start secondarynamenode
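hdfs getconf is handy on its own, too. Sample invocations (the output is illustrative):
$ hdfs getconf -namenodes
localhost
$ hdfs getconf -confKey dfs.blocksize
134217728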
4. hdfs-config.sh
Ultimately delegates to hadoop-config.sh as well.
5. ./sbin/hadoop-daemons.sh //starts daemons across the cluster
1. hadoop-config.sh
2. Iterates over the slaves file and runs the given command on each remote host over ssh (a simplified sketch of slaves.sh follows):
exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
6. ./sbin/hadoop-daemon.sh
1. hadoop-config.sh
2. ./bin/hdfs //invokes the concrete Java class that runs the process
7. ./bin/hadoop
1. hadoop-config.sh
2. Invokes the corresponding Java class
8. ./bin/hdfs
1. hadoop-config.sh
2. Invokes the corresponding Java class (see the sketch below)
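The tail of bin/hdfs essentially maps the subcommand name to a class and execs the JVM. Abridged from the 2.x script:
if [ "$COMMAND" = "namenode" ]; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
elif [ "$COMMAND" = "datanode" ]; then
  CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
fi
# CLASSPATH and JAVA_HEAP_MAX were prepared by hadoop-config.sh
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"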
9. Common Hadoop commands and operations
1. Basic cluster commands (a typical startup sequence follows this list):
1. Format the filesystem
hadoop namenode -format (deprecated alias; hdfs namenode -format is the current form)
2. Start a datanode/namenode on the local node only
hadoop-daemon.sh start datanode
hadoop-daemon.sh start namenode
3. Start datanodes/namenodes across the cluster (iterates over the slaves file)
hadoop-daemons.sh start datanode
hadoop-daemons.sh start namenode
4. Start YARN
start-yarn.sh
5. Start HDFS
start-dfs.sh
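A typical first-run sequence on a pseudo-distributed setup might look like this:
hdfs namenode -format    # one time only: format the filesystem
start-dfs.sh             # namenode, datanodes, secondary namenode
start-yarn.sh            # resourcemanager and nodemanagers
jps                      # verify the daemon processes are up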
2. Basic HDFS commands (largely consistent with their Linux counterparts; an example session follows the list)
1. Upload a file
hadoop fs -put (or hdfs dfs -put)
2. Rename/move
hadoop fs -mv (or hdfs dfs -mv)
3. Download a file
hadoop fs -get (or hdfs dfs -get)
4. Delete files or directories
hadoop fs -rm -r (or hdfs dfs -rm -r)
5. List files recursively
hadoop fs -lsr (or hdfs dfs -lsr; deprecated in favor of -ls -R)
6. Copy a file within hdfs
hadoop fs -cp (or hdfs dfs -cp)
7. Change file permissions
hadoop fs -chmod (or hdfs dfs -chmod)
8. Change the file owner (and group, as owner:group)
hadoop fs -chown (or hdfs dfs -chown)
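An example session (paths are illustrative):
hdfs dfs -put data.txt /user/me/data.txt        # upload
hdfs dfs -ls -R /user/me                        # recursive listing
hdfs dfs -cp /user/me/data.txt /tmp/copy.txt    # copy within hdfs
hdfs dfs -chmod 644 /user/me/data.txt
hdfs dfs -get /user/me/data.txt ./local.txt     # download
hdfs dfs -rm -r /tmp/copy.txt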
10. Hadoop configuration notes
1. The namenode's local directory can be given multiple entries; each directory then holds an identical copy of the metadata (redundancy). See the sample hdfs-site.xml fragment below.
【hdfs-site.xml】
dfs.namenode.name.dir=dir1,dir2
2. The datanode's local directory can also be given multiple entries, but they are not backup copies; blocks are spread across the directories.
【hdfs-site.xml】
dfs.datanode.data.dir=dir1,dir2
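A sample hdfs-site.xml fragment (the paths are illustrative):
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/data/hadoop/nn1,/data/hadoop/nn2</value>  <!-- each holds a full copy -->
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/data/hadoop/dn1,/data/hadoop/dn2</value>  <!-- blocks spread across both -->
</property>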
11. Operating on HDFS
1. Accessing the hdfs filesystem through the java.net.URL API
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;

public class TestHDFS {
    // register the protocol handler factory so Java programs can resolve the hdfs:// scheme
    // (URL.setURLStreamHandlerFactory may be called at most once per JVM)
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws Exception {
        // the hdfs URL to read
        String url = "hdfs://localhost:8020/test.txt";
        // open a connection over the URL
        URLConnection conn = new URL(url).openConnection();
        // open the input stream and copy it to a local file
        InputStream is = conn.getInputStream();
        FileOutputStream fos = new FileOutputStream("/Users/username/api.txt");
        byte[] buf = new byte[1024];
        int len = -1;
        while ((len = is.read(buf)) != -1) {
            fos.write(buf, 0, len);
        }
        fos.close();
        is.close();
        System.out.println("----- over -----");
    }
}
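To give the example something to read, /test.txt can first be created from the shell, e.g.:
echo "hello hdfs" | hdfs dfs -put - /test.txt    # '-' reads the file contents from stdin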
2. Using Hadoop's FileSystem API to operate on hdfs
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.Method;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

/**
 * Reads and writes files through Hadoop's FileSystem abstraction.
 */
public class TestFileSystem {
    private FileSystem fs;

    @Before
    public void iniConf() {
        try {
            // point the client at the hdfs namenode
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:8020/");
            fs = FileSystem.get(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    // write a file to hdfs
    @Test
    public void writeFile() throws Exception {
        Path path = new Path("/write.txt");
        FSDataOutputStream dos = fs.create(path);
        dos.write("hello write!\n".getBytes());
        dos.close();
        System.out.println("----- over -----");
    }
    // download a file from hdfs
    @Test
    public void readFile() throws Exception {
        Path path = new Path("/read.txt");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/read.txt");
        IOUtils.copyBytes(fis, fos, 1024);
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
        System.out.println("----- over -----");
    }
    // write a file to hdfs with an explicit replication factor
    @Test
    public void writerFileInReplication() throws Exception {
        Path path = new Path("hdfs://localhost:8020/replication.txt");
        FSDataOutputStream dos = fs.create(path, (short) 2);
        dos.write("hello replication!\n".getBytes());
        dos.close();
        System.out.println("----- over -----");
    }
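The applied replication factor can be verified from the shell (output illustrative):
hdfs fsck /replication.txt -files -blocks    # each block should report repl=2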
    // upload jdk-8u111-macosx-x64.dmg to hdfs first, then read the first block (128 MB)
    @Test
    public void readFileSeek128() throws Exception {
        Path path = new Path("hdfs://localhost:8020/jdk-8u111-macosx-x64.dmg");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/jdk-part0");
        byte[] buf = new byte[1024];
        long remaining = 128L * 1024 * 1024;    // exactly one default-sized block
        int len;
        // read() may return fewer bytes than requested, so track the actual count
        while (remaining > 0 && (len = fis.read(buf, 0, (int) Math.min(buf.length, remaining))) != -1) {
            fos.write(buf, 0, len);
            remaining -= len;
        }
        fos.close();
        fis.close();
    }
    // read the second block of the uploaded jdk-8u111-macosx-x64.dmg
    @Test
    public void readFileSeek() throws Exception {
        Path path = new Path("hdfs://localhost:8020/jdk-8u111-macosx-x64.dmg");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/jdk-part1");
        // seek to the byte offset where the second block starts
        fis.seek(1024 * 1024 * 128);
        IOUtils.copyBytes(fis, fos, 1024);
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
    }
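Assuming the file spans no more than two blocks, the two parts concatenated should reproduce the original. A quick check on macOS (assuming the original .dmg is still in the working directory):
cat /Users/username/jdk-part0 /Users/username/jdk-part1 > /tmp/jdk-joined
md5 /tmp/jdk-joined jdk-8u111-macosx-x64.dmg    # the digests should match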
    // create directories
    @Test
    public void mkdir() throws Exception {
        Path path = new Path("/zhao/zhe");
        // create the directory tree
        fs.mkdirs(path);
        // create it again with explicit permissions (rwxrwxrwx)
        fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    }
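The result is easy to confirm from the shell:
hdfs dfs -ls -d /zhao/zhe    # verify the directory exists and inspect its permission bits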
    // get a FileStatus and dump its getters via reflection
    @Test
    public void fileStatus() throws Exception {
        Path path = new Path("/");
        FileStatus fstatus = fs.getFileStatus(path);
        Class<?> clazz = FileStatus.class;
        Method[] ms = clazz.getDeclaredMethods();
        for (Method m : ms) {
            String mname = m.getName();
            Class<?>[] ptype = m.getParameterTypes();
            // only call the no-argument getters
            if (mname.startsWith("get") && ptype.length == 0) {
                // getSymlink() throws when the path is not a symlink, so skip it
                if (!mname.equals("getSymlink")) {
                    Object ret = m.invoke(fstatus);
                    System.out.println(mname + "() = " + ret);
                }
            }
        }
    }
    // walk the directory tree recursively
    @Test
    public void recursiveFile() throws Exception {
        print(fs.getFileStatus(new Path("/")));
    }

    private void print(FileStatus fss) {
        try {
            Path path = fss.getPath();
            // print the path name
            System.out.println(path.toUri().getPath());
            if (fss.isDirectory()) {
                // list every entry under this path and recurse
                FileStatus[] fsarr = fs.listStatus(path);
                if (fsarr != null && fsarr.length > 0) {
                    for (FileStatus ff : fsarr) {
                        print(ff);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // delete a file or directory
    @Test
    public void deleteFile() throws Exception {
        Path path = new Path("/read.txt");
        // true enables recursive deletion of non-empty directories
        fs.delete(path, true);
    }
}