Big Data - Hadoop Learning Notes 03

These notes cover Hadoop script analysis (start-all.sh, hadoop-config.sh, and related scripts), common Hadoop commands and operations, and a walkthrough of Hadoop configuration and of working with HDFS.

8. Hadoop script analysis

The Hadoop scripts live under ../hadoop/sbin; you can also locate a script's path with which <cmd>.
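
For example (the installation path below is only illustrative; yours will differ):

    $ which start-dfs.sh
    /usr/local/hadoop/sbin/start-dfs.sh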

1. start-all.sh

#!/usr/bin/env bash
echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"

bin=`dirname "${BASH_SOURCE-$0}"`       # take the script's path from BASH_SOURCE, falling back to $0 (the script itself)
bin=`cd "$bin"; pwd`                    # resolve it to an absolute path

DEFAULT_LIBEXEC_DIR="$bin"/../libexec   # the sibling libexec directory
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}   # ${VAR:-default} substitution: fall back to the default if unset
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh  # source hadoop-config.sh to set up the environment variables

# start hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then
  "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR //调用./sbin/start-dfs.sh,启动hdfs
fi

# start yarn daemons if yarn is present
if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then
  "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR   //调用./start-yarn.sh,启动yarn
fi
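
The HADOOP_LIBEXEC_DIR line relies on bash's default-value parameter expansion; a minimal standalone illustration:

    # ${VAR:-default} expands to $VAR when it is set and non-empty, otherwise to the default
    unset DIR
    echo "${DIR:-/opt/hadoop/libexec}"    # prints /opt/hadoop/libexec
    DIR=/custom/libexec
    echo "${DIR:-/opt/hadoop/libexec}"    # prints /custom/libexec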

2. hadoop-config.sh

    Sets up the common environment variables used by the other scripts (a sketch of overriding them follows), e.g.:
    HADOOP_CONF_DIR
    HEAP_SIZE=1000m (the default JVM heap size)
    CLASSPATH
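
These defaults can be overridden before the scripts run; a minimal sketch (the paths are placeholders):

    # exported in the shell, or set in hadoop-env.sh, which the launch scripts source
    export HADOOP_CONF_DIR=/etc/hadoop/conf     # point every script at an explicit config directory
    export HADOOP_HEAPSIZE=2000                 # daemon heap size in MB, replacing the 1000m default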

3. start-dfs.sh --config $HADOOP_CONF_DIR (starts HDFS)

    1. libexec/hdfs-config.sh             # set up the environment variables
    2. hdfs getconf -namenodes            # resolve the namenode hostnames
    3. "./sbin/hadoop-daemons.sh" \       # start the namenode
        --config "$HADOOP_CONF_DIR" \
        --hostnames "$NAMENODES" \
        --script "$bin/hdfs" start namenode $nameStartOpt
    4. "./sbin/hadoop-daemons.sh" \       # start the datanodes
        --config "$HADOOP_CONF_DIR" \
        --script "$bin/hdfs" start datanode $dataStartOpt
    5. "./sbin/hadoop-daemons.sh" \       # start the secondary namenode
        --config "$HADOOP_CONF_DIR" \
        --hostnames "$SECONDARY_NAMENODES" \
        --script "$bin/hdfs" start secondarynamenode

4. hdfs-config.sh

    Ultimately it just delegates to hadoop-config.sh

5. ./sbin/hadoop-daemons.sh (starts the daemons across the cluster)

    1. hadoop-config.sh
    2. Iterates over the hosts in the slaves file and runs the command on each one over ssh:
      exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; \
        "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

6. ./sbin/hadoop-daemon.sh

    1. hadoop-config.sh
    2. ./bin/hdfs (invokes the concrete Java class that starts the daemon process)

7. ./bin/hadoop

    1. hadoop-config.sh
    2. Invokes the corresponding Java class

8. ./bin/hdfs

    1. hadoop-config.sh
    2. Invokes the corresponding Java class (see the sketch below)
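
Both launchers work the same way: they map the subcommand to a Java main class and exec the JVM. A trimmed-down sketch of the relevant part of ./bin/hdfs (the real script handles many more subcommands and options):

    # trimmed-down sketch of ./bin/hdfs: map the subcommand to a main class, then exec the JVM
    COMMAND=$1
    shift
    if [ "$COMMAND" = "namenode" ]; then
      CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
    elif [ "$COMMAND" = "datanode" ]; then
      CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
    elif [ "$COMMAND" = "dfs" ]; then
      CLASS=org.apache.hadoop.fs.FsShell
    fi
    exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"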

9. Common Hadoop commands and operations

1. Basic cluster commands (a typical startup sequence follows the list):
    1. Format the filesystem
        hadoop namenode -format
    2. Start a datanode/namenode on a single node
        hadoop-daemon.sh start datanode
        hadoop-daemon.sh start namenode
    3. Start the datanodes/namenode across the cluster
        hadoop-daemons.sh start datanode
        hadoop-daemons.sh start namenode
    4. Start YARN
        start-yarn.sh
    5. Start HDFS
        start-dfs.sh
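
On a freshly installed pseudo-distributed cluster the whole sequence typically looks like this (format only once; jps is just used to verify the daemons):

    hadoop namenode -format     # one-time: initialize the namenode metadata directory
    start-dfs.sh                # namenode, datanodes, secondary namenode
    start-yarn.sh               # resourcemanager, nodemanagers
    jps                         # expect roughly: NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager
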
2. Basic HDFS commands (largely the same as their Linux counterparts; an example session follows the list)
    1. Upload a file
        hadoop fs -put  or  hdfs dfs -put
    2. Rename or move
        hadoop fs -mv  or  hdfs dfs -mv
    3. Download a file
        hadoop fs -get  or  hdfs dfs -get
    4. Delete a file or directory
        hadoop fs -rm -r  or  hdfs dfs -rm -r
    5. List files recursively
        hadoop fs -lsr  or  hdfs dfs -lsr
    6. Copy a file within HDFS
        hadoop fs -cp  or  hdfs dfs -cp
    7. Change file permissions
        hadoop fs -chmod  or  hdfs dfs -chmod
    8. Change the owner (and group)
        hadoop fs -chown  or  hdfs dfs -chown
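
A short example session (the paths are illustrative):

    hadoop fs -put notes.txt /user/hadoop/notes.txt        # upload a local file
    hdfs dfs -lsr /user/hadoop                              # list the directory tree recursively
    hadoop fs -cp /user/hadoop/notes.txt /tmp/notes.txt     # copy inside HDFS
    hadoop fs -chmod 644 /tmp/notes.txt                     # adjust permissions
    hadoop fs -get /tmp/notes.txt ./notes-copy.txt          # download to the local filesystem
    hadoop fs -rm -r /tmp/notes.txt                         # delete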

10. Hadoop configuration notes

1. If the namenode's local directory is configured with multiple paths, every directory holds an identical copy of the metadata
    [hdfs-site.xml]
    dfs.namenode.name.dir=dir1,dir2
2. The datanode's local directory can also be configured with multiple paths, but they are not mirrors: block data is spread across them
    [hdfs-site.xml]
    dfs.datanode.data.dir=dir1,dir2
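
In the actual hdfs-site.xml these are ordinary properties with comma-separated paths; the directories below are just placeholders:

    <configuration>
      <!-- namenode metadata is written to every listed directory (redundant copies) -->
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop/nn1,/data/hadoop/nn2</value>
      </property>
      <!-- datanode blocks are spread across the listed directories (no redundancy) -->
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop/dn1,/data/hadoop/dn2</value>
      </property>
    </configuration>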

11. Working with HDFS

1. Accessing the HDFS filesystem through the java.net.URL API
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;

public class TestHDFS {
    // register the stream handler factory so plain Java URLs can resolve the hdfs:// scheme
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws Exception {
        // the HDFS file to read
        String url = "hdfs://localhost:8020/test.txt";
        // open a URL connection to it
        URLConnection conn = new URL(url).openConnection();
        // copy the input stream to a local file
        InputStream is = conn.getInputStream();
        FileOutputStream fos = new FileOutputStream("/Users/username/api.txt");

        byte[] buf = new byte[1024];
        int len = -1;
        while ((len = is.read(buf)) != -1) {
            fos.write(buf, 0, len);
        }
        fos.close();
        is.close();
        // prints the last buffer read; only meaningful for files smaller than the buffer
        System.out.println(new String(buf));
        System.out.println("----- over -----");
    }
}
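
To compile and run this outside an IDE, the Hadoop jars need to be on the classpath; one way, assuming the hadoop command is on the PATH:

    javac -cp $(hadoop classpath) TestHDFS.java
    java  -cp $(hadoop classpath):. TestHDFS
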
2. Using Hadoop's FileSystem API to work with HDFS
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.Method;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

/**
 * Reading and writing HDFS files through Hadoop's FileSystem API.
 */
public class TestFileSystem {
    private FileSystem fs;

    @Before
    public void initConf() {
        try {
            // point the client at the HDFS namenode
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:8020/");
            fs = FileSystem.get(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // write a small file to HDFS
    @Test
    public void writeFile() throws Exception {
        Path path = new Path("/write.txt");
        FSDataOutputStream dos = fs.create(path);
        dos.write("hello write!\n".getBytes());
        dos.close();
        System.out.println("----- over -----");
    }

    // download a file from HDFS to the local filesystem
    @Test
    public void readFile() throws Exception {
        Path path = new Path("/read.txt");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/read.txt");
        IOUtils.copyBytes(fis, fos, 1024);
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
        System.out.println("----- over -----");
    }

    // write a file to HDFS with an explicit replication factor
    @Test
    public void writerFileInReplication() throws Exception {
        Path path = new Path("hdfs://localhost:8020/replication.txt");
        FSDataOutputStream dos = fs.create(path, (short)2);
        dos.write("hello replication!\n".getBytes());
        dos.close();
        System.out.println("----- over -----");
    }

    // after uploading jdk-8u111-macosx-x64.dmg to HDFS, read (roughly) the first block
    @Test
    public void readFileSeek128() throws Exception {
        Path path = new Path("hdfs://localhost:8020/jdk-8u111-macosx-x64.dmg");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/jdk-part0");
        byte[] buf = new byte[1024];
        // copy up to 128 * 1024 reads of 1 KB (~128 MB, one block), writing only the bytes actually read
        int len;
        for (int i = 0; i < 128 * 1024; i++) {
            if ((len = fis.read(buf)) == -1) {
                break;
            }
            fos.write(buf, 0, len);
        }
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
    }

    // after uploading jdk-8u111-macosx-x64.dmg to HDFS, read from the second block onward
    @Test
    public void readFileSeek() throws Exception {
        Path path = new Path("hdfs://localhost:8020/jdk-8u111-macosx-x64.dmg");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/jdk-part1");
        // seek to the 128 MB offset, i.e. the start of the second block
        fis.seek(1024 * 1024 * 128);
        IOUtils.copyBytes(fis, fos, 1024);
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
    }

    // create directories in HDFS
    @Test
    public void mkdir() throws Exception {
        Path path = new Path("/zhao/zhe");
        // create the directory with default permissions
        fs.mkdirs(path);
        // create it again, this time granting rwx to user, group and others
        fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    }

    // get a FileStatus and dump all of its no-argument getters via reflection
    @Test
    public void fileStatus() throws Exception {
        Path path = new Path("/");
        FileStatus fstatus = fs.getFileStatus(path);
        Class clazz = FileStatus.class;
        Method[] ms = clazz.getDeclaredMethods();
        for(Method m : ms) {
            String mname = m.getName();
            Class[] ptype = m.getParameterTypes();
            if(mname.startsWith("get") && (ptype == null || ptype.length ==0)) {
                if(!mname.equals("getSymlink")) {                   
                    Object ret = m.invoke(fstatus, null);
                    System.out.println(mname + "() = " + ret);
                }
            }
        }
    }
    // recursively walk the directory tree
    @Test
    public void recursiveFile() throws Exception {
        print(fs.getFileStatus(new Path("/")));
    }
    private void print(FileStatus fss) {
        try {
            Path path = fss.getPath();
            // print the path of the current entry
            System.out.println(path.toUri().getPath());
            if(fss.isDirectory()) {
                // list every entry under this directory
                FileStatus[] fsarr = fs.listStatus(path);
                if(fsarr != null && fsarr.length > 0) {
                    for(FileStatus ff: fsarr) {
                        print(ff);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // delete a file or directory
    @Test
    public void deleteFile() throws Exception {
        Path path = new Path("/read.txt");
        // recursive delete
        fs.delete(path, true);
    }
}