Hadoop API 操作

最新推荐文章于 2022-07-15 00:49:17 发布

原创最新推荐文章于 2022-07-15 00:49:17 发布 · 392 阅读

3 ·

CC 4.0 BY-SA版权

文章标签：

#hadoop #大数据

HADOOP API 专栏收录该内容

2 篇文章

订阅专栏

本文介绍了如何在Hadoop中使用FileSystem API进行文件系统的基本操作，包括创建、删除文件夹及文件，遍历HDFS中的文件，以及文件的上传和下载等。同时，还提供了多种获取FileSystem实例的方法。

导入依赖

     <!-- Maven dependency on org.apache.hadoop:hadoop-client, version 3.1.3. -->
     <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.3</version>
        </dependency>

获取FileSystem的四种方式

 /**
     * Configuration：该类的对象封装了客户端或者服务器的配置。
     * FileSystem：该类的对象是一个文件系统对象，可以使用该类对象的一些方法来对文件进行操作，
     * 通过 FileSystem 的静态方法 get 获得该对象。
     **/
    //获取FileSystem的几种方式
    /**
     * Approach 1: obtain a FileSystem from a Configuration via
     * {@code FileSystem.get(Configuration)}.
     *
     * <p>Sets {@code fs.defaultFS} on a fresh Configuration and prints the resulting
     * FileSystem prefixed with {@code demo01-->}.
     *
     * @throws IOException if the FileSystem cannot be obtained
     */
    @Test
    public void FileSystemdemo01() throws IOException {
        // fs.defaultFS is the fixed configuration key that selects the file system to use.
        final Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://node01:8020/");

        // Note on imports: FileSystem comes from the org.apache.hadoop.fs package.
        final FileSystem fs = FileSystem.get(conf);
        final String report = "demo01-->" + fs;
        System.out.println(report);
    }

    /**
     * Approach 2: obtain a FileSystem via {@code FileSystem.get(URI, Configuration)},
     * passing the HDFS address as a URI instead of setting it on the Configuration.
     *
     * <p>Prints the resulting FileSystem prefixed with {@code demo02-->}.
     *
     * @throws Exception if the URI is malformed or the FileSystem cannot be obtained
     */
    @Test
    public void FileSystemdemo02() throws Exception {
        final URI nameNodeUri = new URI("hdfs://node01:8020");
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(nameNodeUri, conf);
        System.out.println("demo02-->" + fs);
    }

    /**
     * Approach 3: obtain a FileSystem via {@code FileSystem.newInstance(Configuration)}.
     *
     * <p>Sets {@code fs.defaultFS} on a fresh Configuration, creates a new FileSystem
     * instance from it and prints that instance prefixed with {@code demo03-->}.
     * The instance is closed before the method returns.
     *
     * @throws IOException if the FileSystem cannot be created or closed
     */
    @Test
    public void FileSystemdemo03() throws IOException {
        Configuration configuration = new Configuration();
        // Select the file system. The key must be exactly "fs.defaultFS"; a misspelled
        // key is stored as an unrelated property and the HDFS address is never applied.
        configuration.set("fs.defaultFS", "hdfs://node01:8020");
        // Create the FileSystem instance; try-with-resources closes it when done.
        try (FileSystem fileSystem = FileSystem.newInstance(configuration)) {
            System.out.println("demo03-->" + fileSystem.toString());
        }
    }

    /**
     * Approach 4: obtain a FileSystem via {@code FileSystem.newInstance(URI, Configuration)}.
     *
     * <p>Prints the resulting FileSystem prefixed with {@code demo04-->} and closes it
     * before returning, consistent with the other methods that call {@code newInstance}.
     *
     * @throws Exception if the URI is malformed or the FileSystem cannot be created or closed
     */
    @Test
    public void FileSystemdemo04() throws Exception {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration())) {
            System.out.println("demo04-->" + fileSystem);
        }
    }

使用url访问数据

//1.错误1

    /**
     * 使用Maven加载Hadoop的依赖包之后，启动项目时出现了上述异常。
     * 这个异常是jar包冲突导致的，删除掉slf4j-log4j12-1.7.26.jar就可以了；
     * 或者在Maven依赖中对hadoop-hdfs和hadoop-client都排除slf4j-log4j12依赖即可。
     */
    /**
     * Reads {@code hdfs://node01:8020/dir1/a.txt} through a {@code java.net.URL} stream
     * and copies it to the local file {@code D:/a.txt}, then prints a completion message.
     *
     * <p>Both streams are opened in a try-with-resources statement so that they are
     * closed even if opening the second stream or the copy itself fails.
     *
     * @throws IOException if either stream cannot be opened or the copy fails
     */
    @Test
    public void demo01() throws IOException {
        // 1. Register the HDFS URL stream handler factory.
        //    NOTE: the JDK permits setURLStreamHandlerFactory at most once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        // 2. Open the input stream for the HDFS file and
        // 3. the output stream for the local target file.
        try (InputStream inputStream = new URL("hdfs://node01:8020/dir1/a.txt").openStream();
             FileOutputStream fileOutputStream = new FileOutputStream(new File("D:/a.txt"))) {
            // 4. Copy the file contents.
            IOUtils.copy(inputStream, fileOutputStream);
            System.out.println("完成");
        }
        // 5. Streams are closed automatically when the try block exits.
    }

HADOOP API 操作

    /**
     * Lists every file in HDFS starting from the root path and, for each one, prints
     * its block count, file name and full path.
     *
     * <p>The FileSystem is opened in a try-with-resources statement so that it is
     * closed even if the listing fails part-way.
     *
     * @throws Exception if the URI is malformed or an HDFS call fails
     */
    @Test
    public void mylistFile() throws Exception {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration())) {
            // First argument: the path to traverse; second argument: whether to recurse.
            RemoteIterator<LocatedFileStatus> fileStatusRemoteIterator =
                    fileSystem.listFiles(new Path("/"), true);
            while (fileStatusRemoteIterator.hasNext()) {
                // Status of the next file.
                LocatedFileStatus next = fileStatusRemoteIterator.next();
                // Block information for this file.
                BlockLocation[] blockLocations = next.getBlockLocations();
                System.out.println("block数量-->" + blockLocations.length);
                System.out.println("获取文件名字-->" + next.getPath().getName());
                System.out.println("获取文件路径-->" + next.getPath().toString());
            }
        }
    }


    /**
     * Creates the HDFS directory {@code /aa/bb/cc}, creates the file
     * {@code /aa/bb/cc/a.txt} inside it, and prints a success message when
     * {@code mkdirs} reports {@code true}.
     *
     * <p>The output stream returned by {@code create} is closed immediately, and the
     * FileSystem is closed even if one of the calls throws.
     *
     * @throws Exception if the URI is malformed or an HDFS call fails
     */
    @Test
    public void mkdirdemo() throws Exception {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration())) {
            boolean mkdirs = fileSystem.mkdirs(new Path("/aa/bb/cc"));
            // Nothing is written to the file; open and close the stream right away
            // so the handle returned by create() is not left dangling.
            try (FSDataOutputStream ignored = fileSystem.create(new Path("/aa/bb/cc/a.txt"))) {
                // intentionally empty
            }
            if (mkdirs) {
                System.out.println("创建成功");
            }
        }
    }


    /**
     * Creates the empty HDFS file {@code /aa/bb/cc/a.txt}.
     *
     * <p>The output stream returned by {@code create} is closed immediately, and the
     * FileSystem is closed even if the call throws.
     *
     * @throws Exception if the URI is malformed or an HDFS call fails
     */
    @Test
    public void mkdirdemo01() throws Exception {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration());
             // Per the original note: if the given path does not exist yet, it is
             // created automatically.
             FSDataOutputStream ignored = fileSystem.create(new Path("/aa/bb/cc/a.txt"))) {
            // Nothing to write; the resources are closed when this block exits.
        }
    }


    /**
     * Deletes the HDFS file or directory {@code /aa/bb/cc} (the last element of the path).
     *
     * <p>Uses an explicit recursive {@code delete} rather than {@code deleteOnExit}:
     * {@code deleteOnExit} only marks the path for deletion when the FileSystem is
     * closed, whereas {@code delete} removes it immediately and lets a failure
     * propagate to the caller.
     *
     * @throws Exception if the URI is malformed or the delete fails
     */
    @Test
    public void deletefileandwjj() throws Exception {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration())) {
            // recursive = true so that a non-empty directory is removed as well.
            fileSystem.delete(new Path("/aa/bb/cc"), true);
        }
    }

    /**
     * Downloads the HDFS file {@code /aa/bb/a.txt} to the local file {@code D:/aa.txt}
     * by opening it with {@code FileSystem.open} and copying the stream.
     *
     * <p>The FileSystem and both streams are opened in a try-with-resources statement
     * so that all of them are closed even if the copy fails.
     *
     * @throws URISyntaxException if the HDFS URI is malformed
     * @throws IOException if a stream cannot be opened or the copy fails
     */
    @Test
    public void FileToLocal() throws URISyntaxException, IOException {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration());
             // The path is an HDFS path, opened through the FileSystem.
             FSDataInputStream open = fileSystem.open(new Path("/aa/bb/a.txt"));
             // Local destination of the download.
             FileOutputStream fileOutputStream = new FileOutputStream(new File("D:/aa.txt"))) {
            // Read from HDFS and write to the local file as the data arrives.
            IOUtils.copy(open, fileOutputStream);
        }
    }


    /**
     * Download, approach 2: copies the HDFS file {@code /aa/bb/a.txt} to the local path
     * {@code D:/bb.txt} with {@code FileSystem.copyToLocalFile}.
     *
     * <p>The FileSystem is closed even if the copy throws.
     *
     * @throws URISyntaxException if the HDFS URI is malformed
     * @throws IOException if the copy fails
     */
    @Test
    public void  FileToLocal02() throws URISyntaxException, IOException {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration())) {
            fileSystem.copyToLocalFile(new Path("/aa/bb/a.txt"), new Path("D:/bb.txt"));
        }
    }

    /**
     * Uploads the local file {@code file:///D:/abc.txt} to the HDFS path {@code /aa}
     * with {@code FileSystem.copyFromLocalFile}.
     *
     * <p>The FileSystem is closed even if the copy throws.
     *
     * @throws URISyntaxException if the HDFS URI is malformed
     * @throws IOException if the copy fails
     */
    @Test
    public  void  LocalToFile() throws URISyntaxException, IOException {
        try (FileSystem fileSystem = FileSystem.newInstance(
                new URI("hdfs://node01:8020"),
                new Configuration())) {
            fileSystem.copyFromLocalFile(new Path("file:///D:/abc.txt"), new Path("/aa"));
        }
    }