HADOOP Java API 之 HDFS2.x操作

本文介绍使用 Java API 对 HDFS 进行多种文件操作的方法,包括创建目录、上传文件、重命名文件、删除文件、扫描目录及文件信息等。同时展示了如何查找文件在 HDFS 集群中的位置及获取集群节点信息。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Java api操作hdfs2.x, 主要包括以下几个方法:

  • 1. create dir
  • 2.1 create file(don’t use IOUtils)
  • 2.2 create file(use IOUtils)
  • 3. upload local file(s)
  • 4. rename file(s)
  • 5. delete file(s)
  • 6. scan dirs and file information
  • 7. 查找某个文件在HDFS集群的位置
  • 8. 获取HDFS集群上所有节点名称信息

代码demo如下

package com.david.bigdata.hadoop2x.hads_api;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;

import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;

/**
 * Created by david on 16/11/13.
 *
 * <p>Walk-through of common HDFS 2.x operations using the Hadoop Java API. Each step is
 * implemented in its own private helper and is executed in order by {@link #main(String[])}:
 * create a directory, create files (with and without {@code IOUtils}), upload a local file,
 * rename, delete, list a directory, locate the blocks of a file, and list the datanodes.
 *
 * <p>Every step catches and reports its own {@link IOException}, so a failure in one step does
 * not prevent the following steps from running.
 */
public class HdfsCURDTest {

    /** Address of the NameNode used as {@code fs.defaultFS}. */
    private static final String HDFS_URI = "hdfs://localhost:9000";

    /** Separator line printed before every section header. */
    private static final String SEPARATOR = "\n-------------昏鸽线--------------\n";

    /** HDFS file written in step 2.1 and inspected again in step 7. */
    private static final String ACCOUNTS_HDFS_PATH = "/liuwei0376/mr/accounts.json";

    /** HDFS destination of the upload in step 3; source of the rename in step 4. */
    private static final String UPLOADED_HDFS_PATH = "/liuwei0376/mr/selenium-java-3.0.0-beta2";

    /** HDFS destination of the rename in step 4; removed again in step 5. */
    private static final String RENAMED_HDFS_PATH = "/liuwei0376/mr/selenium-java-3.0.0-beta3";

    /**
     * Connects to HDFS and runs all demo steps in sequence.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", HDFS_URI);
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");

        FileSystem hdfs;
        try {
            hdfs = FileSystem.get(conf);
            System.out.println("connect HDFS: " + HDFS_URI);
        } catch (IOException e) {
            System.err.println("Error on connect HDFS");
            e.printStackTrace();
            // Without a FileSystem handle none of the steps below can run.
            return;
        }

        try {
            printSection("1. create dir");
            createDir(hdfs);

            printSection("2.1 create file(don't use IOUtils)");
            createFileManually(hdfs);

            printSection("2.2 create file(use IOUtils)");
            createFileWithIOUtils(hdfs);

            printSection("3. upload local file(s)");
            uploadLocalFile(hdfs);

            printSection("4. rename file(s)");
            renameFile(hdfs);

            printSection("5. delete file(s)");
            deleteFile(hdfs);

            printSection("6. scan dirs and file information");
            scanDirectory(hdfs);

            printSection("7. 查找某个文件在HDFS集群的位置");
            locateFileBlocks(hdfs);

            printSection("8. 获取HDFS集群上所有节点名称信息");
            listDataNodes(hdfs);
        } finally {
            // Close the handle even if a step fails with an unchecked exception.
            try {
                hdfs.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Prints the separator line followed by the header of the given step. */
    private static void printSection(String title) {
        System.out.println(SEPARATOR);
        System.out.println("\n-------------hdfs操作之: " + title + "--------------\n");
    }

    /** Step 1: creates the directory {@code /liuwei0376} unless the path already exists. */
    private static void createDir(FileSystem hdfs) {
        Path dir = new Path("/liuwei0376");
        try {
            if (!hdfs.exists(dir)) {
                hdfs.mkdirs(dir);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Step 2.1: copies a local file to HDFS with a hand-written read/write loop. */
    private static void createFileManually(FileSystem hdfs) {
        Path target = new Path(ACCOUNTS_HDFS_PATH);
        // Both streams are closed automatically, including when the copy fails half-way.
        try (FileInputStream in = new FileInputStream("/Users/david/Downloads/accounts.json");
             FSDataOutputStream out = hdfs.create(target)) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            System.out.println(target + " create is over");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Step 2.2: copies a local file to HDFS with {@code IOUtils.copyBytes(in, out, buffSize, close)}.
     *
     * <p>Arguments of {@code copyBytes}: the source stream, the destination stream, the buffer
     * size in bytes, and whether {@code copyBytes} itself should close both streams when done.
     */
    private static void createFileWithIOUtils(FileSystem hdfs) {
        int bufferSize = 1024;
        // The local file is opened first so that a missing source does not leave an empty,
        // unclosed file behind on HDFS.
        try (FileInputStream in = new FileInputStream("/Users/david/Downloads/dependency.txt");
             FSDataOutputStream out = hdfs.create(new Path("/liuwei0376/mr/dependency.txt"))) {
            // close=false: try-with-resources already owns the lifetime of both streams.
            IOUtils.copyBytes(in, out, bufferSize, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Step 3: uploads a local path to HDFS.
     *
     * <p>Available overloads:
     * <pre>
     * fileSystem.copyFromLocalFile(src, dst);
     * fileSystem.copyFromLocalFile(delSrc, src, dst);
     * fileSystem.copyFromLocalFile(delSrc, overwrite, src, dst);
     * fileSystem.copyFromLocalFile(delSrc, overwrite, srcs, dst);
     * </pre>
     * where {@code delSrc} is whether to delete the source, {@code overwrite} is whether to
     * overwrite an existing file, {@code srcs} is an array of source paths (allows uploading
     * several files at once) and {@code dst} is the destination path.
     */
    private static void uploadLocalFile(FileSystem hdfs) {
        try {
            // NOTE: delSrc is true here, i.e. the local source is removed after the upload.
            hdfs.copyFromLocalFile(
                    true,
                    true,
                    new Path("/Users/david/Downloads/selenium-java-3.0.0-beta2"),
                    new Path(UPLOADED_HDFS_PATH)
            );
        } catch (IOException e) {
            System.err.println("error in copyFromLocalFile");
            e.printStackTrace();
        }
    }

    /** Step 4: renames the uploaded path via {@code fileSystem.rename(src, dst)}. */
    private static void renameFile(FileSystem hdfs) {
        try {
            boolean renamed = hdfs.rename(new Path(UPLOADED_HDFS_PATH), new Path(RENAMED_HDFS_PATH));
            if (!renamed) {
                // rename reports some failures through its return value instead of throwing.
                System.err.println("hdfs.rename returned false for " + UPLOADED_HDFS_PATH);
            }
        } catch (IOException e) {
            System.err.println("hdfs.rename error");
            e.printStackTrace();
        }
    }

    /**
     * Step 5: deletes the renamed path via {@code fileSystem.delete(path, recursive)};
     * {@code true} means delete recursively.
     */
    private static void deleteFile(FileSystem hdfs) {
        try {
            boolean deleted = hdfs.delete(new Path(RENAMED_HDFS_PATH), true);
            if (!deleted) {
                System.err.println("hdfs.delete returned false for " + RENAMED_HDFS_PATH);
            }
        } catch (IOException e) {
            System.err.println("hdfs.delete error");
            e.printStackTrace();
        }
    }

    /** Step 6: lists a directory and prints the attributes of every entry. */
    private static void scanDirectory(FileSystem hdfs) {
        try {
            FileStatus[] entries = hdfs.listStatus(new Path("/user/david/hadoop_java_files"));
            System.out.println(entries.length);

            // Print the attributes of each entry.
            int index = 0;
            for (FileStatus entry : entries) {
                System.out.println("\n------- " + ++index + " -------");

                System.out.println("fs.getAccessTime() = " + entry.getAccessTime());
                System.out.println("fs.getGroup() = " + entry.getGroup());
                System.out.println("fs.getOwner() = " + entry.getOwner());
                System.out.println("fs.getBlockSize() = " + entry.getBlockSize());
                System.out.println("fs.getLen() = " + entry.getLen());
                System.out.println("fs.getModificationTime() = " + entry.getModificationTime());
                System.out.println("fs.getPath() = " + entry.getPath());
                System.out.println("fs.getPermission() = " + entry.getPermission());
                System.out.println("fs.getReplication() = " + entry.getReplication());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Step 7: prints the first host of every block of a file.
     *
     * <p>Note: the path must be a file, not a directory.
     */
    private static void locateFileBlocks(FileSystem hdfs) {
        try {
            FileStatus status = hdfs.getFileStatus(new Path(ACCOUNTS_HDFS_PATH));
            BlockLocation[] blocks = hdfs.getFileBlockLocations(status, 0, status.getLen());
            for (int j = 0; j < blocks.length; j++) {
                String[] hosts = blocks[j].getHosts();
                // Guard against a block for which no host is reported.
                String firstHost = (hosts != null && hosts.length > 0) ? hosts[0] : "<no host reported>";
                System.out.println("block_" + j + "_location: " + firstHost);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Step 8: prints the host name of every datanode of the cluster. */
    private static void listDataNodes(FileSystem hdfs) {
        // getDataNodeStats() is only available on DistributedFileSystem.
        if (!(hdfs instanceof DistributedFileSystem)) {
            System.err.println("Not a DistributedFileSystem (" + hdfs.getClass().getName()
                    + "); cannot list datanodes");
            return;
        }
        DistributedFileSystem dfs = (DistributedFileSystem) hdfs;
        try {
            DatanodeInfo[] dataNodes = dfs.getDataNodeStats();
            for (int n = 0; n < dataNodes.length; n++) {
                System.out.println("datanode_" + n + "_name: " + dataNodes[n].getHostName());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值