java API访问hadoop(含HA)--FileSystem

最新推荐文章于 2025-10-22 13:20:23 发布

原创最新推荐文章于 2025-10-22 13:20:23 发布 · 1.5k 阅读

3 ·

CC 4.0 BY-SA版权

文章标签：

#hadoop

博客介绍了使用Hadoop访问HDFS集群的两种模式。非HA模式下，直接在URI中写明hdfs地址；HA模式下，使用Hadoop Java API访问时，创建FileSystem对象需指定NameSpace和主备NameNode的IP及端口等信息。

文章目录

1 非HA
2 HA

1 非HA

直接在URI中写明hdfs地址即可。如：

    /**
     * Shared HDFS client used by the non-HA examples. It is created once, when the
     * class is initialized, against {@code hdfs://cluster-host1:9000} as user
     * {@code hadoop}.
     */
    static FileSystem fs;

    static {
        try {
            // The NameNode address is written directly into the URI (non-HA mode).
            fs = FileSystem.get(URI.create("hdfs://cluster-host1:9000"), new Configuration(), "hadoop");
        } catch (IOException e) {
            // Fail fast: leaving fs null would only surface later as a NullPointerException.
            throw new ExceptionInInitializerError(e);
        } catch (InterruptedException e) {
            // FileSystem.get(URI, Configuration, String) also declares InterruptedException;
            // restore the interrupt flag before aborting class initialization.
            Thread.currentThread().interrupt();
            throw new ExceptionInInitializerError(e);
        }
    }
    /**
     * Creates the directory {@code /test/fs} with permission {@code 755} through the
     * shared {@code fs} client.
     *
     * @throws IOException propagated from {@code fs.mkdirs}
     */
    @Test
    public void mkdir() throws IOException {
        // Absolute path on purpose: a relative path would be resolved under each user's own directory.
        final Path target = new Path("/test/fs");
        final FsPermission mode = new FsPermission("755");
        fs.mkdirs(target, mode);
    }
    /**
     * Prints the entries returned by {@code fs.listStatus} for the given directory:
     * a header line, then for each entry its path on one line followed by a line
     * with its directory/file flags, permission and owner.
     *
     * @param dirName path of the directory to list
     * @throws IOException propagated from {@code fs.listStatus}
     */
    public static void listFiles(String dirName)throws IOException {
        FileStatus[] entries = fs.listStatus(new Path(dirName));
        System.out.println(dirName + " has all files:");
        for (FileStatus entry : entries) {
            System.out.println(entry.getPath().toString());
            // One detail line per entry, in the same field order as the header-less original output.
            String details = "  | 是否目录：" + entry.isDirectory()
                    + "  | 是否文件：" + entry.isFile()
                    + "  | permission:" + entry.getPermission()
                    + "  | owner:" + entry.getOwner();
            System.out.println(details);
        }
    }

2 HA

在非HA模式下使用Hadoop Java API访问HDFS集群时，创建FileSystem对象只需直接指定NameNode的IP以及端口号即可。但是在HA模式下，访问HDFS集群有一些不同：需要指定NameService（即dfs.nameservices），以及主备NameNode的IP和端口等信息，具体操作方式见如下代码：

/**
 * Examples of opening an HDFS {@code FileSystem} against an HA (high-availability)
 * cluster and listing the root directory.
 *
 * <p>Two approaches are shown: setting the HA client properties in code
 * ({@link #test1()}) and loading them from {@code core-site.xml} /
 * {@code hdfs-site.xml} ({@link #test2()}).
 */
public class FileSystemHA {

    /** Logical nameservice ID; every HA client property below is keyed on it. */
    private static final String NAMESERVICE = "hadoop-ns1";

    /** Logical HDFS URI of the nameservice (no host/port, resolved via the HA settings). */
    private static final String NAMESERVICE_URI = "hdfs://" + NAMESERVICE;

    /**
     * Approach 1: configure the HA client entirely in code.
     *
     * @throws Exception if the file system cannot be obtained or listed
     */
    @Test
    public void test1() throws Exception{
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", NAMESERVICE_URI); // nameservice address
        conf.set("dfs.nameservices", NAMESERVICE);
        conf.set("dfs.ha.namenodes." + NAMESERVICE, "nn1,nn2");
        conf.set("dfs.namenode.rpc-address." + NAMESERVICE + ".nn1", "hadoop-master1:8020");
        conf.set("dfs.namenode.rpc-address." + NAMESERVICE + ".nn2", "hadoop-master2:8020");
        // The suffix must be the nameservice ID itself; the key was previously written
        // with "ns1", which does not match the "hadoop-ns1" nameservice configured above.
        conf.set("dfs.client.failover.proxy.provider." + NAMESERVICE,
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        FileSystem fs = FileSystem.get(URI.create(NAMESERVICE_URI), conf, "hadoop");

        printListing(fs, "/");
    }

    /**
     * Approach 2: load the client settings from configuration files (copied from
     * hadoop-master1). The relevant properties are the same key/value pairs that
     * {@link #test1()} sets in code.
     *
     * @throws Exception if the file system cannot be obtained or listed
     */
    @Test
    public void test2() throws Exception{
        Configuration conf = new Configuration();
        conf.addResource(new Path("core-site.xml"));
        conf.addResource(new Path("hdfs-site.xml"));
        FileSystem fs = FileSystem.get(conf);

        printListing(fs, "/");
    }

    /**
     * Prints the entries returned by {@code fs.listStatus} for {@code dirName}: a
     * header line, then per entry its path followed by a line with its
     * directory/file flags, permission and owner.
     *
     * @param fs      file system to query
     * @param dirName path of the directory to list
     * @throws Exception propagated from {@code fs.listStatus}
     */
    private static void printListing(FileSystem fs, String dirName) throws Exception {
        FileStatus[] status = fs.listStatus(new Path(dirName));
        System.out.println(dirName +" has all files:");
        for (FileStatus entry : status) {
            System.out.println(entry.getPath().toString());
            System.out.print("  | 是否目录："+entry.isDirectory());
            System.out.print("  | 是否文件："+entry.isFile());
            System.out.print("  | permission:"+entry.getPermission());
            System.out.print("  | owner:"+entry.getOwner());
            System.out.println();
        }
    }

}