HDFS API 使用示例

本文详细介绍了如何使用Java API操作Hadoop Distributed File System (HDFS),包括文件的读写、目录管理等核心操作,是大数据开发者的实用指南。


 <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.4</version>
        </dependency>    
 </dependencies>

import org.apache.poi.ss.formula.functions.T;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;

/**
 * @Author: gh
 * @Description: HDFS data-access interface: list files, delete files, download
 * files, add files, count files under a path; HDFS usage rate, HDFS storage
 * size, and reading a file as a stream of bytes.
 */
public interface HdfsDao {
// Cluster Hadoop config dir (kept for reference):
// /opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop/etc/hadoop
    // List the directory/file tree under a path (returned as a JSON string).
    public String listPath(String path);

    /**
     * Download a file to the local file system.
     * @param src  file path on the remote file system
     * @param dst  local destination path
     * @return 1 on success, -1 on failure
     */
    public int copyFileToLocal(String src,String dst);

    /**
     * Stream a file down to an HTTP client.
     * @param resp response used to write the file bytes to the client
     * @param src  file path on the remote file system
     * @return 1 on success, -1 on failure
     */
    public int copyFileToClient(HttpServletResponse resp,String src);

    /**
     * Upload the client's request body to the remote Hadoop file system.
     * @param req      client request whose body is the file content
     * @param hdfsPath destination HDFS file path
     * @return 1 on success, -1 on failure
     */
    public int uploadToHDFS(HttpServletRequest req,String hdfsPath);

    /**
     * Delete a file OR a directory on HDFS (recursively removes directory contents).
     * @param hdfsPath HDFS path to delete
     * @return 1 on success, -1 on failure
     */
    public int deleteHDFSFile(String hdfsPath);
    /**
     * Count the files directly under a path (its immediate children).
     * @param pathName name of the path to inspect
     * @return number of files found
     */
    public long totalCount(String pathName);
    /**
     * Count the files (analogous to tables) in the immediate subdirectories of
     * a path (analogous to a database).
     * @param dirPath directory path
     * @return number of files
     */
    public int countTables(String dirPath);
    /**
     * List the names of all files (analogous to tables) under a directory path
     * (analogous to a database).
     * @return list of file path names
     */
    public List<String> getTablenamesOfDB(String dirPath);
    /**
     * Non DFS used = ( Total Disk Space - Reserved Space) - Remaining Space - DFS Used
     * HDFS usage rate = DFS used / DFS space
     *           = DFS used / ( Total Disk Space - Reserved Space - Remaining Space)
     * DFS used = bpUsed(block pool)
     * Reserved Space = dfs.datanode.du.reserved
     * @return usage rate as a percentage
     */
    public double usedRate();
    /**
     * @param path path to inspect
     * @return used storage size (under the given path) in MB
     */
    public double storeSizeOfMB(String path);
    /**
     * @return total used storage size in MB
     */
    public double storeSizeOfMB();

    /**
     * Open a file as a byte stream.
     * @param filePath file path
     * @return an open input stream for the file, or null when unavailable
     */
    public InputStream getFileBytes(String filePath);
}

import com.alibaba.fastjson.JSONObject;
import com.genius.pojo.pg.dto.DataBaseDTO;
import com.google.common.base.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.HAUtil;
import org.springframework.util.StringUtils;

import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * @Author: gh
 * @Description: Connects to HDFS and supports listing, adding, deleting and
 * downloading files, plus capacity/usage queries.
 */
public class HdfsDaoImpl implements HdfsDao{

    // Shared Hadoop file-system handle, created once in the constructor.
    FileSystem fs = null;
    // Connection info (ip, port, db name) for the target cluster.
    DataBaseDTO dataBaseDTO = null;
    // NameNode JMX metrics reference:
    //   CapacityTotalGB     = Total Disk Space (Configured Capacity)
    //   CapacityRemainingGB = Remaining Space (DFS Remaining)
    //   CapacityUsedGB      = DFS Used
    //   reserved disk space = dfs.datanode.du.reserved
    final String JMX_QRY = "/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem";

    /**
     * Connects to HDFS immediately using the given connection info.
     * @param dbd ip/port/db-name of the target cluster
     */
    public HdfsDaoImpl(DataBaseDTO dbd) {
        dataBaseDTO = dbd;
        fs = connectToHDFS();
    }

    public FileSystem getFs() {
        return fs;
    }
    public void setFs(FileSystem fs) {
        this.fs = fs;
    }

    /**
     * Closes the underlying file system; safe to call when never connected.
     */
    public void close(){
        try{
            if(this.fs != null){
                this.fs.close();
            }
        }catch(IOException e){
            e.printStackTrace();
        }
    }

    /**
     * @return true when the handle is open and the configured database path exists.
     */
    public boolean connected(){
        try{
            if(getFs() == null){
                return false;
            }
            return getFs().exists(new Path(this.dataBaseDTO.getDbName()));
        }catch(IOException e){
            e.printStackTrace();
            return false;
        }
    }

    /**
     * HDFS usage rate, as a percentage rounded to two decimals.
     * rate = DFS Used / (Total Disk Space - Reserved Space - Remaining Space)
     * Reserved Space = dfs.datanode.du.reserved
     * @return usage percentage, 0.0 when it cannot be computed
     */
    @Override
    public double usedRate() {
        double rate = 0.0;
        Configuration conf = fs.getConf();
        // bytes
        final long reservedSpace = conf.getLongBytes("dfs.datanode.du.reserved", 0);
        try {
            FsStatus fsStatus = fs.getStatus();
            final long capacityTotal = fsStatus.getCapacity();      // bytes
            final long capacityUsed = fsStatus.getUsed();           // bytes
            final long capacityRemaining = fsStatus.getRemaining(); // bytes
            final long dfsSpace = capacityTotal - reservedSpace - capacityRemaining;
            // FIX: guard against a zero/negative denominator, which previously
            // produced NaN or Infinity.
            if (dfsSpace > 0) {
                rate = (double) capacityUsed / dfsSpace;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        // FIX: return moved out of 'finally' (a return in finally swallows any
        // in-flight exception). Locale.ROOT guarantees a '.' decimal separator
        // so parseDouble cannot fail in comma-decimal locales.
        return Double.parseDouble(String.format(Locale.ROOT, "%.2f", rate * 100));
        // NOTE(review): the NameNode JMX endpoint (JMX_QRY + readOutput) could
        // also provide these metrics, but the webUI port moved from 50070 to
        // 9870 across Hadoop versions and cannot be discovered dynamically here.
    }

    /**
     * Reads the full response body of the given URL as a UTF-8 string.
     * @param url endpoint to read (e.g. the NameNode JMX servlet)
     * @return the response body with line breaks removed
     * @throws IOException on connection or read failure
     */
    private  String readOutput(URL url) throws IOException {
        StringBuilder out = new StringBuilder();
        URLConnection connection = url.openConnection();
        // FIX: try-with-resources closes the reader even when readLine throws.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                out.append(inputLine);
            }
        }
        return out.toString();
    }

    /**
     * Opens the HDFS FileSystem for user "hdfs" if not already connected.
     * NOTE(review): user name / password validation is still a TODO.
     * @return the shared FileSystem handle; may be null when the connection failed
     */
    private FileSystem connectToHDFS()  {
        try {
            if(fs == null){
                Configuration conf = new Configuration();
                String ip = dataBaseDTO.getIp();
                String port = dataBaseDTO.getHost();
                //TODO: validate user name / password
                fs = FileSystem.get(new URI("hdfs://"+ip+":"+port),conf,"hdfs");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return fs;
    }

    /**
     * Lists the directory/file tree under the given path.
     * @param path root path to enumerate
     * @return the tree serialized as a JSON string (empty object when missing)
     */
    @Override
    public String listPath(String path){
        Map<String,Object> map = new HashMap<>();
        iteratePath(path,map);
        JSONObject ob = new JSONObject();
        ob.putAll(map);
        return ob.toJSONString();
    }

    // Recursively fills 'map' with the tree below 'path': files map to "",
    // directories map to a nested Map of their children.
    private void iteratePath(String path,Map<String,Object> map){
        try {
            Path p = new Path(path);
            if(fs.exists(p)){
                //NOTE: listing may fail with a permission error when the
                //connected user differs from the path owner:
                //  org.apache.hadoop.ipc.RemoteException: Permission denied
                //  org.apache.hadoop.security.AccessControlException
                FileStatus[] statuses = fs.listStatus(p);
                for (FileStatus status : statuses) {
                    String name = status.getPath().getName();
                    map.put(name,"");
                    if(status.isDirectory()){
                        Map<String,Object>tmp = new HashMap<>();
                        map.put(name, tmp);
                        iteratePath(status.getPath().toString(),tmp);
                    }
                }
            }else{
                System.out.printf("path(%s) NOT exist.\n",path);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts the files directly under a path.
     * @param pathName path to inspect
     * @return number of immediate child files
     */
    @Override
    public long totalCount(String pathName) {
        return getTablenamesOfDB(pathName).size();
    }

    /**
     * Counts the files (tables) under a directory (database).
     * @param dirPath directory path
     * @return number of files
     */
    @Override
    public int countTables(String dirPath) {
        return (int)totalCount(dirPath);
    }

    /**
     * Lists the paths of all regular files directly under a directory.
     * @param dirPath directory path (the "database")
     * @return file paths; empty list when the path is missing or listing fails
     */
    @Override
    public List<String> getTablenamesOfDB(String dirPath) {
        List<String> list = new ArrayList<>();
        try{
            Path p = new Path(dirPath);
            if(fs.exists(p)){
                for (FileStatus status : fs.listStatus(p)) {
                    if(status.isFile()){
                        list.add(status.getPath().toUri().getPath());
                    }
                }
            }
        }catch (Exception e) {
            e.printStackTrace();
        }
        // FIX: return moved out of 'finally' so unexpected errors are no longer
        // silently swallowed by the finally-return.
        return list;
    }

    /**
     * Wraps a single file's info into the row/field structure used by list views.
     * @param filePath path of the file to describe
     * @return map with "data" (rows), "fields" (column names) and "pk";
     *         empty map when the path is missing or not a regular file
     */
    public Map<String,Object> extractFile(String filePath){
        Map<String,Object> rowDatas = new HashMap<>();
        List<Map<String,Object>> data = new ArrayList<>();
        Set<String> fields = new HashSet<>();
        try{
            Path p = new Path(filePath);
            // Only regular files are described; directories yield an empty map.
            if(fs.exists(p) && fs.isFile(p)){
                Map<String, Object> oneRowData = new HashMap<>();
                int sep = filePath.lastIndexOf("/");
                oneRowData.put("ID", filePath);
                oneRowData.put("文件路径", filePath);
                oneRowData.put("文件名称", filePath.substring(sep+1));
                data.add(oneRowData);
                fields.add("ID");
                fields.add("文件路径");
                fields.add("文件名称");
                rowDatas.put("data", data);
                rowDatas.put("fields", fields);
                rowDatas.put("pk", "ID");
            }
        }catch (Exception e){
            e.printStackTrace();
        }
        return rowDatas;
    }

    /**
     * Downloads an HDFS file to the local file system.
     * @param src file path on HDFS
     * @param dst local destination path
     * @return 1 on success, -1 on failure
     */
    @Override
    public int copyFileToLocal(String src,String dst) {
        try {
            Path srcPath = new Path(src);
            Path dstPath = new Path(dst);
            if(fs.exists(srcPath) && fs.isFile(srcPath)){
                // delSrc=false keeps the source; useRawLocalFileSystem=true
                // skips the local .crc checksum file.
                fs.copyToLocalFile(false,srcPath, dstPath,true);
                return 1;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return -1;
    }

    /**
     * Streams an HDFS file down to the HTTP client.
     * @param resp response the file bytes are written to
     * @param src file path on HDFS
     * @return 1 on success, -1 on failure
     */
    @Override
    public int copyFileToClient(HttpServletResponse resp,String src) {
        FSDataInputStream fis = null;
        ServletOutputStream sos = null;
        try {
            Path srcPath = new Path(src);
            if(fs.exists(srcPath) && fs.isFile(srcPath)){
                fis = fs.open(srcPath, 8192);
                sos = resp.getOutputStream();
                byte[] buffer = new byte[8192];
                int read;
                while((read = fis.read(buffer)) != -1){
                    sos.write(buffer, 0, read);
                }
                return 1;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }finally{
            try{
                if(sos != null){
                    sos.close();
                }
                if(fis != null){
                    fis.close();
                }
            }catch(IOException e){
                // FIX: was an empty catch; at least record the close failure.
                e.printStackTrace();
            }
        }
        return -1;
    }

    /**
     * Uploads the client's request body to an HDFS file.
     * @param req request whose character body is the file content
     * @param hdfsPath destination HDFS file path (parent dirs auto-created)
     * @return 1 on success, -1 on failure (e.g. the file already exists)
     */
    @Override
    public int uploadToHDFS(HttpServletRequest req, String hdfsPath) {
        Path path = new Path(hdfsPath);
        fs.getConf().setInt("io.file.buffer.size", 8192);
        // try-with-resources closes writer -> stream -> reader even on error.
        // overwrite=false: re-uploading an existing file fails deliberately.
        try (BufferedReader reader = req.getReader();
             FSDataOutputStream fos = fs.create(path, false);
             Writer writer = new OutputStreamWriter(fos, StandardCharsets.UTF_8)) {
            // FIX: the original copied one char at a time with fos.write(int),
            // which keeps only the low byte of each char and corrupts any
            // multi-byte character; copy chars through a UTF-8 writer in
            // buffered chunks instead.
            char[] buffer = new char[8192];
            int read;
            while((read = reader.read(buffer)) != -1){
                writer.write(buffer, 0, read);
            }
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return -1;
    }

    /**
     * Deletes an HDFS file or directory (recursively).
     * @param hdfsPath path to delete; a missing path simply fails
     * @return 1 on success, -1 on failure
     */
    @Override
    public int deleteHDFSFile(String hdfsPath) {
        try {
            Path path = new Path(hdfsPath);
            // recursive=true removes everything under a directory
            if(fs.delete(path, true)){
                return 1;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return -1;
    }

    /**
     * Used storage size in MB, rounded to two decimals.
     * @param path path to measure; null/empty measures the whole file system
     * @return used MB, 0.0 when the path is missing or the query fails
     */
    @Override
    public double storeSizeOfMB(String path) {
        try {
            if(StringUtils.isEmpty(path)){
                // whole file system
                long used = fs.getStatus().getUsed(); //bytes
                return Double.parseDouble(String.format(Locale.ROOT, "%.2f", (double)used / (1024*1024)));
            }else if(fs.exists(new Path(path))){
                // sum the lengths of every file below the path
                List<String> pathList = new ArrayList<>();
                getAllFilesPath(new Path(path),pathList);
                long used = 0;
                for (String filePath : pathList) {
                    used += fs.getFileStatus(new Path(filePath)).getLen(); //bytes
                }
                // FIX: Locale.ROOT keeps the '.' decimal separator parseable.
                return Double.parseDouble(String.format(Locale.ROOT, "%.2f", (double)used / (1024*1024)));
            }
        }catch (IOException e) {
            e.printStackTrace();
        }
        return 0.0;
    }

    /**
     * @return total used storage size in MB
     */
    @Override
    public double storeSizeOfMB() {
        return storeSizeOfMB(null);
    }

    // Recursively collects the paths of every file below 'path' (which may
    // itself be a file) into pathList.
    private void getAllFilesPath(Path path,List<String> pathList){
        try{
            if(fs.exists(path)){
                if(fs.isFile(path)){
                    pathList.add(fs.getFileStatus(path).getPath().toUri().getPath());
                }else{
                    for (FileStatus status : fs.listStatus(path)) {
                        getAllFilesPath(status.getPath(), pathList);
                    }
                }
            }
        }catch(Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Opens an HDFS file for reading.
     * TODO: stream video files to the front end.
     * @param filePath file path
     * @return an open stream (caller must close it), or null when unavailable
     */
    @Override
    public FSDataInputStream getFileBytes(String filePath) {
        try {
            if(StringUtils.isEmpty(filePath)){
                return null;
            }
            Path p = new Path(filePath);
            if(fs.exists(p) && fs.isFile(p)){
                return fs.open(p);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Simple tree node describing a directory or file by name.
     */
    public class PathInfos implements Comparable<PathInfos>{
        // directory or file name
        String name;
        PathInfos children;
        public PathInfos(String name) {
            this.name = name;
        }
        public PathInfos() {}
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public PathInfos getChildren() {
            return children;
        }
        public void setChildren(PathInfos children) {
            this.children = children;
        }
        @Override
        public String toString() {
            return "{\""+name+"\":"+children+"}";
        }
        @Override
        public int compareTo(PathInfos o) {
            // FIX: the original always returned 1, violating the Comparable
            // contract (both a.compareTo(b) and b.compareTo(a) were positive,
            // so sorting was non-deterministic). Order by name, null-safe
            // (null names sort first).
            if (this.name == null) {
                return (o.name == null) ? 0 : -1;
            }
            return (o.name == null) ? 1 : this.name.compareTo(o.name);
        }
    }

}

 
