<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.4</version>
    </dependency>
</dependencies>
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.InputStream;
import java.util.List;
/**
 * @Author: gh
 * @Description: List files, delete files, download files, upload files, and count the files
 * under a path; report HDFS usage rate and storage size; read a file's bytes as a stream.
 */
public interface HdfsDao {
///opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop/etc/hadoop
//List the files under a path
public String listPath(String path);
/**
 * Download a file to the local file system
 * @param src path of the file on the remote FS
 * @param dst local destination path
 * @return 1 on success, -1 on failure
 */
public int copyFileToLocal(String src,String dst);
/**
 * Stream a file down to the requesting client
 * @param resp response to the client request
 * @param src path of the file on the remote FS
 * @return 1 on success, -1 on failure
 */
public int copyFileToClient(HttpServletResponse resp,String src);
/**
 * Upload the client request body to the remote Hadoop file system.
 * @param req client request
 * @param hdfsPath destination HDFS path
 * @return 1 on success, -1 on failure
 */
public int uploadToHDFS(HttpServletRequest req,String hdfsPath);
/**
 * Delete a file OR directory on HDFS (a directory is deleted with all of its contents)
 * @param hdfsPath HDFS path
 * @return 1 on success, -1 on failure
 */
public int deleteHDFSFile(String hdfsPath);
/**
 * Count the files under a path (at the next directory level).
 * @param pathName name of the path
 * @return total number of files
 */
public long totalCount(String pathName);
/**
 * Count the files (analogous to tables) under a directory path (analogous to a database)
 * @param dirPath directory path
 * @return number of files
 */
public int countTables(String dirPath);
/**
 * Get the names of all files (analogous to tables) under a directory path (analogous to a database).
 * @param dirPath directory path
 * @return list of file names
 */
public List<String> getTablenamesOfDB(String dirPath);
/**
 * Non DFS Used = (Total Disk Space - Reserved Space) - Remaining Space - DFS Used
 * HDFS usage rate = DFS Used / DFS space
 *                 = DFS Used / (Total Disk Space - Reserved Space - Remaining Space)
 * DFS Used = bpUsed (block pool used)
 * Reserved Space = dfs.datanode.du.reserved
 * @return usage rate as a percentage, rounded to two decimals
 */
public double usedRate();
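//Worked example (illustrative numbers only, not from the original post): with Total = 100 GB,
//Reserved = 10 GB, Remaining = 40 GB and DFS Used = 30 GB,
//the rate is 30 / (100 - 10 - 40) = 0.6, i.e. 60%.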
/**
 * @param path path to query
 * @return used storage size in MB under the given path
 */
public double storeSizeOfMB(String path);
/**
 * @return total used storage size in MB
 */
public double storeSizeOfMB();
/**
 * Open an input stream over the file's bytes
 * @param filePath file path
 * @return an input stream for the file, or null if it does not exist
 */
public InputStream getFileBytes(String filePath);
}
import com.alibaba.fastjson.JSONObject;
import com.genius.pojo.pg.dto.DataBaseDTO;
import com.google.common.base.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.HAUtil;
import org.springframework.util.StringUtils;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;
/**
 * @Author: gh
 * @Description: Connect to HDFS; create, delete, list and download files.
 */
public class HdfsDaoImpl implements HdfsDao{
FileSystem fs = null;
DataBaseDTO dataBaseDTO = null;
//CapacityTotalGB = Total Disk Space (Configured Capacity)
//CapacityRemainingGB = Remaining Space (DFS Remaining)
//CapacityUsedGB = DFS Used
//Disk reserved space = dfs.datanode.du.reserved
final String JMX_QRY = "/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem";
public HdfsDaoImpl(DataBaseDTO dbd) {
// System.setProperty("hadoop.home.dir", "C:/hadoop-2.6.0");
dataBaseDTO = dbd;
fs = connectToHDFS();
}
public FileSystem getFs() {
return fs;
}
public void setFs(FileSystem fs) {
this.fs = fs;
}
/**
 * Close the file system
 */
public void close(){
try{
if(this.fs != null){
this.fs.close();
}
}catch(IOException e){
e.printStackTrace();
}
}
public boolean connected(){
try{
if(getFs() == null){
return false;
}
/*InetSocketAddress isa = HAUtil.getAddressOfActive(getFs());
String hostString = isa.getHostString();*/
//Probe the connection by checking that the database root path exists
return getFs().exists(new Path(this.dataBaseDTO.getDbName()));
}catch(IOException e){
e.printStackTrace();
}
return false;
}
@Override
public double usedRate() {
double rate = 0.0;
Configuration conf = fs.getConf();
//conf.set("fs.defaultFS", "hdfs://"+dataBaseDTO.getIp()+":"+dataBaseDTO.getHost());
//conf.set("dfs.namenode.http-address","http://zya3:9870");
//unit: bytes
final long reservedSpace = conf.getLongBytes("dfs.datanode.du.reserved", 0);
try {
FsStatus fsStatus = fs.getStatus();
final long capacityTotal = fsStatus.getCapacity(); //bytes
final long capacityUsed = fsStatus.getUsed(); //bytes
final long capacityRemaining = fsStatus.getRemaining(); //bytes
//rate = DFS Used / (Total Disk Space - Reserved Space - Remaining Space)
rate = (double)capacityUsed / (capacityTotal - reservedSpace - capacityRemaining);
} catch (IOException e) {
e.printStackTrace();
}
//Round to two decimals and return as a percentage
return Double.parseDouble(String.format("%.2f",rate*100));
//NOTE: the code below can also read the used space, but the NameNode web UI port changed
//from 50070 to 9870 and cannot be discovered dynamically here.
/*try {
List<DFSUtil.ConfiguredNNAddress> nns = DFSUtil.flattenAddressMap(
DFSUtil.getNNServiceRpcAddresses(conf));
for (DFSUtil.ConfiguredNNAddress nnAddress : nns) {
InetSocketAddress addr = nnAddress.getAddress();
URI uri = DFSUtil.getInfoServer(addr, conf, DFSUtil.getHttpClientScheme(conf));
URL url = new URL(uri.toURL(), JMX_QRY);
String result = readOutput(url);
JSONObject jo = JSON.parseObject(result)
.getJSONArray("beans")
.getJSONObject(0);
final long capacityTotal = jo.getLongValue("CapacityTotal"); //bytes
final long capacityUsed = jo.getLongValue("CapacityUsed"); //bytes
final long capacityRemaining = jo.getLongValue("CapacityRemaining"); //bytes
//rate = DFS Used / (Total Disk Space - Reserved Space - Remaining Space)
rate = (double)capacityUsed / (capacityTotal - reservedSpace - capacityRemaining);
System.out.println("rate: "+rate);
}
} catch (Exception e) {
e.printStackTrace();
}*/
}
private String readOutput(URL url) throws IOException {
StringBuilder out = new StringBuilder();
URLConnection connection = url.openConnection();
BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charsets.UTF_8));
String inputLine;
while ((inputLine = in.readLine()) != null) {
out.append(inputLine);
}
in.close();
return out.toString();
}
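//A minimal sketch of the JMX route noted above, assuming the NameNode web UI address is
//already known (the host and port below are hypothetical placeholders):
private long readCapacityTotalViaJmx() throws IOException {
URL url = new URL("http://namenode-host:50070" + JMX_QRY);
String json = readOutput(url);
return JSONObject.parseObject(json)
.getJSONArray("beans")
.getJSONObject(0)
.getLongValue("CapacityTotal"); //bytes
}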
/**
 * Connect to HDFS and obtain a FileSystem handle.
 * Secure communication: Kerberos.
 * @return the connected FileSystem
 */
private FileSystem connectToHDFS() {
try {
if(fs == null){
Configuration conf = new Configuration();
//Read the configuration files under the resources directory:
//fs = FileSystem.get(conf);
//Or build the URI directly without configuration files:
String ip = dataBaseDTO.getIp();
String port = dataBaseDTO.getHost(); //the DTO's host field carries the port here
//TODO: validate username and password
fs = FileSystem.get(new URI("hdfs://"+ip+":"+port),conf,"hdfs");
// FSDataInputStream fis = fs.open(new Path(dataBaseDTO.getDbName()));
return fs;
}
} catch (Exception e) {
e.printStackTrace();
}
return fs;
}
/**
 * Get the directory and file information under the given path as a JSON string.
 * @param path path to list
 */
@Override
public String listPath(String path){
Map<String,Object> map = new HashMap<>();
iteratePath(path,map);
JSONObject ob = new JSONObject();
ob.putAll(map);
return ob.toJSONString();
}
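//iteratePath builds a nested map in which plain files map to "" and directories map to
//sub-maps. For illustration, a path holding a file "users.csv" and a directory "logs"
//containing "part-0000" serializes to: {"users.csv":"","logs":{"part-0000":""}}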
private void iteratePath(String path,Map<String,Object> map){
try {
Path p = new Path(path);
if(fs.exists(p)){
//RemoteIterator<LocatedFileStatus> it= fs.listFiles(path, true);
//NOTE: a mismatched username causes read-permission failures:
// org.apache.hadoop.ipc.RemoteException: Permission denied
//org.apache.hadoop.security.AccessControlException
FileStatus[] statuses = fs.listStatus(p);
for (FileStatus status : statuses) {
String name = status.getPath().getName();
map.put(name,"");
if(status.isDirectory()){
Map<String,Object>tmp = new HashMap<>();
map.put(name, tmp);
iteratePath(status.getPath().toString(),tmp);
}
}
}else{
System.out.printf("path(%s) does not exist.%n",path);
}
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public long totalCount(String pathName) {
return getTablenamesOfDB(pathName).size();
}
@Override
public int countTables(String dirPath) {
return (int)totalCount(dirPath);
}
@Override
public List<String> getTablenamesOfDB(String dirPath) {
Path p = new Path(dirPath);
List<String> list = new ArrayList<>();
try{
if(fs.exists(p)){
FileStatus[] statuses = fs.listStatus(p);
for (FileStatus status : statuses) {
if(status.isFile()){
list.add(status.getPath().toUri().getPath());
}
}
}
}catch (Exception e) {
e.printStackTrace();
}
return list;
}
//Wrap a file's information for list display
public Map<String,Object> extractFile(String filePath){
Map<String,Object> rowDatas = new HashMap<>();
List<Map<String,Object>> data = new ArrayList<>();
Set<String> fields = new HashSet<>();
//Check that the path exists and is a file
try{
Path p = new Path(filePath);
boolean existed = fs.exists(p);
boolean isFile = fs.isFile(p);
if(existed && isFile){
Map<String, Object> oneRowData = new HashMap<>();
int sep = filePath.lastIndexOf("/");
oneRowData.put("ID", filePath);
oneRowData.put("文件路径", filePath);
oneRowData.put("文件名称", filePath.substring(sep+1));
data.add(oneRowData);
fields.add("ID");
fields.add("文件路径");
fields.add("文件名称");
rowDatas.put("data", data);
rowDatas.put("fields", fields);
rowDatas.put("pk", "ID");
}
}catch (Exception e){
e.printStackTrace();
}
return rowDatas;
}
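//For a file such as "/db/users.csv" the returned map looks like:
//{"pk":"ID","fields":["ID","文件路径","文件名称"],
// "data":[{"ID":"/db/users.csv","文件路径":"/db/users.csv","文件名称":"users.csv"}]}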
@Override
public int copyFileToLocal(String src,String dst) {
try {
Path srcPath = new Path(src);
Path dstPath = new Path(dst);
boolean existed = fs.exists(srcPath);
boolean isFile = fs.isFile(srcPath);
if(existed && isFile){
//Keep the source file after download (delSrc = false); write via the raw local file system (true)
fs.copyToLocalFile(false,srcPath, dstPath,true);
return 1; //download succeeded
}
} catch (Exception e) {
e.printStackTrace();
}
return -1; //download failed
}
@Override
public int copyFileToClient(HttpServletResponse resp,String src) {
FSDataInputStream fis = null;
ServletOutputStream sos = null;
try {
Path srcPath = new Path(src);
boolean existed = fs.exists(srcPath);
boolean isFile = fs.isFile(srcPath);
if(existed && isFile){
//Open the file with an 8 KB buffer and stream it to the client
fis = fs.open(srcPath, 8192);
sos = resp.getOutputStream();
byte[]buffer=new byte[8192];
int read = 0;
while((read = fis.read(buffer)) != -1){
sos.write(buffer, 0, read);
}
return 1; //download succeeded
}
} catch (Exception e) {
e.printStackTrace();
}finally{
try{
if(sos != null){
sos.close();
}
if(fis != null){
fis.close();
}
}catch(IOException e){ /*ignore close failures*/ }
}
return -1; //download failed
}
@Override
public int uploadToHDFS(HttpServletRequest req, String hdfsPath) {
InputStream in = null;
FSDataOutputStream fos = null;
try {
//Read the raw request body as bytes; a character Reader would corrupt binary uploads
in = req.getInputStream();
//No need to check whether the path exists; it is created automatically
Path path = new Path(hdfsPath);
fs.getConf().setInt("io.file.buffer.size", 8192);
//Refuse duplicate uploads (overwrite = false)
fos = fs.create(path, false);
byte[] buffer = new byte[8192];
int read;
while((read = in.read(buffer)) != -1){
fos.write(buffer, 0, read);
}
return 1; //upload succeeded
} catch (IOException e) {
e.printStackTrace();
}finally{
try {
if(in != null){
in.close();
}
if(fos != null){
fos.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
return -1; //upload failed
}
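//Design note: this writes the raw request body, so the client must send the bare file
//contents as the body. A multipart/form-data upload would first need a parser such as
//Apache Commons FileUpload before the bytes reach HDFS.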
@Override
public int deleteHDFSFile(String hdfsPath) {
try {
Path path = new Path(hdfsPath);
//No need to check that the path exists; deleting a missing path simply returns false
//Recursively delete a directory's contents (recursive = true)
boolean isDeleted = fs.delete(path, true);
if(isDeleted){
return 1; //deletion succeeded
}
} catch (IOException e) {
e.printStackTrace();
}
return -1; //deletion failed
}
@Override
public double storeSizeOfMB(String path) {
try {
if(StringUtils.isEmpty(path)){
//No path given: report the usage of the whole file system
long used = fs.getStatus().getUsed(); //bytes
return Double.parseDouble(String.format("%.2f",(double)used / (1024*1024)));
}else if(fs.exists(new Path(path))){
//Sum the sizes of every file under the given path
List<String> pathList = new ArrayList<>();
getAllFilesPath(new Path(path),pathList);
long used = 0;
for (String filePath : pathList) {
used += fs.getFileStatus(new Path(filePath)).getLen();//bytes
}
return Double.parseDouble(String.format("%.2f",(double)used / (1024*1024)));
}
}catch (IOException e) {
e.printStackTrace();
}
return 0.0;
}
@Override
public double storeSizeOfMB() {
return storeSizeOfMB(null);
}
//Collect the paths of all files under a path (either a directory or a single file)
private void getAllFilesPath(Path path,List<String> pathList){
try{
if(fs.exists(path)){
if(fs.isFile(path)){
pathList.add(fs.getFileStatus(path).getPath().toUri().getPath());
}else{
FileStatus[] statuses = fs.listStatus(path);
for (FileStatus status : statuses) {
getAllFilesPath(status.getPath(), pathList);
}
}
}
}catch(Exception e){
e.printStackTrace();
}
}
//TODO: return video files to the front end: http://www.it1352.com/888048.html
@Override
public FSDataInputStream getFileBytes(String filePath) {
try {
if(StringUtils.isEmpty(filePath)){
return null;
}
Path p = new Path(filePath);
boolean existed = fs.exists(p);
boolean isFile = fs.isFile(p);
if(existed && isFile){
return fs.open(p);
}
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
public class PathInfos implements Comparable<PathInfos>{
//Name of the directory or file
String name;
PathInfos children;
//List<PathInfos> children;
public PathInfos(String name) {
this.name = name;
//this.children = new ArrayList<>();
}
public PathInfos() {}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public PathInfos getChildren() {
return children;
}
public void setChildren(PathInfos children) {
this.children = children;
}
/*public List<PathInfos> getChildren() {
return children;
}
public void setChildren(List<PathInfos> children) {
this.children = children;
}
public void addChildren(PathInfos path){
this.children.add(path);
}*/
@Override
public String toString() {
return "{\""+name+"\":"+children+"}";
}
@Override
public int compareTo(PathInfos o) {
//Order by name (a constant result would violate the Comparable contract)
return String.valueOf(this.name).compareTo(String.valueOf(o.name));
}
}
}
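//A minimal usage sketch. The DataBaseDTO setters below are assumed to mirror the
//getIp()/getHost()/getDbName() accessors used above; the address and paths are
//placeholders, not values from the original post.
class HdfsDaoDemo {
public static void main(String[] args) {
DataBaseDTO dto = new DataBaseDTO(); //assumes a no-arg constructor
dto.setIp("192.168.1.10"); //NameNode host (placeholder)
dto.setHost("8020"); //RPC port; the DTO's host field carries the port
dto.setDbName("/user/hive/warehouse/demo"); //root path treated as the "database"
HdfsDaoImpl dao = new HdfsDaoImpl(dto);
if (dao.connected()) {
System.out.println(dao.listPath("/user/hive/warehouse/demo"));
System.out.println("used rate: " + dao.usedRate() + "%");
System.out.println("stored MB: " + dao.storeSizeOfMB());
}
dao.close(); //release the FileSystem handle
}
}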