Preface:
This article walks through manipulating files on HDFS from Java.
Environment:
1. A Hadoop cluster is already set up;
2. A Java development environment is already set up;
3. The Eclipse project is built with Maven (which guarantees a complete, consistent set of jars); a minimal dependency sketch follows.
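As a rough sketch of point 3, a single hadoop-client dependency in pom.xml pulls in the HDFS client and its transitive jars. The version shown is an assumption; match it to your cluster's Hadoop version:

<!-- pom.xml: one dependency that brings in the full HDFS client stack -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.2.0</version> <!-- assumed; use your cluster's version -->
</dependency>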
Code:
package com.lu.maven;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;
public class TestMaven {

    // basePath must match fs.defaultFS in the cluster's core-site.xml
    public static String basePath = "hdfs://mn1:8020";
    // the HDFS directory that all operations below work under
    public static String fileRootPath = basePath + "/input";

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        // required on Windows so Hadoop can locate winutils.exe (see "Problems" below)
        System.setProperty("hadoop.home.dir", "D:\\hadoop-common-2.2.0-bin-master");
        String directoryName = "books2";
        // [1] create a directory
        createDirectory(directoryName);
        // [2] delete the directory
        deleteDirectory(directoryName);
        // [3] list files
        listAll("/input");
        // [4] upload a file
        File file = new File("E:/apache-maven-3.5.2-bin.zip");
        Path destPath = new Path(fileRootPath, directoryName + "/" + file.getName());
        uploadLocalFileToHDFS(file, destPath);
        // [5] download the file
        downloadFileFromHDFS(destPath, new File("D:/"));
        // [6] delete the file
        boolean flag = deleteFile(destPath);
        System.out.println(flag);
    }
    public static void uploadLocalFileToHDFS(File localFile, Path destPath)
            throws Exception {
        System.out.println("Uploading: " + localFile.getName());
        Configuration config = new Configuration();
        FileSystem.setDefaultUri(config, new URI(basePath));
        FileSystem hdfs = FileSystem.get(config);
        // alternatively: hdfs.copyFromLocalFile(new Path(localFile.getPath()), destPath);
        FSDataOutputStream out = hdfs.create(destPath, new Progressable() {
            public void progress() {
                // invoked as data is written; hook for progress reporting
            }
        });
        InputStream in = new BufferedInputStream(new FileInputStream(localFile));
        IOUtils.copy(in, out);
        in.close();
        out.close();
        hdfs.close();
        System.out.println("Finished uploading: " + localFile.getName());
    }
    /**
     * Download a file from HDFS into a local directory.
     * @param destPath path of the file on HDFS
     * @param localDir local directory to save the file into
     * @throws Exception
     */
    public static void downloadFileFromHDFS(Path destPath, File localDir) throws Exception {
        System.out.println("Downloading: " + destPath.getName());
        Configuration config = new Configuration();
        FileSystem.setDefaultUri(config, new URI(basePath));
        FileSystem hdfs = FileSystem.get(config);
        if (hdfs.exists(destPath)) {
            FSDataInputStream in = hdfs.open(destPath);
            FileStatus stat = hdfs.getFileStatus(destPath);
            // the whole file is buffered in memory, so this only suits files under 2 GB
            byte[] buffer = new byte[(int) stat.getLen()];
            in.readFully(0, buffer);
            in.close();
            hdfs.close();
            FileOutputStream fos = new FileOutputStream(new File(localDir, destPath.getName()));
            IOUtils.write(buffer, fos);
            fos.close();
        }
        System.out.println("Finished downloading: " + destPath.getName());
    }
    /**
     * Delete a file (or directory) on HDFS.
     * @param destPath
     * @throws Exception
     */
    public static boolean deleteFile(Path destPath) throws Exception {
        Configuration config = new Configuration();
        FileSystem.setDefaultUri(config, new URI(basePath));
        FileSystem hdfs = FileSystem.get(config);
        if (hdfs.exists(destPath)) {
            // report success only after the delete has actually happened
            boolean deleted = hdfs.delete(destPath, true); // true = recursive
            System.out.println(deleted ? "Deleted successfully!" : "Delete failed!");
            return deleted;
        }
        return false;
    }
    public static void listAll(String dir) throws Exception {
        Configuration config = new Configuration();
        FileSystem.setDefaultUri(config, new URI(basePath));
        FileSystem fs = FileSystem.get(config);
        FileStatus[] stats = fs.listStatus(new Path(basePath, dir));
        for (int i = 0; stats != null && i < stats.length; ++i) {
            if (!stats[i].isDir()) {
                // regular file
                System.out.println("File: " + stats[i].getPath().toString() + "====" + stats[i].getGroup());
            } else {
                // directory
                System.out.println("Directory: " + stats[i].getPath().toString() + "====" + stats[i].getGroup());
            }
        }
        fs.close();
    }
    public static void createDirectory(String directoryName) throws Exception {
        Configuration config = new Configuration();
        FileSystem.setDefaultUri(config, new URI(basePath));
        FileSystem fs = FileSystem.get(config);
        fs.mkdirs(new Path(fileRootPath, directoryName));
        fs.close();
        System.out.println("Created " + fileRootPath + "/" + directoryName + " successfully");
    }
    public static void deleteDirectory(String directoryName) throws Exception {
        Configuration config = new Configuration();
        FileSystem.setDefaultUri(config, new URI(basePath));
        FileSystem fs = FileSystem.get(config);
        fs.delete(new Path(fileRootPath, directoryName), true); // true = recursive
        fs.close();
        System.out.println("Deleted " + fileRootPath + "/" + directoryName + " successfully");
    }
}
The above is the complete Java code; run it only after the environment setup described earlier is in place.
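For reference, the basePath constant at the top of the class must agree with the fs.defaultFS entry in the cluster's core-site.xml. For the host and port used above, that entry would look roughly like this (a sketch; adjust to your own cluster):

<property>
    <name>fs.defaultFS</name>
    <value>hdfs://mn1:8020</value>
</property>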
Problems encountered
1. Permission denied:
Cause: on the cluster, by default only the hdfs user may modify HDFS (delete, create, and so on). Code launched from a Java IDE runs as the local OS user rather than as hdfs, so those operations are rejected.
Solution: grant write permission on the HDFS directory being operated on (/input in my case), for example with hdfs dfs -chmod. Alternatively, see the client-side workaround sketched below.
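One widely used client-side workaround, assuming the cluster uses simple (non-Kerberos) authentication, is to tell the HDFS client which user to act as before the first FileSystem.get() call:

// Workaround sketch: act as the hdfs superuser from the client side.
// Only effective with simple (non-Kerberos) authentication; must run
// before the first FileSystem.get() call, e.g. at the top of main().
System.setProperty("HADOOP_USER_NAME", "hdfs");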
2. \bin\winutils.exe cannot be found:
18/01/23 09:40:08 ERROR util.Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:404)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:419)
The above is the error output.
Solution:
Add the following line at the beginning of the main method:
System.setProperty("hadoop.home.dir", "D:\\hadoop-common-2.2.0-bin-master");
The bin files can be downloaded from the link below; just unzip them:
http://download.youkuaiyun.com/download/qq_18684463/10219651
Summary
There are two key points:
1. Build the project with Maven
2. HDFS permission management