The previous post introduced the architecture and operating principles of HDFS; if you'd like to read it, see https://blog.youkuaiyun.com/weixin_42231373/article/details/85005667
Now let's talk about how to develop against HDFS with the Java API. Below is a simple outline of the steps; the code follows afterwards.
When setting up the project, edit the pom.xml file to add the Hadoop dependency. To be clear, this is for a Maven project; for a plain project, download the dependency yourself and add it to the classpath. If you are not sure how, see my earlier post, which explains it briefly: https://blog.youkuaiyun.com/weixin_42231373/article/details/84654576
The Maven dependency is:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.4</version>
    <scope>provided</scope>
</dependency>
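A note on the provided scope: it assumes the Hadoop installation on the cluster supplies these classes at runtime, so they are not bundled into your jar. To run or debug locally from the IDE, you may need to remove the scope or change it to compile.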
Get a logger: Logger logger = Logger.getLogger(cls);
Create the HDFS configuration object: Configuration conf = new Configuration();
Pass parameters through conf with conf.set(name, value); read them back with conf.get(name)
Get the HDFS file system: FileSystem fs = FileSystem.get(conf);
Get HDFS handles (input/output streams, the various operations), e.g.
FSDataInputStream fsis = fs.open(new Path("an HDFS path"));
Write the actual processing logic (read from the input stream, write output, etc.)
After development, package the jar, upload it, and run a test. A minimal sketch tying these steps together is shown below.
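Here is a minimal read-only sketch of those steps (the fs.defaultFS URI and the HDFS path are placeholders of my own, not values from any particular cluster):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
public class HdfsReadSketch {
    private static final Logger logger = Logger.getLogger(HdfsReadSketch.class);
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Parameters can be passed in through the configuration, e.g.:
        // conf.set("fs.defaultFS", "hdfs://namenode:9000");
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream fsis = null;
        try {
            fsis = fs.open(new Path("/user/zhangxin/index.html")); // example HDFS path
            byte[] buf = new byte[4096];
            int n;
            while ((n = fsis.read(buf)) != -1) {
                System.out.write(buf, 0, n); // dump the file to stdout
            }
            System.out.flush();
        } finally {
            if (fsis != null) {
                fsis.close();
            }
        }
        logger.info("read finished");
    }
}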
Below is the concrete code for manipulating HDFS files, presented as a utility class.
package cn.tl.util;
import java.io.IOException;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
public class HdfsFileOperatorUtil {
    private static final Logger logger = Logger.getLogger(HdfsFileOperatorUtil.class);
    static Configuration hadoopConf = new Configuration();

    public static byte[] readFileToByteArray(String filePath) throws Exception {
        byte[] result = null;
        if (filePath == null || filePath.trim().length() == 0) {
            throw new Exception("Invalid file path: " + filePath + ", please check");
        }
        FSDataInputStream hdfsIS = null;
        ByteArrayOutputStream baos = null;
        try {
            FileSystem fs = FileSystem.get(hadoopConf);
            Path hdfsPath = new Path(filePath);
            hdfsIS = fs.open(hdfsPath);
            byte[] b = new byte[65536];
            baos = new ByteArrayOutputStream();
            int flag = -1;
            while ((flag = hdfsIS.read(b)) != -1) {
                // Only write the bytes actually read; writing the whole buffer
                // would append stale bytes on the final, partially filled read.
                baos.write(b, 0, flag);
            }
            result = baos.toByteArray();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            CloseUtil.close(hdfsIS);
            CloseUtil.close(baos);
        }
        return result;
    }
    public static String readFileToString(String filePath) throws Exception {
        return new String(readFileToByteArray(filePath), "utf-8");
    }
    public static boolean writeFile(byte[] content, String toFilePath)
            throws Exception {
        boolean isFinish = false;
        if (toFilePath == null || toFilePath.trim().length() == 0) {
            throw new Exception("Invalid file path: " + toFilePath + ", please check");
        }
        FSDataOutputStream hdfsOS = null;
        try {
            FileSystem fs = FileSystem.get(hadoopConf);
            Path hdfsPath = new Path(toFilePath);
            hdfsOS = fs.create(hdfsPath);
            hdfsOS.write(content);
            isFinish = true;
        } catch (Exception e) {
            isFinish = false;
            e.printStackTrace();
        } finally {
            CloseUtil.close(hdfsOS);
        }
        return isFinish;
    }
    /**
     * List all files under a directory.
     *
     * @throws IOException
     */
    public static String readFilesListToString(String filesPath)
            throws IOException {
        StringBuilder sb = new StringBuilder();
        FileSystem fs = FileSystem.get(hadoopConf);
        FileStatus[] fileStatuses = fs.listStatus(new Path(filesPath));
        for (FileStatus fileStatus : fileStatuses) {
            sb.append(fileStatus.getPath().toString()).append(" ");
        }
        return sb.toString();
    }
}
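A quick usage sketch for the listing method (the directory below is a placeholder of my own):
String files = HdfsFileOperatorUtil.readFilesListToString("/user/zhangxin");
System.out.println(files); // full URI of each entry, separated by spaces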
A utility class for local file operations on the cluster node:
package cn.tl.util;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import org.apache.commons.io.output.ByteArrayOutputStream;
public class LocalFileOperatorUtil {
    public static byte[] readFileToByte(String filePath) throws Exception {
        byte[] result = null;
        if (filePath == null || filePath.trim().length() == 0) {
            throw new Exception("Invalid file path: " + filePath + ", please check");
        }
        BufferedInputStream bis = null;
        ByteArrayOutputStream baos = null;
        try {
            bis = new BufferedInputStream(new FileInputStream(filePath));
            baos = new ByteArrayOutputStream();
            int reads = -1;
            // Byte-by-byte copy; the BufferedInputStream keeps this reasonably fast.
            while ((reads = bis.read()) != -1) {
                baos.write(reads);
            }
            result = baos.toByteArray();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            CloseUtil.close(baos);
            CloseUtil.close(bis);
        }
        return result;
    }
    public static String readFileToString(String filePath) throws Exception {
        if (filePath == null || filePath.trim().length() == 0) {
            throw new Exception("Invalid file path: " + filePath + ", please check");
        }
        return new String(readFileToByte(filePath), "utf-8");
    }
}
Appendix: the CloseUtil utility class
package cn.tl.util;
public class CloseUtil {
    public static void close(AutoCloseable obj) {
        if (obj != null) {
            try {
                obj.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
The test class:
package cn.tl.dataCirculation;
import java.io.File;
import cn.tl.util.HdfsFileOperatorUtil;
import cn.tl.util.LocalFileOperatorUtil;
public class readHtml {
    public static void main(String[] args) throws Exception {
        String filePath = File.separator + "home" + File.separator + "zhangxin"
                + File.separator + "index.html"; // local path
        // HDFS paths always use '/', regardless of the local OS separator.
        String toFilePath = "/user/zhangxin/index.html"; // HDFS path
        System.out.println("reading.....");
        byte[] readToByte = LocalFileOperatorUtil.readFileToByte(filePath);
        System.out.println("readEnd,writing....");
        boolean flag = HdfsFileOperatorUtil.writeFile(readToByte, toFilePath);
        String result = HdfsFileOperatorUtil.readFileToString(toFilePath);
        System.out.println(result);
        System.out.println("done");
        System.out.println(flag);
    }
}
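Once this compiles, package it (mvn package for a Maven project), upload the jar to a cluster node, and launch it with something like: hadoop jar hdfs-demo.jar cn.tl.dataCirculation.readHtml (the jar name here is a placeholder). The hadoop jar launcher puts the cluster configuration and the hadoop-client classes, i.e. the provided-scope dependency above, on the classpath for you.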