对HDFS的操作既可以通过命令行完成(如 -put、-mkdir、-rm 等),也可以通过Java代码实现。
这里总结读、写两种操作,其他功能可以通过FileSystem类的方法实现。
HDFS读文件的操作:
可以通过简单的URL读取,也可以通过FileSystem读取,代码如下:
/**
 * Reads a file through {@link java.net.URL} and copies its contents to standard output.
 *
 * <p>Usage: {@code URLText <url>}
 */
public class URLText {
    static {
        // Register Hadoop's stream handler factory so that java.net.URL can open
        // Hadoop file system URLs. The JVM allows this factory to be set at most
        // once, which is why it is installed in a static initializer.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Opens the URL given as the first argument and prints its contents.
     *
     * @param a command-line arguments; {@code a[0]} is the URL of the file to print
     * @throws IllegalArgumentException if no URL is supplied
     * @throws Exception if the URL cannot be opened or read
     */
    public static void main(String[] a) throws Exception {
        // Fail with a clear usage message instead of an ArrayIndexOutOfBoundsException.
        if (a.length < 1) {
            throw new IllegalArgumentException("Usage: URLText <url>");
        }
        InputStream in = null;
        try {
            in = new URL(a[0]).openStream();
            // Arguments: input stream, output stream, buffer size in bytes, and whether
            // copyBytes should close both streams when it finishes. Pass false so that
            // System.out stays open; the input stream is closed in the finally block.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
/**
 * Reads a file through the Hadoop {@code FileSystem} API and copies its contents to
 * standard output.
 *
 * <p>Usage: {@code FileSystemText <uri>}
 */
public class FileSystemText {
    /**
     * Opens the file named by the first argument and prints its contents.
     *
     * @param args command-line arguments; {@code args[0]} is the URI of the file to print
     * @throws IllegalArgumentException if no URI is supplied
     * @throws Exception if the file system cannot be obtained or the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Fail with a clear usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: FileSystemText <uri>");
        }
        String url = args[0];
        Configuration configuration = new Configuration();
        // The same string serves both as the URI used to look up the file system
        // and as the path of the file to open on it.
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        InputStream in = null;
        try {
            in = fs.open(new Path(url));
            // This overload takes its buffer size from the configuration. Pass false so
            // that System.out stays open; the input stream is closed in the finally block.
            IOUtils.copyBytes(in, System.out, configuration, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
HDFS写文件的操作(下面的示例把本地文件追加到HDFS上已存在的文件;若要新建文件,可改用fs.create):
/**
 * Appends the contents of a local file to a file in HDFS.
 *
 * <p>Usage: {@code CopyFile <local-file> <hdfs-uri>}
 */
public class CopyFile {
    /**
     * Copies the local file {@code a[0]} onto the end of the HDFS file {@code a[1]}.
     *
     * @param a command-line arguments; {@code a[0]} is the local source file and
     *          {@code a[1]} is the URI of the destination file in HDFS
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception if either file cannot be opened or the copy fails
     */
    public static void main(String[] a) throws Exception {
        // Fail with a clear usage message instead of an ArrayIndexOutOfBoundsException.
        if (a.length < 2) {
            throw new IllegalArgumentException("Usage: CopyFile <local-file> <hdfs-uri>");
        }
        String localurl = a[0]; // path of the local source file
        String hdfsurl = a[1];  // URI of the destination file in HDFS
        Configuration configuration = new Configuration();
        InputStream inputStream
                = new BufferedInputStream(new FileInputStream(localurl));
        try {
            FileSystem fs = FileSystem.get(URI.create(hdfsurl), configuration);
            // append() adds to a file that already exists; to create the destination
            // file instead, use fs.create(new Path(hdfsurl)).
            OutputStream outputStream = fs.append(new Path(hdfsurl));
            // true: copyBytes closes both streams once it has been called.
            IOUtils.copyBytes(inputStream, outputStream, configuration, true);
        } finally {
            // Safety net: if FileSystem.get or append throws, copyBytes never runs and
            // would not close the local stream. Closing an already-closed stream is
            // harmless.
            IOUtils.closeStream(inputStream);
        }
    }
}
列出HDFS目录内容的操作:
/**
 * Lists the entries found under one or more HDFS paths and prints each entry's path.
 *
 * <p>Usage: {@code FileListCat <path> [<path> ...]}
 */
public class FileListCat {
    /**
     * Prints the path of every entry returned by listing the given paths.
     *
     * @param a command-line arguments; every element is a path to list, and
     *          {@code a[0]} is also used to look up the file system
     * @throws IllegalArgumentException if no path is supplied
     * @throws Exception if the file system cannot be obtained or a path cannot be listed
     */
    public static void main(String[] a) throws Exception {
        // Fail with a clear usage message instead of an ArrayIndexOutOfBoundsException.
        if (a.length < 1) {
            throw new IllegalArgumentException("Usage: FileListCat <path> [<path> ...]");
        }
        String url = a[0];
        Path[] p = new Path[a.length];
        for (int i = 0; i < a.length; i++) {
            p[i] = new Path(a[i]);
        }
        Configuration configuration = new Configuration();
        // The file system is looked up from the first argument only; all paths are
        // then listed through that one instance.
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        FileStatus[] listStatus = fs.listStatus(p);
        // Convert the status entries to plain paths before printing.
        Path[] stat2Paths = FileUtil.stat2Paths(listStatus);
        for (Path path : stat2Paths) {
            System.out.println(path);
        }
    }
}