/**
 * Reads, line by line, every file located directly inside the directories matched by
 * {@code pathName} on the given HDFS file system.
 *
 * <p>{@code pathName} is resolved with {@code FileSystem.globStatus}. For each match that is a
 * directory, the files directly inside it (non-recursive listing) are decoded as UTF-8 text and
 * their lines are appended to the result. Matches that are not directories are only logged;
 * their content is not read.
 *
 * <p>The file system is accessed as user {@code "hdfs"}. I/O failures and interruptions are
 * logged and not propagated: the lines collected before the failure are returned.
 *
 * @param hdfsPath URI of the HDFS file system to connect to
 * @param pathName path (or glob pattern) selecting the directories whose files are read
 * @return the lines read, in the order they were encountered; never {@code null}, and empty
 *         when nothing matched or nothing could be read
 */
public static List<String> fileReadFromHdfs(String hdfsPath, String pathName) {
    List<String> list = new ArrayList<>();
    try {
        Configuration conf = new Configuration();
        URI uri = URI.create(hdfsPath);
        Path path = new Path(pathName);
        log.info("----------------------------------");
        log.info("hdfs uri:" + uri);
        log.info("path: " + path);
        log.info("----------------------------------");

        // newInstance (rather than get) hands back a handle that is not shared through the
        // FileSystem cache, so closing it here cannot invalidate a handle used elsewhere.
        try (FileSystem hdfs = FileSystem.newInstance(uri, conf, "hdfs")) {
            FileStatus[] files = hdfs.globStatus(path);
            if (files == null) {
                // globStatus may return null instead of an empty array when nothing matches.
                log.info("no match for path: " + path);
                return list;
            }
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    // Plain-file matches are reported but intentionally not read.
                    log.info("skipping non-directory match: " + file.getPath());
                    continue;
                }
                RemoteIterator<LocatedFileStatus> iterator = hdfs.listFiles(file.getPath(), false);
                while (iterator.hasNext()) {
                    Path fullPath = iterator.next().getPath();
                    log.info("---------文件夹下的子文件路径:" + fullPath);
                    // Read through the handle we already hold: same user, no extra connection,
                    // and no round trip through URI.create on the path string.
                    try (FSDataInputStream hdfsInStream = hdfs.open(fullPath);
                         BufferedReader reader = new BufferedReader(new InputStreamReader(
                                 hdfsInStream, java.nio.charset.StandardCharsets.UTF_8))) {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            list.add(line);
                        }
                    }
                }
            }
        }
    } catch (IOException e) {
        log.error("failed to read from hdfs, uri=" + hdfsPath + ", path=" + pathName, e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can react to it.
        Thread.currentThread().interrupt();
        log.error("interrupted while reading from hdfs, uri=" + hdfsPath + ", path=" + pathName, e);
    }
    return list;
}