前提:
Windows环境需安装winutils.exe,原因是Windows上没有Hadoop运行必须的插件winutils.exe,这个文件在Hadoop官网的安装包中是没有的,所以需要自行下载。
以下为各版本的winutils.exe和hadoop.dll, https://github.com/cdarlint/winutils
第一步:将winutils.exe解压至没有空格和中文的目录中,并将bin目录下的hadoop.dll复制至C:\Windows\System32中去;
第二步:配置环境变量
重启后生效
获得HDFS文件系统
FileSystem构造方法 该类是一个抽象类,通过以下三种方法可以获取FileSystem实例:
/**
 * Obtains a fresh FileSystem instance via {@code FileSystem.newInstance(URI, Configuration)},
 * prints its string form, then releases it.
 *
 * @throws Exception if the URI is malformed or the filesystem cannot be reached
 */
public void getFileSystem3() throws Exception {
    Configuration conf = new Configuration();
    URI clusterUri = new URI("hdfs://192.168.211.102:8020");
    FileSystem hdfs = FileSystem.newInstance(clusterUri, conf);
    System.out.println(hdfs.toString());
    hdfs.close();
}
public void getFileSystem2() throws Exception {
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS","hdfs://192.168.211.102:8020");
FileSystem fileSystem = FileSystem.get(configuration);
System.out.println(fileSystem.toString());
fileSystem.close();
/**
 * Obtains a fresh FileSystem instance via {@code FileSystem.newInstance(Configuration)},
 * with the cluster address supplied through {@code fs.defaultFS}.
 *
 * @throws Exception if the filesystem cannot be obtained
 */
public void getFileSystem4() throws Exception{
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS","hdfs://192.168.211.102:8020");
    FileSystem hdfs = FileSystem.newInstance(conf);
    System.out.println(hdfs.toString());
    hdfs.close();
}
递归遍历文件
/**
 * Recursively walks every file under the HDFS root ("/") and prints each file's
 * full path. {@code listFiles(path, true)} returns a RemoteIterator over files
 * only (directories are traversed, not emitted).
 *
 * @throws Exception if the filesystem cannot be reached or iteration fails
 */
public void listMyFiles()throws Exception{
    // Connect to the cluster.
    FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.211.102:8020"), new Configuration());
    // Second argument 'true' requests recursive traversal from the root.
    RemoteIterator<LocatedFileStatus> files = hdfs.listFiles(new Path("/"), true);
    while (files.hasNext()){
        System.out.println(files.next().getPath().toString());
    }
    hdfs.close();
}
HDFS文件上传
/**
 * Uploads a local file (E:\input\phone_data.txt) to HDFS directory /test/abc,
 * connecting as user "root".
 *
 * Fix: the original never closed the FileSystem, leaking the connection
 * (the sibling copyToLocalFile method does close it). try-with-resources
 * guarantees release even if the copy throws.
 *
 * @throws Exception if the connection or the copy fails
 */
public void copyFromLocalFile() throws Exception {
    Configuration config = new Configuration();
    try (FileSystem fs = FileSystem.get(new
            URI("hdfs://hadoop102:8020"), config, "root")) {
        fs.copyFromLocalFile(new Path("E:\\input\\phone_data.txt"),
                new Path("/test/abc"));
    }
}
HDFS文件下载
/**
 * Downloads /test/abc/phone_data.txt from HDFS to the local path
 * f:/1123/phone_data.txt, connecting as user "root".
 *
 * @throws Exception if the connection or the copy fails
 */
public void copyToLocalFile() throws Exception {
    // 1. Obtain the filesystem handle for the cluster.
    Configuration conf = new Configuration();
    URI clusterUri = new URI("hdfs://hadoop102:8020");
    FileSystem hdfs = FileSystem.get(clusterUri, conf, "root");
    // 2. Copy the remote file down to the local disk.
    Path remote = new Path("/test/abc/phone_data.txt");
    Path local = new Path("f:/1123/phone_data.txt");
    hdfs.copyToLocalFile(remote, local);
    // 3. Release the connection.
    hdfs.close();
}
HDFS文件夹删除
相当于命令: hdfs dfs -rm -r
// Recursively delete a path on HDFS (equivalent to: hdfs dfs -rm -r).
// Fix: the original URI "hdfs:hadoop102:8020" was malformed — the scheme
// separator "//" was missing, so FileSystem.get would not resolve the host.
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(new
        URI("hdfs://hadoop102:8020"), config,"root");
// Second argument 'true' enables recursive deletion.
fs.delete(new Path("/test/abc/phone_data.txt"),true);
fs.close();
HDFS文件详情查看
// Walk every file under "/" recursively and print its metadata (name, group,
// length, permission, block size) plus the hosts holding each block replica.
// Fix: the original split a string literal across two physical lines
// (lines 86-87), which is a compile error in Java — the literal is rejoined
// on one line with its content unchanged.
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(new
        URI("hdfs://hadoop102:8020"), config, "root");
// 'true' = recursive; the iterator yields files only, not directories.
RemoteIterator<LocatedFileStatus> ri = fs.listFiles(new
        Path("/"), true);
while (ri.hasNext()) {
    LocatedFileStatus lf = ri.next();
    System.out.println("文件名:" + lf.getPath().getName());
    System.out.println("分组:" + lf.getGroup());
    System.out.println("文件长度:" + lf.getLen());
    System.out.println("权限:" + lf.getPermission());
    System.out.println(lf.getBlockSize());
    System.out.println("*******存储块信息******");
    BlockLocation[] bls = lf.getBlockLocations();
    for (BlockLocation bl : bls) {
        // Hosts (datanodes) storing this block's replicas.
        String[] hosts = bl.getHosts();
        System.out.println("获得块存储的主机节点:" + Arrays.toString(hosts));
    }
    System.out.println("=================================");
}
HDFS文件名更改
// Rename a file on HDFS: /mmy/a.txt -> /mmy/a1.txt, connecting as "root".
Configuration conf = new Configuration();
URI clusterUri = new URI("hdfs://hadoop102:8020");
FileSystem hdfs = FileSystem.get(clusterUri, conf, "root");
hdfs.rename(new Path("/mmy/a.txt"), new Path("/mmy/a1.txt"));
hdfs.close();
HDFS的I/O流操作
HDFS文件下载
/**
 * Stream-based download: opens /test/put/profile on HDFS and copies its bytes
 * to the local file D:\profile.
 *
 * Fix: the original closed the streams and filesystem only on the happy path;
 * if IOUtils.copy threw, everything leaked. try-with-resources guarantees the
 * streams close, and the finally block releases the FileSystem.
 *
 * @throws Exception if the connection, open, or copy fails
 */
public void getFileToLocal()throws Exception{
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.211.102:8020"), new Configuration());
    try (FSDataInputStream open = fileSystem.open(new Path("/test/put/profile"));
         FileOutputStream fileOutputStream = new FileOutputStream(new File("D:\\profile"))) {
        IOUtils.copy(open, fileOutputStream);
    } finally {
        fileSystem.close();
    }
}
HDFS文件上传
/**
 * Stream-based upload: copies local D:\abc.txt into a newly created HDFS file.
 *
 * Fix: the original closed the streams and filesystem only on the happy path;
 * try-with-resources guarantees release even when IOUtils.copy throws.
 *
 * NOTE(review): the target path ends with a slash ("/upload/") yet is passed to
 * create(), which makes an ordinary file — confirm the intended destination name.
 *
 * @throws Exception if the connection, create, or copy fails
 */
public void putData2() throws Exception{
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.211.102:8020"), new Configuration());
    try (FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("/hello/mydir/test/upload/"));
         FileInputStream fileInputStream = new FileInputStream("D:\\abc.txt")) {
        IOUtils.copy(fileInputStream, fsDataOutputStream);
    } finally {
        fileSystem.close();
    }
}
多个本地系统文件,上传到hdfs,并合并成一个大的文件
/**
 * Merges every local file under D:\merge into a single HDFS file /bigfile.xml,
 * connecting to the cluster as user "root".
 *
 * Fix: the original closed streams/filesystems only on the happy path; if
 * listStatus, open, or copy threw, the output stream and both filesystems
 * leaked. try-with-resources plus a finally block guarantees cleanup.
 *
 * @throws Exception if any filesystem operation fails
 */
public void mergeFile() throws Exception{
    // Distributed filesystem, impersonating user "root".
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.211.102:8020"), new Configuration(),"root");
    // Local filesystem for reading the source files.
    LocalFileSystem local = FileSystem.getLocal(new Configuration());
    try (FSDataOutputStream outputStream = fileSystem.create(new Path("/bigfile.xml"))) {
        // Enumerate the local source files to concatenate.
        FileStatus[] fileStatuses = local.listStatus(new Path("file:///D:\\merge"));
        for (FileStatus fileStatus : fileStatuses) {
            // Each input stream is closed even if the copy fails mid-file.
            try (FSDataInputStream inputStream = local.open(fileStatus.getPath())) {
                IOUtils.copy(inputStream, outputStream);
            }
        }
    } finally {
        local.close();
        fileSystem.close();
    }
}