All of the code below has been tested and works.
1. Check whether a file or directory exists
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckFileExist {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Path of the file or directory to check
        Path showpath = new Path("/home");
        boolean isExist = hdfs.exists(showpath);
        System.out.println("Exist? " + isExist);
    }
}
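exists() works the same way for files and directories: it simply reports whether the given path is currently present in HDFS.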
2. Upload a local file to a specified location on HDFS
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Upload a local file to the specified HDFS location; src and dst are full paths
public class CopyFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Local file path
        Path src = new Path("/home/caiyong/桌面/ubuntu命令");
        // Destination HDFS path
        Path dst = new Path("hdfs://10.20.68.112/home/caiyong/testCopyFile/");
        hdfs.copyFromLocalFile(src, dst);
        // fs.default.name is the legacy key; Hadoop 2+ also exposes it as fs.defaultFS
        System.out.println("Upload to " + conf.get("fs.default.name"));
        FileStatus[] files = hdfs.listStatus(dst);
        for (FileStatus file : files) {
            System.out.println(file.getPath());
        }
    }
}
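If dst already exists as a directory, copyFromLocalFile() places the source file inside it under its original name; otherwise the uploaded file is created at dst itself.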
3. Create an HDFS directory
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Create a directory on HDFS
public class CreateDir {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Path of the directory to create (don't forget the leading slash)
        Path dfs = new Path("/testCreateDir");
        // Create the directory
        hdfs.mkdirs(dfs);
        System.out.println("SUCCESS!");
    }
}
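mkdirs() behaves like mkdir -p: any missing parent directories along the path are created as well.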
4. Create a file on HDFS
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Create a file on HDFS; f is the full path of the file
public class CreateFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        byte[] buff = "Hello1CaiYong!".getBytes();
        // Path of the file to create
        Path dfs = new Path("/testCreateFile");
        // Create the file with FileSystem.create(Path f)
        FSDataOutputStream outputStream = hdfs.create(dfs);
        outputStream.write(buff, 0, buff.length);
        // Close the stream so the data is flushed to HDFS
        outputStream.close();
        System.out.println("SUCCESS!");
    }
}
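create(Path) overwrites an existing file by default; use the create(Path, boolean overwrite) overload with false if the call should fail when the file is already there.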
5. Delete a file on HDFS
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Delete a file on HDFS
// Deleting a directory works the same way; if the directory is not empty, delete it recursively
public class DeleteFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Full path of the file to delete (left empty here; fill it in before running)
        Path delpath = new Path("");
        boolean isdelete = hdfs.delete(delpath, false);
        // Recursive delete:
        // boolean isdelete = hdfs.delete(delpath, true);
        System.out.println("Delete? " + isdelete);
    }
}
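delete() returns false when the target path does not exist; with recursive set to false, attempting to delete a non-empty directory typically fails with an IOException rather than removing its contents.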
6. View a file's block locations in the cluster
// View where a file's blocks are stored in the cluster
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileLocation {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Path to inspect; it must be the full path of a file, not a directory
        Path filepath = new Path("/home/caiyong/hdfs-site.xml");
        FileStatus status = hdfs.getFileStatus(filepath);
        // Look up the block locations of the file
        BlockLocation[] bloc = hdfs.getFileBlockLocations(status, 0, status.getLen());
        int blocklen = bloc.length;
        // Print the location of each block
        for (int i = 0; i < blocklen; i++) {
            String[] hosts = bloc[i].getHosts();
            System.out.println("Block " + i + "'s location is: " + hosts[0]);
        }
        System.out.println("SUCCESS!");
    }
}
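Because HDFS replicates each block, getHosts() usually returns several hostnames per block; the loop above prints only the first replica's host.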
7. Get information about all the nodes in the cluster
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class GetNodeInfo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        DistributedFileSystem hdfs = (DistributedFileSystem) fs;
        // Get the status of all datanodes
        DatanodeInfo[] datanodeinfo = hdfs.getDataNodeStats();
        // Print each node's hostname
        for (int i = 0; i < datanodeinfo.length; i++) {
            System.out.println("DataNode " + i + "'s name is: " + datanodeinfo[i].getHostName());
        }
        System.out.println("SUCCESS!");
    }
}
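The cast to DistributedFileSystem only succeeds when the default filesystem (fs.default.name / fs.defaultFS) points at an HDFS cluster; against a local filesystem it throws a ClassCastException.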
8. View a file's last modification time
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// View a file's last modification time
public class LatestModifyTime {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Path of the file to inspect
        Path filepath = new Path("/home/caiyong/hdfs-site.xml");
        FileStatus fileStatus = hdfs.getFileStatus(filepath);
        // Modification time in milliseconds since the Unix epoch
        long modifytime = fileStatus.getModificationTime();
        System.out.println("Last modified time of " + filepath + ": " + modifytime);
    }
}
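getModificationTime() returns a raw millisecond timestamp. A minimal sketch for printing it in readable form with the standard JDK classes (the date pattern is just an example) could be added at the end of main:

        // Format the millisecond timestamp as a readable date
        java.text.SimpleDateFormat sdf = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("Last modified: " + sdf.format(new java.util.Date(modifytime)));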
9. List all files under a directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListAllFiles {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Path of the directory to list
        Path listpath = new Path("/home/caiyong/biginput");
        // Use FileStatus.getPath() to walk through every entry under the directory
        FileStatus[] status = hdfs.listStatus(listpath);
        for (int i = 0; i < status.length; i++) {
            System.out.println(status[i].getPath().toString());
        }
        hdfs.close();
    }
}
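listStatus() only returns the direct children of the directory. If your Hadoop release provides FileSystem.listFiles() (available from Hadoop 0.21 / 2.x), a recursive listing can be sketched by replacing the loop above with:

        // Recursively list every file under listpath (directories themselves are not returned)
        // requires: import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.RemoteIterator;
        RemoteIterator<LocatedFileStatus> it = hdfs.listFiles(listpath, true);
        while (it.hasNext()) {
            System.out.println(it.next().getPath());
        }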
10. Rename an HDFS file
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Rename a file on HDFS; oldpath and newpath are both full paths
public class RenameFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Old file name
        Path oldpath = new Path("/testCreateFile");
        // New file name
        Path newpath = new Path("/testCreateFileRenameTest");
        // Rename the file
        boolean isRename = hdfs.rename(oldpath, newpath);
        String result = isRename ? "SUCCESS" : "FAILURE";
        System.out.println("Rename result: " + result);
    }
}
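rename() reports failure by returning false rather than throwing, for example when the source path does not exist, so the return value should always be checked.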
11. View the contents of an HDFS file through a URL
import java.io.InputStream;
import java.net.URL;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

// Read a file's contents through a java.net.URL
public class URLCat {
    static {
        // Teach java.net.URL how to handle hdfs:// URLs
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }
    public static void main(String[] args) throws Exception {
        InputStream in = null;
        try {
            in = new URL(args[0]).openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
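The program takes the file's URL as its first argument; with the class on the Hadoop classpath, an invocation would look something like hadoop URLCat hdfs://namenode:9000/path/to/file (the namenode host, port, and path are placeholders for your own cluster). Also note that URL.setURLStreamHandlerFactory() can be called at most once per JVM, so this approach cannot be combined with other code that also installs a stream handler factory.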