Hadoop-常用操作

本文详细介绍了如何使用Hadoop的FileSystem接口进行HDFS文件系统的操作,包括读取文件、复制文件、文件属性查询、文件过滤、删除、重命名、文件存在性检查、文件位置查找、本地和远程文件的checksum生成、文件压缩与解压缩等关键操作。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

使用URL的方式读取一个文件内容:需要先通过 URL.setURLStreamHandlerFactory 设置一个 handler 工厂,该方法在每个 JVM 中只能调用一次
	// Registers Hadoop's URL stream-handler factory so that java.net.URL can open hdfs:// URLs.
	// URL.setURLStreamHandlerFactory may be called at most once per JVM, hence the static block.
	static {
		FsUrlStreamHandlerFactory hdfsHandlers = new FsUrlStreamHandlerFactory();
		URL.setURLStreamHandlerFactory(hdfsHandlers);
	}

/**
 * Prints the file at hdfs://IP:8020/test to standard output, line by line,
 * opening it through java.net.URL.
 *
 * Requires an hdfs:// URLStreamHandlerFactory to be registered before this runs.
 * Bytes are decoded with the platform default charset.
 *
 * @throws IOException if the URL cannot be opened or a read fails
 */
public void test1() throws IOException {
	URL u = new URL("hdfs://IP:8020/test");
	BufferedReader br = new BufferedReader(new InputStreamReader(u.openStream()));
	try {
		String line;
		while ((line = br.readLine()) != null) {
			System.out.println(line);
		}
	} finally {
		// Close even when readLine() throws, so the underlying stream is not leaked.
		br.close();
	}
}



使用hadoop的FileSystem读取文件
	/**
	 * Copies hdfs://IP:8020/test-data/hello.txt to standard output through the FileSystem API.
	 *
	 * @throws IOException if the file cannot be opened or read
	 */
	public void test2() throws IOException {
		String url = "hdfs://IP:8020/test-data/hello.txt";
		Configuration config = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(url), config);
		InputStream is = null;
		try {
			is = fs.open(new Path(url));
			// close=false: System.out must remain usable after the copy.
			IOUtils.copyBytes(is, System.out, 4096, false);
		} finally {
			// copyBytes was told not to close anything, so the HDFS stream is released here.
			IOUtils.closeStream(is);
		}
	}


将一个本地文件拷贝到hadoop文件系统中
	/**
	 * Uploads the local file C:\test.txt to hdfs://IP:8020/test-data/hello.txt,
	 * printing a dot each time the progress callback fires and "ok~" at the end.
	 *
	 * @throws IOException if the local file cannot be read or the HDFS file cannot be written
	 */
	public void test3() throws IOException {
		String src = "C:\\test.txt";
		String dest = "hdfs://IP:8020/test-data/hello.txt";
		InputStream is = new BufferedInputStream(new FileInputStream(src));
		try {
			Configuration config = new Configuration();
			FileSystem fs = FileSystem.get(URI.create(dest), config);
			OutputStream os = fs.create(new Path(dest), new Progressable() {
				@Override
				public void progress() {
					System.out.print(".");
				}
			});
			// close=true: copyBytes closes both streams when the copy ends.
			IOUtils.copyBytes(is, os, 4096, true);
		} finally {
			// Covers failures before copyBytes runs (FileSystem.get or fs.create throwing),
			// in which case nothing else would close the local stream.
			IOUtils.closeStream(is);
		}
		System.out.println("ok~");
	}


列出文件属性
	/**
	 * Prints the FileStatus attributes of hdfs://IP:8020/test-data/hello.txt
	 * (times, block size, owner/group, length, path, permission, replication).
	 *
	 * @throws IOException if the file status cannot be fetched
	 */
	public void test4() throws IOException {
		// The scheme must be followed by "//": with a single slash the URI has no
		// authority, so the host and port would be parsed as part of the path.
		String url = "hdfs://IP:8020/test-data/hello.txt";
		Configuration config = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(url), config);
		FileStatus status = fs.getFileStatus(new Path(url));
		System.out.println("AccessTime : " + status.getAccessTime());
		System.out.println("BlockSize : " + status.getBlockSize());
		System.out.println("group : " + status.getGroup());
		System.out.println("len : " + status.getLen());
		System.out.println("ModificationTime : " + status.getModificationTime());
		System.out.println("owner : " + status.getOwner());
		// NOTE(review): isDir() is kept as in the original; newer Hadoop releases
		// also offer isDirectory() - confirm against the Hadoop version in use.
		System.out.println("is dir ? : " + status.isDir());
		System.out.println("path : " + status.getPath());
		System.out.println("permission : " + status.getPermission());
		System.out.println("replication : " + status.getReplication());
	}



通过路径过滤器查找文件
	/**
	 * Lists the entries under hdfs://IP:8020/test-data/ whose full path matches
	 * the regular expression ^.*hello.*$ and prints each matching path.
	 *
	 * @throws IOException if the glob cannot be evaluated
	 */
	public void test5() throws IOException {
		final String globPattern = "hdfs://IP:8020/test-data/*";
		Configuration conf = new Configuration();
		FileSystem hdfs = FileSystem.get(URI.create(globPattern), conf);

		Path globPath = new Path(globPattern);
		RegexPathFilter helloOnly = new RegexPathFilter("^.*hello.*$");
		FileStatus[] matches = hdfs.globStatus(globPath, helloOnly);
		for (int i = 0; i < matches.length; i++) {
			System.out.println(matches[i].getPath().toString());
		}
		System.out.println("filter execute ok");
	}
// Path filter that accepts a path only when its full string form matches a regular expression.
public class RegexPathFilter implements PathFilter {
	// Compiled once in the constructor: accept() is invoked once per candidate path,
	// and String.matches(regex) would recompile the expression on every call.
	private final java.util.regex.Pattern pattern;

	/**
	 * @param regex regular expression that the whole path string must match
	 * @throws java.util.regex.PatternSyntaxException if regex is not a valid expression
	 */
	public RegexPathFilter(String regex) {
		this.pattern = java.util.regex.Pattern.compile(regex);
	}

	/**
	 * @param path candidate path
	 * @return true when path.toString() matches the expression in its entirety
	 */
	@Override
	public boolean accept(Path path) {
		return pattern.matcher(path.toString()).matches();
	}
}


删除(delete 的第二个参数为 true 时递归删除目录;下面的示例传入 false,只删除单个文件)
	/**
	 * Deletes hdfs://IP:8020/test-data/xxx.txt (non-recursively) and reports the outcome.
	 *
	 * @throws IOException if the delete request fails
	 */
	public void delete() throws IOException {
		String url = "hdfs://IP:8020/test-data/xxx.txt";
		Configuration config = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(url), config);
		// Second argument is the "recursive" flag; false is enough for a single file.
		boolean deleted = fs.delete(new Path(url), false);
		// delete() signals "nothing was removed" through its return value rather than
		// an exception, so the message must depend on it.
		System.out.println(deleted ? "delete ok" : "delete failed");
	}


重命名
	/**
	 * Renames hdfs://IP:8020/test-data/xx.txt to modify-xx.txt in the same directory
	 * and prints the boolean result returned by FileSystem.rename.
	 *
	 * @throws IOException if the rename request fails
	 */
	public void test5_rename() throws IOException {
		final String from = "hdfs://IP:8020/test-data/xx.txt";
		final String to = "hdfs://IP:8020/test-data/modify-xx.txt";
		Configuration conf = new Configuration();
		FileSystem hdfs = FileSystem.get(URI.create(from), conf);
		Path source = new Path(from);
		Path target = new Path(to);
		boolean renamed = hdfs.rename(source, target);
		System.out.println("complete : " + renamed);
	}



检查文件是否存在
	/**
	 * Prints whether hdfs://IP:8020/test-data/modify-xx.txt exists.
	 *
	 * @throws IOException if the existence check fails
	 */
	public void exist() throws IOException {
		// The scheme must be followed by "//": with a single slash the URI has no
		// authority, so the host and port would be parsed as part of the path.
		String url = "hdfs://IP:8020/test-data/modify-xx.txt";
		Configuration config = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(url), config);
		boolean isExist = fs.exists(new Path(url));
		System.out.println("exist ? " + isExist);
	}



查找某个文件在HDFS中的位置
	/**
	 * Prints, for every block of hdfs://IP:8020/test-data/hello.txt, the hosts
	 * returned by BlockLocation.getHosts() for that block.
	 *
	 * @throws IOException if the file status or block locations cannot be fetched
	 */
	public void test5_location() throws IOException {
		String url = "hdfs://IP:8020/test-data/hello.txt";
		Configuration config = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(url), config);
		FileStatus status = fs.getFileStatus(new Path(url));
		BlockLocation[] bls = fs.getFileBlockLocations(status, 0, status.getLen());
		for (int i = 0; i < bls.length; i++) {
			String[] hosts = bls[i].getHosts();
			// hosts is indexed by replica, not by block: using the block index i here
			// would pick an unrelated host and overflow once i >= hosts.length.
			StringBuilder joined = new StringBuilder();
			for (int h = 0; h < hosts.length; h++) {
				if (h > 0) {
					joined.append(", ");
				}
				joined.append(hosts[h]);
			}
			System.out.println("block :" + i + "\tlocation : " + joined);
		}
	}


获取HDFS集群上所有节点的名称
	/**
	 * Prints the host name of every entry returned by
	 * DistributedFileSystem.getDataNodeStats() for the cluster at hdfs://IP:8020.
	 *
	 * The FileSystem obtained for the URL is cast to DistributedFileSystem without a check.
	 *
	 * @throws IOException if the datanode report cannot be fetched
	 */
	public void test5_allnode() throws IOException {
		final String anyHdfsUrl = "hdfs://IP:8020/test-data/modify-xx.txt";
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(anyHdfsUrl), conf);
		DatanodeInfo[] nodes = ((DistributedFileSystem) fs).getDataNodeStats();
		for (int i = 0; i < nodes.length; i++) {
			System.out.println(nodes[i].getHostName());
		}
	}



创建本地和远端的checksum
	/**
	 * Writes the bytes of "hehe" to file:///C:/zzzz/abc.txt through a LocalFileSystem
	 * (used via its ChecksumFileSystem type) wrapped around the FileSystem obtained for that URL.
	 *
	 * NOTE(review): presumably this leaves a checksum side file next to abc.txt - confirm.
	 *
	 * @throws IOException if the file cannot be created or written
	 */
	public void localCreateChecksum() throws IOException {
		final String target = "file:///C:/zzzz/abc.txt";
		Configuration conf = new Configuration();
		FileSystem base = FileSystem.get(URI.create(target), conf);
		ChecksumFileSystem checksummed = new LocalFileSystem(base);
		FSDataOutputStream out = checksummed.create(new Path(target));
		byte[] payload = "hehe".getBytes();
		out.write(payload);
		out.flush();
		out.close();
	}


/**
 * Writes the bytes of "hehe~" to hdfs://IP:8020/test/abc.txt through a LocalFileSystem
 * (used via its ChecksumFileSystem type) wrapped around the FileSystem obtained for that URL.
 *
 * NOTE(review): the wrapped FileSystem comes from an hdfs:// URL, yet the wrapper is
 * LocalFileSystem - confirm this is the intended way to get checksums on HDFS.
 *
 * @throws IOException if the file cannot be created or written
 */
public void distributeCreateChecksum() throws IOException {
	final String target = "hdfs://IP:8020/test/abc.txt";
	Configuration conf = new Configuration();
	FileSystem base = FileSystem.get(URI.create(target), conf);
	ChecksumFileSystem checksummed = new LocalFileSystem(base);
	FSDataOutputStream out = checksummed.create(new Path(target));
	byte[] payload = "hehe~".getBytes();
	out.write(payload);
	out.flush();
	out.close();
}



压缩和解压缩,压缩池
	/**
	 * Gzip-compresses the local file C:\zzzz\xx.txt into hdfs://IP:8020/test/compress.gz,
	 * echoing the uncompressed content to standard output after a "content:" header.
	 *
	 * The whole file is processed in 10240-byte chunks; an empty file produces an
	 * empty (but valid) gzip stream.
	 *
	 * @throws IOException if the local file cannot be read or the HDFS file cannot be written
	 */
	public void compress() throws IOException {
		FileInputStream fis = new FileInputStream("C:\\zzzz\\xx.txt");
		OutputStream fdos = null;
		CompressionOutputStream cos = null;
		try {
			// Written as .gz so that it is the file decompress() opens.
			String url = "hdfs://IP:8020/test/compress.gz";
			Configuration config = new Configuration();
			GzipCodec gc = new GzipCodec();
			// A codec created with "new" has no Configuration until setConf is called.
			gc.setConf(config);
			FileSystem fs = FileSystem.get(URI.create(url), config);
			fdos = fs.create(new Path(url));
			cos = gc.createOutputStream(fdos);

			System.out.println("content:");
			byte[] buf = new byte[10240];
			int len;
			// Loop until EOF: a single read() may return fewer bytes than the file
			// holds, and returns -1 for an empty file.
			while ((len = fis.read(buf)) != -1) {
				// Echo raw bytes so a multi-byte character split across chunks stays intact.
				System.out.write(buf, 0, len);
				cos.write(buf, 0, len);
			}
			System.out.println();
			cos.flush();
			// Explicit close on the success path so a failure to finish the file is reported.
			cos.close();
		} finally {
			// Best-effort cleanup for the error paths; closing again after success is harmless.
			IOUtils.closeStream(cos);
			IOUtils.closeStream(fdos);
			IOUtils.closeStream(fis);
		}
	}

/**
 * Reads hdfs://IP:8020/test/compress.gz, gunzips it and copies the result to standard output.
 *
 * @throws IOException if the file cannot be opened, read or decompressed
 */
public void decompress() throws IOException {
	String url = "hdfs://IP:8020/test/compress.gz";
	Configuration config = new Configuration();
	FileSystem fs = FileSystem.get(URI.create(url), config);
	GzipCodec gc = new GzipCodec();
	// A codec created with "new" has no Configuration until setConf is called.
	gc.setConf(config);
	FSDataInputStream fdis = null;
	CompressionInputStream cis = null;
	try {
		fdis = fs.open(new Path(url));
		cis = gc.createInputStream(fdis);
		// Same Hadoop IOUtils helper the other examples use; close=false keeps System.out open.
		IOUtils.copyBytes(cis, System.out, 4096, false);
	} finally {
		IOUtils.closeStream(cis);
		// Also covers the case where createInputStream failed after the file was opened.
		IOUtils.closeStream(fdis);
	}
}

/**
 * Gzip-compresses the local file C:\zzzz\abc.txt into C:/zzzz/pool.txt using a
 * Compressor borrowed from CodecPool, and returns the compressor afterwards.
 *
 * @throws IOException if either local file cannot be accessed or compression fails
 */
public void comprssPool() throws IOException {
	GzipCodec gc = new GzipCodec();
	// A codec created with "new" has no Configuration until setConf is called.
	gc.setConf(new Configuration());
	FileInputStream fis = null;
	FileOutputStream fdos = null;
	CompressionOutputStream cos = null;
	Compressor compressor = null;
	try {
		fis = new FileInputStream("C:\\zzzz\\abc.txt");
		fdos = new FileOutputStream("C:/zzzz/pool.txt");
		compressor = CodecPool.getCompressor(gc);
		cos = gc.createOutputStream(fdos, compressor);
		IOUtils.copyBytes(fis, cos, 4096, false);
		// Without finish() the tail of the compressed stream is never written
		// and pool.txt is not a complete gzip file.
		cos.finish();
		cos.close();
	} finally {
		// Close the stream before handing the compressor back, so the pool never
		// receives a compressor that a live stream is still using.
		IOUtils.closeStream(cos);
		IOUtils.closeStream(fdos);
		IOUtils.closeStream(fis);
		CodecPool.returnCompressor(compressor);
	}
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值