Manipulating Hadoop HDFS Files with Java

This code was written and tested in Eclipse; whether it works in other environments has not been verified.
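
The class below loads core-site.xml from the classpath (placed under the Eclipse project's resources). If that file is not available, the client can also be pointed at a cluster programmatically. A minimal sketch, assuming a placeholder NameNode address (on Hadoop 1.x the configuration key is fs.default.name; on 2.x and later it is fs.defaultFS):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class HdfsConnectSketch {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Placeholder address; replace with your own NameNode host and port.
		conf.set("fs.default.name", "hdfs://namenode-host:9000");
		FileSystem fs = FileSystem.get(conf);
		System.out.println("Connected to: " + fs.getUri());
		fs.close();
	}
}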

/** Name     : KcnaufHadoopManagement
 *  Author   : Zhang Bing
 *  Created  : 2013-7-31
 *  Function : Manipulate the Hadoop filesystem
 */
package com.exercise.hadoop.command;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import com.exercise.shell.show.SSHShell;

public class HadoopManipulate {
	public static Configuration config;
	// Configure which cluster to connect to: load core-site.xml from the classpath.
	public HadoopManipulate() {
		if (null == config) {
			config = new Configuration();
			String fp = (this.getClass().getResource("/core-site.xml")).getPath();
			// addResource(String) looks the name up on the classpath, so a
			// filesystem path has to be wrapped in a Path to be loaded.
			config.addResource(new Path(fp));
			System.out.println("============================");
			System.out.println(fp);
			System.out.println("============================");
			// Open and close the file once, just to verify it is readable.
			FileReader fReader = null;
			try {
				fReader = new FileReader(fp);
			} catch (FileNotFoundException e) {
				e.printStackTrace();
			} finally {
				try {
					if (fReader != null) {
						fReader.close();
					}
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

 
	/**
	 * Read a file in HDFS and print its contents to System.out.
	 * @param filename path of the file in HDFS
	 * @return 1 on success, -1 on failure
	 */
	public int readFile(String filename) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			FSDataInputStream fsDataInputStream = hdfSystem.open(new Path(filename));

			IOUtils.copyBytes(fsDataInputStream, System.out, 4096, false);
			// copyBytes was asked not to close the streams, so close the input explicitly.
			IOUtils.closeStream(fsDataInputStream);

			hdfSystem.close();
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}
	//////////////////////////////////////////////////////////
	
	/**
	 * Read a file in HDFS and return its contents line by line.
	 * @param filename path of the file in HDFS
	 * @return the lines of the file, or null on failure
	 */
	public List<String> readFile_return(String filename) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			FSDataInputStream fsDataInputStream = hdfSystem.open(new Path(filename));

			InputStreamReader isr = new InputStreamReader(fsDataInputStream, "UTF-8");
			BufferedReader bufferedReader = new BufferedReader(isr);

			List<String> lines = new LinkedList<String>();

			String text;
			while ((text = bufferedReader.readLine()) != null) {
				lines.add(text);
			}

			bufferedReader.close();
			hdfSystem.close();
			return lines;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}
	
	public static void main(String[] args) {
		HadoopManipulate hm = new HadoopManipulate();
		// Demo: copy /ccccc from HDFS into the local root directory.
		hm.get("/ccccc", "/");
	}
	/**
	 * Disabled: this version produced the warning
	 * 'Unable to load native-hadoop library for your platform'.
	 */
//	public int getFileFromHDFS(String HDFSFile, String LocalFile) {
//		try {
//			Configuration config1 = new Configuration();
//			config1.set("hadoop.job.ugi", "hadoop,supergroup");
//
//			FileSystem hdfSystem = FileSystem.get(config1);
//			Path hDFSPath = new Path(HDFSFile);
//			Path localPath = new Path(LocalFile);
//			hdfSystem.copyToLocalFile( hDFSPath, localPath);
//			hdfSystem.close();
//			return 1;
//		} catch (Exception e) {
//			e.printStackTrace();
//			return -1;
//			// TODO: handle exception
//		}
//	}
	
	/**
	 * Copy a file from HDFS to the local filesystem by shelling out
	 * to the hadoop command-line client.
	 * @param HDFSFile source path in HDFS
	 * @param LocalFile destination path on the local filesystem
	 * @return always 1
	 */
	public int get(String HDFSFile, String LocalFile) {
		SSHShell ssh = new SSHShell();
		String text = "hadoop fs -copyToLocal " + HDFSFile + " " + LocalFile;
		ssh.sshShell_return(text);
		return 1;
	}

	/**
	 * Disabled: this version produced the warning
	 * 'Unable to load native-hadoop library for your platform'.
	 */
//	public int putFileToHDFS(String HDFSFile, String LocalFile) {
//		try {
//			FileSystem hdfSystem = FileSystem.get(config);
//			Path hdfsPath = new Path(HDFSFile);
//			Path localPath = new Path(LocalFile);
//			hdfSystem.copyFromLocalFile(localPath, hdfsPath);
//			hdfSystem.close();
//			return 1;
//		} catch (Exception e) {
//			e.printStackTrace();
//			return -1;
//			// TODO: handle exception
//		}
//	}
	
	/**
	 * Copy a local file into HDFS by shelling out to the hadoop
	 * command-line client.
	 * @param HDFSFile destination path in HDFS
	 * @param LocalFile source path on the local filesystem
	 * @return always 1
	 */
	public int put(String HDFSFile, String LocalFile) {
		SSHShell ssh = new SSHShell();
		// -copyFromLocal takes the local source first, then the HDFS destination.
		String text = "hadoop fs -copyFromLocal " + LocalFile + " " + HDFSFile;
		ssh.sshShell_return(text);
		return 1;
	}
	
	/**
	 * List all files under HDFSFilePath, printing each name to System.out.
	 * @param HDFSFilePath directory path in HDFS
	 * @return 1 on success, -1 on failure
	 */
	public int lsInHadoop(String HDFSFilePath) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			FileStatus[] fileStatus = hdfSystem.listStatus(new Path(HDFSFilePath));
			for (int i = 0; i < fileStatus.length; i++) {
				System.out.println(fileStatus[i].getPath().getName());
			}
			hdfSystem.close();
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}
	/////////////////////////////////////////////////
	
	/**
	 * List all files under HDFSFilePath and return the names as a List<String>.
	 * @param HDFSFilePath directory path in HDFS
	 * @return the file names, or null on failure
	 */
	public List<String> lsInHadoop_return(String HDFSFilePath) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			FileStatus[] fileStatus = hdfSystem.listStatus(new Path(HDFSFilePath));
			List<String> lines = new LinkedList<String>();
			for (int i = 0; i < fileStatus.length; i++) {
				lines.add(fileStatus[i].getPath().getName());
			}
			hdfSystem.close();
			return lines;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}
	
	/**
	 * Create a new file in HDFS and return an open output stream to it.
	 * The caller is responsible for closing the returned stream.
	 * @param HDFSFile path of the file to create
	 * @return an open FSDataOutputStream, or null on failure
	 */
	public FSDataOutputStream createANewFile(String HDFSFile) {
		try {
			FileSystem fileSystem = FileSystem.get(config);
			Path hdfsPath = new Path(HDFSFile);
			FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsPath);
			if (fileSystem.exists(hdfsPath)) {
				return fsDataOutputStream;
			} else {
				fsDataOutputStream.close();
				return null;
			}
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Append new lines of data to a file in HDFS.
	 * @param HDFSFile path of the file in HDFS
	 * @param content lines to append
	 * @return 1 on success, -1 on failure
	 */
	public int writeNewData(String HDFSFile, List<String> content) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			FSDataOutputStream fsDataOutputStream = hdfSystem.append(new Path(HDFSFile));
			for (int i = 0; i < content.size(); i++) {
				// Write raw UTF-8 bytes; writeBytes() would drop the high byte
				// of each character and garble non-ASCII text.
				fsDataOutputStream.write((content.get(i) + "\n").getBytes("UTF-8"));
			}
			fsDataOutputStream.close();
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}
	
	/**
	 * Append a single line of data to a file in HDFS.
	 * @param HDFSFile path of the file in HDFS
	 * @param content line to append
	 * @return 1 on success, -1 on failure
	 */
	public int writeNewData(String HDFSFile, String content) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			FSDataOutputStream fsDataOutputStream = hdfSystem.append(new Path(HDFSFile));
			// writeUTF() prepends a two-byte length header, which corrupts
			// plain-text files, so raw UTF-8 bytes are written instead.
			fsDataOutputStream.write((content + "\n").getBytes("UTF-8"));
			fsDataOutputStream.close();
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}
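	// Note: FileSystem.append() only works when the cluster permits appends.
	// On the Hadoop 1.x releases this code dates from, that usually means
	// setting dfs.support.append=true in hdfs-site.xml; otherwise both
	// writeNewData variants fail with an IOException.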

	/**
	 * Copy a file within HDFS.
	 * @param oldFile source path
	 * @param newFile destination path
	 * @return 1 on success, -1 on failure
	 */
	public int copyFileInHDFS(String oldFile, String newFile) {
		try {
			// Reading and rewriting the file line by line avoids the
			// IOException seen when piping the raw FSDataInputStream
			// straight into the new file.
			FSDataOutputStream out = createANewFile(newFile);
			if (out != null) {
				// Close the creation stream before appending; HDFS rejects an
				// append while the file is still open for write.
				out.close();
			}
			writeNewData(newFile, readFile_return(oldFile));
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}

	/**
	 * Move a file within HDFS: copy it to the new path, then delete the original.
	 * @param oldFile source path
	 * @param newFile destination path
	 * @return 1 on success, -1 on failure
	 */
	public int moveFileInHDFS(String oldFile, String newFile) {
		try {
			// Same copy strategy as copyFileInHDFS.
			FSDataOutputStream out = createANewFile(newFile);
			if (out != null) {
				out.close();
			}
			writeNewData(newFile, readFile_return(oldFile));
			// Delete the old file to complete the move.
			deleteFileInHDFS(oldFile);
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}

	/**
	 * Delete a file or directory in HDFS.
	 * @param deleteFile path to delete
	 * @return 1 on success, -1 on failure
	 */
	public int deleteFileInHDFS(String deleteFile) {
		try {
			FileSystem hdfSystem = FileSystem.get(config);
			Path deletePath = new Path(deleteFile);
			// The second argument enables recursive deletion of directories.
			hdfSystem.delete(deletePath, true);
			hdfSystem.close();
			return 1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		}
	}
}
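
The get and put methods depend on a helper class, com.exercise.shell.show.SSHShell, which is not included in the listing. Its real implementation is unknown; as a rough, hypothetical stand-in that simply runs the command on the local machine with ProcessBuilder (the original presumably executed it over SSH), something like the following would let the class compile:

package com.exercise.shell.show;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

public class SSHShell {
	/**
	 * Run a shell command and return its output lines.
	 * Hypothetical stand-in: executes locally instead of over SSH.
	 */
	public List<String> sshShell_return(String command) {
		List<String> output = new LinkedList<String>();
		try {
			ProcessBuilder pb = new ProcessBuilder("/bin/sh", "-c", command);
			pb.redirectErrorStream(true);
			Process p = pb.start();
			BufferedReader reader = new BufferedReader(
					new InputStreamReader(p.getInputStream()));
			String line;
			while ((line = reader.readLine()) != null) {
				output.add(line);
			}
			p.waitFor();
			reader.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return output;
	}
}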

