1. First, add the following dependencies to pom.xml:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.2.1</version>
</dependency>
2. Add the following to the configuration file (application.properties):
hdfs.path=hdfs://10.202.234.244:9000
hdfs.username=peiyajie
3. Create a utility class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

@Component
public class HadoopUtil {

    @Value("${hdfs.path}")
    private String path;

    @Value("${hdfs.username}")
    private String username;

    private static String hdfsPath;
    private static String hdfsName;

    /**
     * Build the HDFS configuration.
     */
    private static Configuration getConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", hdfsPath);
        return configuration;
    }

    /**
     * Get an HDFS FileSystem instance.
     */
    public static FileSystem getFileSystem() throws URISyntaxException, IOException, InterruptedException {
        System.out.println("hdfsPath:" + hdfsPath);
        System.out.println("hdfsName:" + hdfsName);
        return FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
    }

    // Initialization order: constructor >> @Autowired >> @PostConstruct.
    // @PostConstruct is a simple, clean way to copy the injected values into static fields.
    @PostConstruct
    public void getPath() {
        hdfsPath = this.path;
    }

    @PostConstruct
    public void getUsername() {
        hdfsName = this.username;
    }

    public static String getHdfsPath() {
        return hdfsPath;
    }

    public static String getHdfsName() {
        return hdfsName;
    }
}
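Since FileSystem implements Closeable, callers can also use try-with-resources so the connection is released even when an exception is thrown. A minimal usage sketch (the path /wordcount/demo is only a placeholder for illustration):

// Sketch: check whether a path exists, closing the FileSystem automatically.
try (FileSystem fs = HadoopUtil.getFileSystem()) {
    boolean exists = fs.exists(new Path("/wordcount/demo")); // placeholder path
    System.out.println("exists: " + exists);
}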
4. The controller code
/**
 * Create a directory
 */
@PostMapping("/mkdir")
// e.g. /wordcount/inputpeiyajie
public String mkdir(String path) throws URISyntaxException, IOException, InterruptedException {
    // FileSystem object
    FileSystem fs = getFileSystem();
    // Target path
    Path newPath = new Path(path);
    // Create the (empty) directory
    boolean isOk = fs.mkdirs(newPath);
    fs.close();
    if (isOk) {
        return "Directory created";
    } else {
        return "Failed to create directory";
    }
}
@PostMapping("/createFile")
public String createFile(String path, MultipartFile file) throws InterruptedException, IOException, URISyntaxException {
///wordcount/inputpeiyajie
String fileName=file.getOriginalFilename();
FileSystem fs= getFileSystem();
//上传时默认当前目录,自动拼接文档
Path newPath=new Path(path+"/"+fileName);
//打开一个输出流
FSDataOutputStream outputStream=fs.create(newPath);
outputStream.write(file.getBytes());
outputStream.close();
fs.close();
return "创建文件成功";
}
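Note that fs.create(path) overwrites an existing file by default. If that is not desired, a hedged fragment for the body of createFile above (using the same path, fileName and fs variables) could refuse to overwrite:

// Sketch: refuse to overwrite an existing file by passing overwrite = false.
Path newPath = new Path(path + "/" + fileName);
if (fs.exists(newPath)) {
    return "File already exists: " + newPath;
}
try (FSDataOutputStream out = fs.create(newPath, false)) { // false = do not overwrite
    out.write(file.getBytes());
}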
// e.g. /wordcount/inputpeiyajie/新建文本文档.txt
// Reference: https://blog.youkuaiyun.com/zxl646801924/article/details/84615604
@PostMapping("/readFile")
public String readFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
    FileSystem fs = getFileSystem();
    Path newPath = new Path(path);
    FSDataInputStream inputStream = fs.open(newPath);
    /* Alternative that avoids garbled Chinese characters:
    BufferedReader bf = new BufferedReader(new InputStreamReader(inputStream));
    String line = null;
    while ((line = bf.readLine()) != null) {
        System.out.println(line);
    }
    bf.close();
    */
    IOUtils.copyBytes(inputStream, System.out, 4096);
    IOUtils.closeStream(inputStream);
    fs.close();
    return "File read";
}
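Copying to System.out only shows the content in the server log. If the endpoint should return the content itself, one hedged option for small files is to copy the stream into memory and decode it as UTF-8. The helper name readAsUtf8 below is hypothetical, and it assumes imports of java.io.ByteArrayOutputStream and java.nio.charset.StandardCharsets:

// Sketch: read a small HDFS file into a UTF-8 String instead of printing it to System.out.
private String readAsUtf8(String path) throws Exception { // hypothetical helper
    try (FileSystem fs = HadoopUtil.getFileSystem();
         FSDataInputStream in = fs.open(new Path(path))) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, buffer, 4096, false); // false = let try-with-resources close the streams
        return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
    }
}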
// Recursively list all files under the given directory
@PostMapping("/ListFile")
public String listFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
    FileSystem fs = HadoopUtil.getFileSystem();
    Path newPath = new Path(path);
    // Recursively iterate over all files
    RemoteIterator<LocatedFileStatus> fileLists = fs.listFiles(newPath, true);
    List<Map<String, String>> returnList = new ArrayList<Map<String, String>>();
    while (fileLists.hasNext()) {
        LocatedFileStatus next = fileLists.next();
        String fileName = next.getPath().getName();
        Path filePath = next.getPath();
        Map<String, String> map = new HashMap<String, String>();
        map.put("fileName", fileName);
        map.put("filePath", filePath.toString());
        System.out.println("map:" + map);
        returnList.add(map);
    }
    fs.close();
    return "Listed all files";
}
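The collected returnList above is only printed. If the endpoint should actually return it, a hedged variant (assuming Spring Boot's default Jackson setup, with a hypothetical endpoint name) changes the return type so the list is serialized as JSON:

// Sketch: return the collected file information so Spring serializes it as JSON.
@PostMapping("/listFileJson") // hypothetical endpoint name
public List<Map<String, String>> listFileJson(@RequestParam("path") String path)
        throws InterruptedException, IOException, URISyntaxException {
    List<Map<String, String>> result = new ArrayList<>();
    try (FileSystem fs = HadoopUtil.getFileSystem()) {
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(path), true);
        while (files.hasNext()) {
            LocatedFileStatus status = files.next();
            Map<String, String> entry = new HashMap<>();
            entry.put("fileName", status.getPath().getName());
            entry.put("filePath", status.getPath().toString());
            result.add(entry);
        }
    }
    return result;
}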
// Delete the file or directory at the given path
@PostMapping("/deleteFile")
public String deleteFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
    FileSystem fs = HadoopUtil.getFileSystem();
    Path newPath = new Path(path);
    // deleteOnExit removes the path when the FileSystem is closed (which happens right below);
    // fs.delete(newPath, true) would delete it immediately.
    boolean isOk = fs.deleteOnExit(newPath);
    fs.close();
    return isOk ? "File deleted" : "Failed to delete file";
}
@PostMapping("/uploadFile")
public String uploadFile(@RequestParam("path") String path,@RequestParam("uploadPath") String uploadPath) throws InterruptedException, IOException, URISyntaxException {
FileSystem fs = HadoopUtil.getFileSystem();
Path newPath = new Path(path);
Path newUploadPath = new Path(uploadPath);
fs.copyFromLocalFile(false,newPath,newUploadPath);
fs.close();
return "上传文件成功";
}
@PostMapping("/downloadFile")
public String downloadFile(@RequestParam("path") String path,@RequestParam("downloadPath") String downloadPath) throws InterruptedException, IOException, URISyntaxException {
FileSystem fs = HadoopUtil.getFileSystem();
Path newPath = new Path(path);
Path newDownloadPath = new Path(downloadPath);
fs.copyToLocalFile(false,newPath,newDownloadPath);
fs.close();
return "下载文件成功";
}
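One side effect worth knowing: copyToLocalFile writes a .crc checksum file next to the downloaded file. A hedged alternative, inside downloadFile above, is the four-argument overload that uses the raw local file system and skips the checksum:

// Sketch: download without creating a local .crc checksum file.
fs.copyToLocalFile(false, newPath, newDownloadPath, true); // last argument: use the raw local file system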
Note: if "HADOOP_HOME and hadoop.home.dir are unset" appears while downloading, it means the local Hadoop environment variables are not configured on Windows.
You may wonder: I am calling Hadoop remotely on Linux, so what does my local machine have to do with it? For operations that only take effect on the remote Hadoop (uploading, creating directories, renaming files, i.e. writes), no local Hadoop is needed on Windows.
But as soon as a download (read) is involved, Hadoop's internal caching mechanism requires a local Hadoop as well, and the "HADOOP_HOME and hadoop.home.dir are unset" error appears. The fix is to set HADOOP_HOME and add %HADOOP_HOME%\bin to PATH,
then test the hadoop version command; if it works, restart your eclipse/myeclipse. That alone is not enough: Hadoop on Windows also needs winutils.exe, otherwise you get "Could not locate Hadoop executable: xxxx\winutils.exe".
Download: https://github.com/cdarlint/winutils/tree/master/hadoop-3.2.1/bin
Put winutils.exe and hadoop.dll into Hadoop's bin folder.
System configuration:
Add %HADOOP_HOME%\bin to Path
Add the system variable HADOOP_HOME
HADOOP_HOME E:\software\hadoop-3.2.1
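A quick way to verify this setup from the IDE is a few lines dropped into any test or main method (a sketch; the bin/winutils.exe location simply follows the layout described above):

// Sketch: verify that HADOOP_HOME is visible to the JVM and that winutils.exe is in place.
String hadoopHome = System.getenv("HADOOP_HOME");
System.out.println("HADOOP_HOME = " + hadoopHome);
if (hadoopHome != null) {
    java.io.File winutils = new java.io.File(hadoopHome, "bin/winutils.exe");
    System.out.println("winutils.exe present: " + winutils.exists());
}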
5. Use MapReduce to run a word count and write the results to an output directory
package com.qihoo.hadoop.util;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split on one or more whitespace characters
        String[] ws = value.toString().split("\\s+");
        for (String word : ws) {
            // Emit (word, 1)
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
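Allocating a new Text and IntWritable per word works, but the usual Hadoop idiom is to reuse the writable objects across calls, since the framework serializes them immediately on context.write. A hedged sketch of the same logic with reused objects (the class name WCMapperReuse is hypothetical):

// Sketch: same mapper logic, reusing the output writables instead of allocating per record.
public class WCMapperReuse extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final Text outKey = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String word : value.toString().split("\\s+")) {
            outKey.set(word);
            context.write(outKey, ONE); // the framework copies/serializes the pair on write
        }
    }
}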
package com.qihoo.hadoop.util;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
                          Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            // Sum the counts (each is 1 from the mapper; this also stays correct with a combiner)
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
// Word count test; Hadoop's internal caching mechanism requires a local Hadoop installation as well
@Test
public void wordCount() throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("HADOOP_HOME:" + System.getenv("HADOOP_HOME"));
    // 1. Configuration object
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://10.202.234.244:9000");
    // 2. Create the job
    Job job = Job.getInstance(conf, "wc");
    // 2.1 Set the mapper and reducer classes for the two phases
    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);
    // 2.2 Set the map output key/value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Final output key and value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // 2.3 Number of reducers (default is 1)
    job.setNumReduceTasks(2);
    // 2.4 Input and output paths
    // HDFS variant:
    // FileInputFormat.addInputPath(job, new Path("hdfs://10.202.234.244:9000/wordcount/inputpeiyajie/1.txt"));
    // The output directory must not exist yet
    // FileOutputFormat.setOutputPath(job, new Path("hdfs://10.202.234.244:9000/wordcount/mapreduceFile"));
    // Local variant:
    FileInputFormat.addInputPath(job, new Path("file:///C:/Users/peiyajie/Desktop/1.txt"));
    // The output directory must not exist yet
    FileOutputFormat.setOutputPath(job, new Path("file:///C:/Users/peiyajie/Desktop/3"));
    // You can also configure whether a jar is produced here
    // 2.5 Submit the job, wait for completion, and return whether it succeeded
    boolean b = job.waitForCompletion(true);
    System.exit(b ? 0 : -1);
}
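Because WCReducer simply sums the incoming counts, it can also be reused as a combiner to cut down the data shuffled between the map and reduce phases. A hedged one-line addition to the driver above, placed next to the setReducerClass call:

// Sketch: reuse the reducer as a combiner (valid here because summation is associative and commutative).
job.setCombinerClass(WCReducer.class);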
This article walks through file operations with Hadoop, including creating directories, uploading, reading, listing, and deleting files, as well as a complete word count example using MapReduce.