1. First, add the following dependencies to pom.xml:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.2.1</version>
</dependency>
2. Add the following to the configuration file:
hdfs.path=hdfs://10.202.234.244:9000
hdfs.username=peiyajie
3. Create a utility class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

@Component
public class HadoopUtil {

    @Value("${hdfs.path}")
    private String path;

    @Value("${hdfs.username}")
    private String username;

    private static String hdfsPath;
    private static String hdfsName;

    /**
     * Build the HDFS configuration.
     */
    private static Configuration getConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", hdfsPath);
        return configuration;
    }

    /**
     * Get an HDFS FileSystem object.
     */
    public static FileSystem getFileSystem() throws URISyntaxException, IOException, InterruptedException {
        System.out.println("hdfsPath:" + hdfsPath);
        System.out.println("hdfsName:" + hdfsName);
        return FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
    }

    // Initialization order: constructor >> @Autowired >> @PostConstruct.
    // @PostConstruct is a simple, clean place to copy the injected values into the static fields.
    @PostConstruct
    public void getPath() {
        hdfsPath = this.path;
    }

    @PostConstruct
    public void getUsername() {
        hdfsName = this.username;
    }

    public static String getHdfsPath() {
        return hdfsPath;
    }

    public static String getHdfsName() {
        return hdfsName;
    }
}
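With the utility in place, you can do a quick connectivity check before writing any endpoints, for example from a throwaway test. A minimal sketch (the /wordcount path and the test method name are only examples):

@Test
public void testHdfsConnection() throws Exception {
    FileSystem fs = HadoopUtil.getFileSystem();
    // Prints the user home directory on HDFS and whether the example path exists
    System.out.println("home: " + fs.getHomeDirectory());
    System.out.println("exists /wordcount: " + fs.exists(new Path("/wordcount")));
    fs.close();
}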
4. The controller code:
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// REST controller exposing the HDFS operations (class name is illustrative)
@RestController
public class HdfsController {

    /**
     * Create a directory.
     */
    @PostMapping("/mkdir")
    // Example path: /wordcount/inputpeiyajie
    public String mkdir(String path) throws URISyntaxException, IOException, InterruptedException {
        // File system object
        FileSystem fs = HadoopUtil.getFileSystem();
        // Target path
        Path newPath = new Path(path);
        // Create the (empty) directory
        boolean isOk = fs.mkdirs(newPath);
        fs.close();
        if (isOk) {
            return "Directory created";
        } else {
            return "Failed to create directory";
        }
    }

    @PostMapping("/createFile")
    public String createFile(String path, MultipartFile file) throws InterruptedException, IOException, URISyntaxException {
        // Example path: /wordcount/inputpeiyajie
        String fileName = file.getOriginalFilename();
        FileSystem fs = HadoopUtil.getFileSystem();
        // Append the file name to the target directory
        Path newPath = new Path(path + "/" + fileName);
        // Open an output stream and write the uploaded bytes
        FSDataOutputStream outputStream = fs.create(newPath);
        outputStream.write(file.getBytes());
        outputStream.close();
        fs.close();
        return "File created";
    }

    // Example path: /wordcount/inputpeiyajie/新建文本文档.txt
    // Reference: https://blog.youkuaiyun.com/zxl646801924/article/details/84615604
    @PostMapping("/readFile")
    public String readFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        Path newPath = new Path(path);
        FSDataInputStream inputStream = fs.open(newPath);
        /* Alternative that avoids garbled Chinese characters:
        BufferedReader bf = new BufferedReader(new InputStreamReader(inputStream));
        String line = null;
        while ((line = bf.readLine()) != null) {
            System.out.println(line);
        }
        bf.close();
        */
        IOUtils.copyBytes(inputStream, System.out, 4096);
        IOUtils.closeStream(inputStream);
        fs.close();
        return "File read";
    }

    // List all files under a directory (e.g. the demo directory), recursively
    @PostMapping("/ListFile")
    public List<Map<String, String>> listFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        Path newPath = new Path(path);
        // Recursively list all files
        RemoteIterator<LocatedFileStatus> fileLists = fs.listFiles(newPath, true);
        List<Map<String, String>> returnList = new ArrayList<Map<String, String>>();
        while (fileLists.hasNext()) {
            LocatedFileStatus next = fileLists.next();
            String fileName = next.getPath().getName();
            Path filePath = next.getPath();
            Map<String, String> map = new HashMap<String, String>();
            map.put("fileName", fileName);
            map.put("filePath", filePath.toString());
            System.out.println("map:" + map);
            returnList.add(map);
        }
        fs.close();
        return returnList;
    }

    // Delete a file or directory
    @PostMapping("/deleteFile")
    public String deleteFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        Path newPath = new Path(path);
        // delete(path, true) deletes recursively and immediately
        // (deleteOnExit would only take effect when the FileSystem is closed)
        boolean isOk = fs.delete(newPath, true);
        fs.close();
        if (isOk) {
            return "File deleted";
        } else {
            return "Failed to delete file";
        }
    }

    @PostMapping("/uploadFile")
    public String uploadFile(@RequestParam("path") String path, @RequestParam("uploadPath") String uploadPath) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        // Local source path
        Path newPath = new Path(path);
        // Target path on HDFS
        Path newUploadPath = new Path(uploadPath);
        fs.copyFromLocalFile(false, newPath, newUploadPath);
        fs.close();
        return "File uploaded";
    }

    @PostMapping("/downloadFile")
    public String downloadFile(@RequestParam("path") String path, @RequestParam("downloadPath") String downloadPath) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        // Source path on HDFS
        Path newPath = new Path(path);
        // Local target path
        Path newDownloadPath = new Path(downloadPath);
        fs.copyToLocalFile(false, newPath, newDownloadPath);
        fs.close();
        return "File downloaded";
    }
}

Note: if the download step fails with HADOOP_HOME and hadoop.home.dir are unset, it means you have not configured the Windows-local Hadoop environment variables.
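If you just need the download endpoint to work on a Windows development machine, a common workaround is to point Hadoop at a local installation programmatically before the first FileSystem call, instead of relying only on the environment variable. A minimal sketch, assuming Hadoop is unpacked locally at E:\software\hadoop-3.2.1 (the same location used in the environment setup below) and that winutils.exe is present in its bin directory:

// Hypothetical placement: a static initializer in the Spring Boot main class (or at the top of the test),
// so it runs before any Hadoop FileSystem/Shell class is loaded.
// The path is an assumption; point it at your own local Hadoop directory.
static {
    System.setProperty("hadoop.home.dir", "E:\\software\\hadoop-3.2.1");
}

Why a purely remote call needs a local Hadoop at all is explained next.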
You might wonder: I am calling a remote Hadoop running on Linux, so what does my local Hadoop have to do with it? If your operations only touch the remote Hadoop, such as uploading, creating directories, or renaming files (writes), you do not need a local Hadoop on Windows. But as soon as a download (read) is involved, Hadoop's internal caching mechanism requires a local Hadoop as well, and you will hit HADOOP_HOME and hadoop.home.dir are unset. The fix is to configure HADOOP_HOME and add %HADOOP_HOME%\bin to PATH, then test the hadoop version command; once it works, restart your Eclipse/MyEclipse. This alone is not enough, though: Hadoop on Windows also needs winutils.exe, otherwise you will get Could not locate Hadoop executable: xxxx\winutils.exe.

Download: https://github.com/cdarlint/winutils/tree/master/hadoop-3.2.1/bin
Put winutils.exe and hadoop.dll into Hadoop's bin folder.

System configuration:
Add %HADOOP_HOME%\bin to Path.
Add a new system variable: HADOOP_HOME = E:\software\hadoop-3.2.1

5. Use MapReduce to compute the word count; the results are written to the job's output path
package com.qihoo.hadoop.util;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split on one or more whitespace characters
        String[] ws = value.toString().split("\\s+");
        for (String word : ws) {
            // Emit (word, 1) for every word
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
package com.qihoo.hadoop.util;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            // Sum the counts rather than just counting elements,
            // so the result stays correct even if the values are pre-aggregated
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
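Because the reducer sums the incoming counts rather than just counting elements, it can also be reused as a combiner to cut down shuffle traffic. This is optional and not part of the original setup; if you want to try it, it is a single extra line when the Job is configured in the next step:

// Optional: run WCReducer on the map side as a combiner
job.setCombinerClass(WCReducer.class);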
// Word-count test. Note: Hadoop's local caching/file handling requires a local Hadoop installation as well.
@Test
public void wordCount() throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("HADOOP_HOME:" + System.getenv("HADOOP_HOME"));
    // 1. Configuration object
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://10.202.234.244:9000");
    // 2. Create the job
    Job job = Job.getInstance(conf, "wc");
    // 2.1 Set the Mapper and Reducer classes for the two processing phases
    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);
    // 2.2 Set the map output key/value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Final output key/value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // 2.3 Number of reducers (default is 1)
    job.setNumReduceTasks(2);
    // 2.4 Input and output paths
    // HDFS variant:
    // FileInputFormat.addInputPath(job, new Path("hdfs://10.202.234.244:9000/wordcount/inputpeiyajie/1.txt"));
    // The output directory must not exist yet
    // FileOutputFormat.setOutputPath(job, new Path("hdfs://10.202.234.244:9000/wordcount/mapreduceFile"));
    // Local variant:
    FileInputFormat.addInputPath(job, new Path("file:///C:/Users/peiyajie/Desktop/1.txt"));
    // The output directory must not exist yet
    FileOutputFormat.setOutputPath(job, new Path("file:///C:/Users/peiyajie/Desktop/3"));
    // You could also configure whether to ship a jar here.
    // 2.5 Submit the job, wait for completion, and report success or failure
    boolean b = job.waitForCompletion(true);
    System.exit(b ? 0 : -1);
}
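If you run the job against the commented-out HDFS paths instead of the local desktop files, you can check the result with the same FileSystem API from step 3. With setNumReduceTasks(2), the output directory holds a _SUCCESS marker plus two data files, part-r-00000 and part-r-00001, with one word<TAB>count pair per line. A minimal sketch for printing them (the directory /wordcount/mapreduceFile is the commented-out output path above; imports from org.apache.hadoop.fs and org.apache.hadoop.io are assumed):

@Test
public void printWordCountResult() throws Exception {
    FileSystem fs = HadoopUtil.getFileSystem();
    // Output directory written by the job (assumption: the HDFS variant above was used)
    Path outputDir = new Path("/wordcount/mapreduceFile");
    for (FileStatus status : fs.listStatus(outputDir)) {
        // Only the reducer output files contain data; skip _SUCCESS and anything else
        if (!status.getPath().getName().startsWith("part-r-")) {
            continue;
        }
        FSDataInputStream in = fs.open(status.getPath());
        IOUtils.copyBytes(in, System.out, 4096, false);
        IOUtils.closeStream(in);
    }
    fs.close();
}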