1. First, add the following dependencies to pom.xml:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.2.1</version>
</dependency>
2. Add the following to the configuration file:
hdfs.path=hdfs://10.202.234.244:9000
hdfs.username=peiyajie
3. Create a utility class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

@Component
public class HadoopUtil {

    @Value("${hdfs.path}")
    private String path;

    @Value("${hdfs.username}")
    private String username;

    private static String hdfsPath;
    private static String hdfsName;

    /**
     * Build the HDFS configuration.
     */
    private static Configuration getConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", hdfsPath);
        return configuration;
    }

    /**
     * Get an HDFS FileSystem object.
     */
    public static FileSystem getFileSystem() throws URISyntaxException, IOException, InterruptedException {
        System.out.println("hdfsPath:" + hdfsPath);
        System.out.println("hdfsName:" + hdfsName);
        return FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
    }

    // Initialization order: constructor >> @Autowired >> @PostConstruct.
    // @PostConstruct is a simple, clean place to copy the injected values into the static fields.
    @PostConstruct
    public void getPath() {
        hdfsPath = this.path;
    }

    @PostConstruct
    public void getUsername() {
        hdfsName = this.username;
    }

    public static String getHdfsPath() {
        return hdfsPath;
    }

    public static String getHdfsName() {
        return hdfsName;
    }
}
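With the utility in place, you can do a quick connectivity check before writing any endpoints, for example from a throwaway test. A minimal sketch (the /wordcount path and the test method name are only examples):

@Test
public void testHdfsConnection() throws Exception {
    FileSystem fs = HadoopUtil.getFileSystem();
    // Prints the user home directory on HDFS and whether the example path exists
    System.out.println("home: " + fs.getHomeDirectory());
    System.out.println("exists /wordcount: " + fs.exists(new Path("/wordcount")));
    fs.close();
}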
4. The controller code:
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// REST controller exposing the HDFS operations (class name is illustrative)
@RestController
public class HdfsController {

    /**
     * Create a directory.
     */
    @PostMapping("/mkdir")
    // Example path: /wordcount/inputpeiyajie
    public String mkdir(String path) throws URISyntaxException, IOException, InterruptedException {
        // File system object
        FileSystem fs = HadoopUtil.getFileSystem();
        // Target path
        Path newPath = new Path(path);
        // Create the (empty) directory
        boolean isOk = fs.mkdirs(newPath);
        fs.close();
        if (isOk) {
            return "Directory created";
        } else {
            return "Failed to create directory";
        }
    }

    @PostMapping("/createFile")
    public String createFile(String path, MultipartFile file) throws InterruptedException, IOException, URISyntaxException {
        // Example path: /wordcount/inputpeiyajie
        String fileName = file.getOriginalFilename();
        FileSystem fs = HadoopUtil.getFileSystem();
        // Append the file name to the target directory
        Path newPath = new Path(path + "/" + fileName);
        // Open an output stream and write the uploaded bytes
        FSDataOutputStream outputStream = fs.create(newPath);
        outputStream.write(file.getBytes());
        outputStream.close();
        fs.close();
        return "File created";
    }

    // Example path: /wordcount/inputpeiyajie/新建文本文档.txt
    // Reference: https://blog.youkuaiyun.com/zxl646801924/article/details/84615604
    @PostMapping("/readFile")
    public String readFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        Path newPath = new Path(path);
        FSDataInputStream inputStream = fs.open(newPath);
        /* Alternative that avoids garbled Chinese characters:
        BufferedReader bf = new BufferedReader(new InputStreamReader(inputStream));
        String line = null;
        while ((line = bf.readLine()) != null) {
            System.out.println(line);
        }
        bf.close();
        */
        IOUtils.copyBytes(inputStream, System.out, 4096);
        IOUtils.closeStream(inputStream);
        fs.close();
        return "File read";
    }

    // List all files under a directory (e.g. the demo directory), recursively
    @PostMapping("/ListFile")
    public List<Map<String, String>> listFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        Path newPath = new Path(path);
        // Recursively list all files
        RemoteIterator<LocatedFileStatus> fileLists = fs.listFiles(newPath, true);
        List<Map<String, String>> returnList = new ArrayList<Map<String, String>>();
        while (fileLists.hasNext()) {
            LocatedFileStatus next = fileLists.next();
            String fileName = next.getPath().getName();
            Path filePath = next.getPath();
            Map<String, String> map = new HashMap<String, String>();
            map.put("fileName", fileName);
            map.put("filePath", filePath.toString());
            System.out.println("map:" + map);
            returnList.add(map);
        }
        fs.close();
        return returnList;
    }

    // Delete a file or directory
    @PostMapping("/deleteFile")
    public String deleteFile(@RequestParam("path") String path) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        Path newPath = new Path(path);
        // delete(path, true) deletes recursively and immediately
        // (deleteOnExit would only take effect when the FileSystem is closed)
        boolean isOk = fs.delete(newPath, true);
        fs.close();
        if (isOk) {
            return "File deleted";
        } else {
            return "Failed to delete file";
        }
    }

    @PostMapping("/uploadFile")
    public String uploadFile(@RequestParam("path") String path, @RequestParam("uploadPath") String uploadPath) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        // Local source path
        Path newPath = new Path(path);
        // Target path on HDFS
        Path newUploadPath = new Path(uploadPath);
        fs.copyFromLocalFile(false, newPath, newUploadPath);
        fs.close();
        return "File uploaded";
    }

    @PostMapping("/downloadFile")
    public String downloadFile(@RequestParam("path") String path, @RequestParam("downloadPath") String downloadPath) throws InterruptedException, IOException, URISyntaxException {
        FileSystem fs = HadoopUtil.getFileSystem();
        // Source path on HDFS
        Path newPath = new Path(path);
        // Local target path
        Path newDownloadPath = new Path(downloadPath);
        fs.copyToLocalFile(false, newPath, newDownloadPath);
        fs.close();
        return "File downloaded";
    }
}

Note: if the download step fails with HADOOP_HOME and hadoop.home.dir are unset, it means you have not configured the Windows-local Hadoop environment variables.
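If you just need the download endpoint to work on a Windows development machine, a common workaround is to point Hadoop at a local installation programmatically before the first FileSystem call, instead of relying only on the environment variable. A minimal sketch, assuming Hadoop is unpacked locally at E:\software\hadoop-3.2.1 (the same location used in the environment setup below) and that winutils.exe is present in its bin directory:

// Hypothetical placement: a static initializer in the Spring Boot main class (or at the top of the test),
// so it runs before any Hadoop FileSystem/Shell class is loaded.
// The path is an assumption; point it at your own local Hadoop directory.
static {
    System.setProperty("hadoop.home.dir", "E:\\software\\hadoop-3.2.1");
}

Why a purely remote call needs a local Hadoop at all is explained next.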
You might wonder: I am calling a remote Hadoop running on Linux, so what does my local Hadoop have to do with it? If your operations only touch the remote Hadoop, such as uploading, creating directories, or renaming files (writes), you do not need a local Hadoop on Windows. But as soon as a download (read) is involved, Hadoop's internal caching mechanism requires a local Hadoop as well, and you will hit HADOOP_HOME and hadoop.home.dir are unset. The fix is to configure HADOOP_HOME and add %HADOOP_HOME%\bin to PATH, then test the hadoop version command; once it works, restart your Eclipse/MyEclipse. This alone is not enough, though: Hadoop on Windows also needs winutils.exe, otherwise you will get Could not locate Hadoop executable: xxxx\winutils.exe.

Download: https://github.com/cdarlint/winutils/tree/master/hadoop-3.2.1/bin
Put winutils.exe and hadoop.dll into Hadoop's bin folder.

System configuration:
Add %HADOOP_HOME%\bin to Path.
Add a new system variable: HADOOP_HOME = E:\software\hadoop-3.2.1

5. Use MapReduce to compute the word count; the results are written to the job's output path
package com.qihoo.hadoop.util;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split on one or more whitespace characters
        String[] ws = value.toString().split("\\s+");
        for (String word : ws) {
            // Emit (word, 1) for every word
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
package com.qihoo.hadoop.util;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            // Sum the counts rather than just counting elements,
            // so the result stays correct even if the values are pre-aggregated
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
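Because the reducer sums the incoming counts rather than just counting elements, it can also be reused as a combiner to cut down shuffle traffic. This is optional and not part of the original setup; if you want to try it, it is a single extra line when the Job is configured in the next step:

// Optional: run WCReducer on the map side as a combiner
job.setCombinerClass(WCReducer.class);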
// Word-count test. Note: Hadoop's local caching/file handling requires a local Hadoop installation as well.
@Test
public void wordCount() throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("HADOOP_HOME:" + System.getenv("HADOOP_HOME"));
    // 1. Configuration object
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://10.202.234.244:9000");
    // 2. Create the job
    Job job = Job.getInstance(conf, "wc");
    // 2.1 Set the Mapper and Reducer classes for the two processing phases
    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);
    // 2.2 Set the map output key/value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Final output key/value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // 2.3 Number of reducers (default is 1)
    job.setNumReduceTasks(2);
    // 2.4 Input and output paths
    // HDFS variant:
    // FileInputFormat.addInputPath(job, new Path("hdfs://10.202.234.244:9000/wordcount/inputpeiyajie/1.txt"));
    // The output directory must not exist yet
    // FileOutputFormat.setOutputPath(job, new Path("hdfs://10.202.234.244:9000/wordcount/mapreduceFile"));
    // Local variant:
    FileInputFormat.addInputPath(job, new Path("file:///C:/Users/peiyajie/Desktop/1.txt"));
    // The output directory must not exist yet
    FileOutputFormat.setOutputPath(job, new Path("file:///C:/Users/peiyajie/Desktop/3"));
    // You could also configure whether to ship a jar here.
    // 2.5 Submit the job, wait for completion, and report success or failure
    boolean b = job.waitForCompletion(true);
    System.exit(b ? 0 : -1);
}
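If you run the job against the commented-out HDFS paths instead of the local desktop files, you can check the result with the same FileSystem API from step 3. With setNumReduceTasks(2), the output directory holds a _SUCCESS marker plus two data files, part-r-00000 and part-r-00001, with one word<TAB>count pair per line. A minimal sketch for printing them (the directory /wordcount/mapreduceFile is the commented-out output path above; imports from org.apache.hadoop.fs and org.apache.hadoop.io are assumed):

@Test
public void printWordCountResult() throws Exception {
    FileSystem fs = HadoopUtil.getFileSystem();
    // Output directory written by the job (assumption: the HDFS variant above was used)
    Path outputDir = new Path("/wordcount/mapreduceFile");
    for (FileStatus status : fs.listStatus(outputDir)) {
        // Only the reducer output files contain data; skip _SUCCESS and anything else
        if (!status.getPath().getName().startsWith("part-r-")) {
            continue;
        }
        FSDataInputStream in = fs.open(status.getPath());
        IOUtils.copyBytes(in, System.out, 4096, false);
        IOUtils.closeStream(in);
    }
    fs.close();
}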