Hadoop运行一个job

最新推荐文章于 2021-08-12 01:46:12 发布

原创最新推荐文章于 2021-08-12 01:46:12 发布 · 1k 阅读

0 ·

CC 4.0 BY-SA版权

云计算专栏收录该内容

5 篇文章

订阅专栏

本文介绍了如何在Eclipse中安装hadoop-eclipse-plugin插件，配置Hadoop Map/Reduce，以及创建和运行MapReduce程序来实现倒排索引。详细步骤包括插件安装、设置Hadoop目录、新建MapReduce工程以及编写和理解代码。

一：安装eclipse插件

要想在eclipse上进行map-reduce编程，需要安装一个插件，hadoop-eclipse-plugin-1.0.0.jar，可以在这里下载

将插件拷贝到 Eclipse 的 plugins 目录里，然后重启 Eclipse 即可。打开 Eclipse，依次选择 Window → Preferences，选中 Hadoop Map/Reduce，填写 Hadoop 的安装目录

Eclipse所在的机器可以是hadoop集群中的任意一个节点

显示 Map/Reduce 视图：依次选择 Window → Show View → Other，选择 Map/Reduce，此时我们可以在 Eclipse 输出栏看到 Map/Reduce 视图

右键选择new hadoop location

填入配置文件中的主机名和端口号，到此就配置完了，可以在Eclipse上看到hdfs文件了，用Eclipse可以方便的上传删除查看文件等等

如果你安装插件出现问题，那么最可能的原因就是hadoop安装目录没有填写正确

二：编写Map-Reduce程序

新建map-reduce工程，src目录里新建class文件，工程名和类名一样

实现一个倒排索引

输入：文本文件，每行包含两个以单个空格分隔的字段 anum 和 bnum（例如两个电话号码），格式为“anum bnum”。

输出：每行的格式为“bnum<制表符>anum1|anum2|…|”，即把同一个 bnum 对应的所有 anum 用“|”连接起来（末尾也带一个“|”）。

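思路：Map 阶段把每行按空格拆成 anum 和 bnum，以 bnum 为键、anum 为值输出（字段不足两个的行计入 LINESKIP 计数器并跳过）；Reduce 阶段把同一个 bnum 的所有 anum 依次拼接后输出。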

代码：

/**
 * map_reduce code 
 * @author wyp
 *
 */
 
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
 
 
 
public class Test_Phonenum extends Configured implements Tool{
 
    enum Counter{
         
        LINESKIP,
    }
     
    public static class Map extends Mapper<LongWritable, Text, Text, Text>
    {
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
        {
            String line = value.toString();             
            try
            {
                 
                String [] lineSplit = line.split(" ");
                String anum = lineSplit[0];
                String bnum = lineSplit[1];
                 
                context.write(new Text(bnum), new Text(anum));     
            }
            catch(java.lang.ArrayIndexOutOfBoundsException e)
            {
                context.getCounter(Counter.LINESKIP).increment(1);      
                return;
            }
 
        }
    }
 
    public static class Reduce extends Reducer<Text, Text, Text, Text>
    {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            String valueString;
            String out = "";
             
            for(Text value:values)
            {
                valueString  = value.toString();
                out += valueString + "|";
            }
             
            context.write(key, new Text(out));
        }
    }
     
    public int run(String[] args) throws Exception
    {
        Configuration conf = getConf();
         
        Job job = new Job(conf, "WYP_Test_2");     
         
        FileInputFormat.addInputPath(job, new Path(args[0]));      
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    
         
        job.setMapperClass(Map.class);                      
        job.setReducerClass(Reduce.class);                  
         
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);             
        job.waitForCompletion(true);
         
        return job.isSuccessful() ? 0 : 1;
    }
     
    public static void main(String[] args) throws Exception{
         
        int res = ToolRunner.run(new Configuration(), new Test_Phonenum(), args);
        System.exit(res);
 
    }
 
}