eclipse hadoop wordcount

本文介绍了使用Hadoop实现的单词计数程序,包括Map和Reduce阶段的代码实现及配置细节。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

package org.xunw;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
public class WC {
    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
      private final static IntWritable one = new IntWritable(1);
      private Text word = new Text();
      public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
          word.set(tokenizer.nextToken());
          output.collect(word, one);
        }
      }
    }
 
    public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
      public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
        int sum = 0;
        while (values.hasNext()) {
          sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
      }
    }
public static void main(String[] args) throws Exception {
      JobConf conf = new JobConf(WC.class);
      args=new String[]{"hdfs://192.168.1.24:9000/user/xunw/newin","hdfs://192.168.1.24:9000/user/xunw/newout5"};
      conf.setJobName("mywordcount");
      conf.set("mapred.job.tracker", "192.168.1.24:9001");
      conf.setOutputKeyClass(Text.class);
      conf.setOutputValueClass(IntWritable.class);
 
      conf.setMapperClass(Map.class);
      conf.setCombinerClass(Reduce.class);
      conf.setReducerClass(Reduce.class);
      conf.setJarByClass(WC.class);
      conf.set("mapred.jar", "/home/xunw/myHadoop.jar");  //export = 》<span style="font-family: Arial, Helvetica, sans-serif;">/home/xunw/myHadoop.jar</span>
 jar
      conf.setInputFormat(TextInputFormat.class);
      conf.setOutputFormat(TextOutputFormat.class);
      System.out.println(args[0]);
      FileInputFormat.setInputPaths(conf, new Path(args[0]));
      FileOutputFormat.setOutputPath(conf, new Path(args[1])); 
      JobClient.runJob(conf);
    }
 }
 

Common.java

package org.xunw;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.*;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import java.util.Comparator;
/**
 * Minimal {@link Writable} carrying a single integer id.
 *
 * <p>Serialized form is exactly one {@code int}, written by {@link #write} and
 * read back by {@link #readFields}.
 */
class Block implements Writable {
	/** The value that is serialized and deserialized. */
	public int id;

	/** Creates a block with {@code id} left at its default value. */
	public Block() {
	}

	/**
	 * Creates a block with the given id.
	 *
	 * @param id initial id value
	 */
	public Block(int id) {
		this.id = id;
	}

	/**
	 * Writes {@link #id} to the given sink as a single int.
	 *
	 * @param out destination
	 * @throws IOException if writing fails
	 */
	public void write(DataOutput out) throws IOException {
		out.writeInt(this.id);
	}

	/**
	 * Replaces {@link #id} with one int read from the given source.
	 *
	 * @param in source
	 * @throws IOException if reading fails
	 */
	public void readFields(DataInput in) throws IOException {
		this.id = in.readInt();
	}
}



/**
 * Round-trip demo for {@link Block}: serializes a block to a local file and
 * reads it back into a fresh instance.
 */
public class Common {
	/**
	 * Writes a {@code Block} with id 1024 to {@code test.txt}, reads it back and
	 * prints the recovered id.
	 *
	 * @param args unused
	 * @throws Exception if the file cannot be written or read
	 */
	public static void main(String[] args) throws Exception{
		Block b2 = new Block(1024);
		File scratch = new File("test.txt");

		// Close the writer before reopening the file so the bytes are on disk
		// and the file handle is released even if write() throws.
		DataOutputStream dout = new DataOutputStream(new FileOutputStream(scratch));
		try {
			b2.write(dout);
		} finally {
			dout.close();
		}

		Block b3 = new Block();
		DataInputStream din = new DataInputStream(new FileInputStream(scratch));
		try {
			b3.readFields(din);
		} finally {
			din.close();
		}
		System.out.println(b3.id);
	}

}

LeanHdfs.java


package org.xunw;
import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;

import java.io.*;

/**
 * Copies one file out of HDFS to the local disk, then prints the status
 * entries HDFS returns for the source path.
 */
public class LeanHdfs {
	/**
	 * Opens {@code lena.jpg} on HDFS, streams it to
	 * {@code /home/xunw/data/lenaHdfs.jpg}, and lists the source path.
	 *
	 * @param args unused
	 * @throws IOException if HDFS or the local file cannot be accessed
	 */
	public static void main(String[] args) throws IOException{
		Configuration conf = new Configuration();
		conf.addResource(new Path("core-site.xml"));
		conf.addResource(new Path("hdfs-site.xml"));
		FileSystem hdfs = FileSystem.get(conf);

		Path dst = new Path("hdfs://192.168.1.24:9000/user/xunw/lena.jpg");

		byte[] buf = new byte[1024];
		int len;

		// Both streams are closed in finally blocks so that the local copy is
		// complete and no handle leaks when a read or write fails.
		FSDataInputStream fin = hdfs.open(dst);
		try {
			FileOutputStream fout = new FileOutputStream(new File("/home/xunw/data/lenaHdfs.jpg"));
			try {
				while ((len = fin.read(buf)) != -1) {
					fout.write(buf, 0, len);
				}
			} finally {
				fout.close();
			}
		} finally {
			fin.close();
		}

		// Message text kept as in the original program.
		System.out.println("Upload to"+conf.get("fs.default.name"));
		FileStatus[] files = hdfs.listStatus(dst);
		for (FileStatus file : files) {
			System.out.println(file.getPath());
		}
	}

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值