MapReduce Two-Table Join

The job below performs a reduce-side join of two comma-separated tables that share an id column: one table is shaped like `name,id` (its first field is longer than one character) and the other like `id,name` (its first field is a single character). The mapper keys every record by the id and tags the value with a flag (`1` for the name-first table, `2` for the id-first table); the reducer collects both sides for each key and emits their cross product. A commented-out, array-based variant of the reduce is kept to show a second way of writing it.

```java
package sitesh;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;

public class TwoJoin {

    public static class TwoJoinMap extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Skip the header line of either table.
            if (line.contains("name") || line.contains("id")) {
                return;
            }
            String[] words = line.split(",");
            if (words[0].length() == 1) {
                // "id,name"-shaped table (address/city table): key = id, flag 2 carries the name.
                context.write(new Text(words[0]), new Text("2|" + words[1]));
            } else if (words[0].length() > 1) {
                // "name,id"-shaped table (factory table): key = id, flag 1 carries the name.
                context.write(new Text(words[1]), new Text("1|" + words[0]));
            }
        }
    }

    public static class TwoJoinReduce extends Reducer<Text, Text, Text, Text> {
        private int time = 0;

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Emit a header row once per reduce task.
            if (time == 0) {
                context.write(new Text("name"), new Text("id"));
                time++;
            }

            /* First form of the reduce: collect both sides into fixed-size arrays.
            int namenum = 0;
            String[] name = new String[10];
            int addresnum = 0;
            String[] addres = new String[10];
            for (Text value : values) {
                String[] str2 = value.toString().split("\\|");
                if (str2[0].equals("1")) {
                    name[namenum] = str2[1];
                    namenum++;
                }
                if (str2[0].equals("2")) {
                    addres[addresnum] = str2[1];
                    addresnum++;
                }
            }
            if (namenum != 0 && addresnum != 0) {
                for (int m = 0; m < namenum; m++) {
                    for (int n = 0; n < addresnum; n++) {
                        // Emit the joined record.
                        context.write(new Text(name[m]), new Text(addres[n]));
                    }
                }
            }*/

            // Second form of the reduce: dynamic lists.
            ArrayList<String> left = new ArrayList<String>();   // flag 1: factory names
            ArrayList<String> right = new ArrayList<String>();  // flag 2: address/city names
            for (Text v : values) {
                // "|" is a regex metacharacter, so it must be escaped; split into at most two parts.
                String[] parts = v.toString().split("\\|", 2);
                if (parts[0].equals("1")) {
                    left.add(parts[1]);
                } else {
                    right.add(parts[1]);
                }
            }
            // Cross product of the two sides for this join key: key = address/city name, value = factory name.
            for (int i = 0; i < left.size(); i++) {
                for (int j = 0; j < right.size(); j++) {
                    context.write(new Text(right.get(j)), new Text(left.get(i)));
                }
            }
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Disable output compression.
        conf.set("mapreduce.output.fileoutputformat.compress", "false");
        // Set the map task memory.
        conf.set("mapreduce.map.memory.mb", "2048");

        Job job = Job.getInstance(conf);
        job.setJarByClass(TwoJoin.class);

        job.setMapperClass(TwoJoinMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(TwoJoinReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        Path outPath = new Path(args[1]);
        FileSystem fs = FileSystem.get(conf);
        // Delete the output directory if it already exists.
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        job.waitForCompletion(true);
    }
}
```
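To exercise the job without a cluster, a local smoke test along these lines should work. This is only a sketch: the `TwoJoinLocalTest` class, the `join-in`/`join-out` directories, the file names, and the sample factory/address rows are all invented for illustration, and it assumes the Hadoop client jars are on the classpath so the default local job runner and local file system are used.

```java
package sitesh;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

// Hypothetical local smoke test for TwoJoin; all directories, file names and rows are sample data.
public class TwoJoinLocalTest {
    public static void main(String[] args) throws Exception {
        Path in = Paths.get("join-in");
        Files.createDirectories(in);

        // "name,id"-shaped table: factory name, address id (first field longer than one character).
        Files.write(in.resolve("factory.csv"),
                "factoryname,addressed\nBeijing Red Star,1\nGuangzhou Honda,2\n"
                        .getBytes(StandardCharsets.UTF_8));

        // "id,name"-shaped table: address id, address name (first field is a single character).
        Files.write(in.resolve("address.csv"),
                "id,addressname\n1,Beijing\n2,Guangzhou\n"
                        .getBytes(StandardCharsets.UTF_8));

        // Reuse the driver; with no cluster configuration it runs on the local job runner
        // and writes to the local file system. It deletes join-out if it already exists.
        TwoJoin.main(new String[] {"join-in", "join-out"});
    }
}
```

With this sample input, the single part file under `join-out` should contain the tab-separated header `name  id` (emitted once per reduce task) followed by the joined pairs `Beijing  Beijing Red Star` and `Guangzhou  Guangzhou Honda`.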