package sitesh;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Iterator;

public class OneJoin {

    public static int time = 0;

    // The map splits each input line into child and parent and emits the record twice:
    // once keyed by the parent (flag "1") and once keyed by the child (flag "2").
    // The flag carried in the value lets the reducer tell the two sides of the self-join apart.
    public static class OneJoinMap extends Mapper<Object, Text, Text, Text> {
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String childname;
            String parentname;
            String relationtype; // left/right table flag
            String line = value.toString();
            int i = 0;
            // fields are separated by a single space
            while (line.charAt(i) != ' ') {
                i++;
            }
            // split into child and parent
            String[] values = {line.substring(0, i), line.substring(i + 1)};
            // skip the header line ("child parent")
            if (values[0].compareTo("child") != 0) {
                childname = values[0];
                parentname = values[1];
                // keyed by the parent, flag "1"
                relationtype = "1";
                context.write(new Text(values[1]), new Text(relationtype + "+" + childname + "+" + parentname));
                // keyed by the child, flag "2"
                relationtype = "2";
                context.write(new Text(values[0]), new Text(relationtype + "+" + childname + "+" + parentname));
            }
        }
    }

    public static class OneJoinReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // write the header row once per reducer JVM (only correct with a single reducer)
            if (time == 0) {
                context.write(new Text("grandchild"), new Text("grandparent"));
                time++;
            }
            int grandchildnum = 0;
            String[] grandchild = new String[10];
            int grandparentnum = 0;
            String[] grandparent = new String[10];
            Iterator<Text> ite = values.iterator();
            while (ite.hasNext()) {
                String record = ite.next().toString();
                int len = record.length();
                int i = 2;
                if (len == 0) {
                    continue;
                }
                // read the left/right table flag
                char relationtype = record.charAt(0);
                String childname = "";
                String parentname = "";
                // extract the child part of the value ("flag+child+parent")
                while (record.charAt(i) != '+') {
                    childname = childname + record.charAt(i);
                    i++;
                }
                i = i + 1;
                // extract the parent part of the value
                while (i < len) {
                    parentname = parentname + record.charAt(i);
                    i++;
                }
                if (relationtype == '1') {
                    // the key is this record's parent, so its child is a grandchild candidate
                    grandchild[grandchildnum] = childname;
                    grandchildnum++;
                } else {
                    // the key is this record's child, so its parent is a grandparent candidate
                    grandparent[grandparentnum] = parentname;
                    grandparentnum++;
                }
            }
            // cross join the two sides to produce grandchild-grandparent pairs
            if (grandparentnum != 0 && grandchildnum != 0) {
                for (int m = 0; m < grandchildnum; m++) {
                    for (int n = 0; n < grandparentnum; n++) {
                        System.out.println(grandchild[m] + " " + grandparent[n]);
                        context.write(new Text(grandchild[m]), new Text(grandparent[n]));
                    }
                }
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // disable output compression
        conf.set("mapreduce.output.fileoutputformat.compress", "false");
        // map task memory in MB
        conf.set("mapreduce.map.memory.mb", "2048");
        Job job = Job.getInstance(conf);

        Path inPath = new Path(args[0]);
        Path outPath = new Path(args[1]);
        // delete the output directory if it already exists
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        job.setJarByClass(OneJoin.class);
        job.setMapperClass(OneJoinMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(OneJoinReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);
        job.waitForCompletion(true);
    }
}
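
// Usage sketch (an illustration, not part of the job itself; the jar name, HDFS paths,
// and sample names below are assumptions). The input is a space-separated child/parent
// table with a "child parent" header line, e.g.:
//
//   child parent
//   Tom Lucy
//   Tom Jack
//   Lucy Mary
//   Jack Ben
//
// A typical invocation might look like (hypothetical jar and paths):
//
//   hadoop jar onejoin.jar sitesh.OneJoin /input/child_parent.txt /output/onejoin
//
// For the sample above, after the "grandchild grandparent" header row the job would emit
// the self-joined pairs (Tom, Mary) and (Tom, Ben).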