Project address: https://github.com/tudoupaisimalingshu/hadoop_matrix
I. First, transpose the right matrix (covered in a separate post on Step 1)
II. Then perform the multiplication
1. Cache the transposed right matrix via Hadoop's distributed cache
2. Run a map over the left matrix
Each line of the left matrix is split into its column components and multiplied, component by component, against every cached line (each cached line is one row of the transposed right matrix, i.e. one column of the original right matrix); the resulting value is emitted together with its position in the result matrix. A minimal plain-Java sketch of this per-row dot product is shown right after this list.
3. In the reduce step, concatenate the emitted values to form the physical storage of the result matrix
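Here is a minimal sketch, in plain Java without Hadoop, of how one encoded row of the left matrix is multiplied against one encoded row of the transposed right matrix. The class and method names (RowProductSketch, dot) are illustrative only and do not appear in the MapReduce code below.

public class RowProductSketch {
    //a row is encoded as "column_value,column_value,...", e.g. "1_1,2_2,3_-2,4_0"
    static int dot(String leftRow, String rightRow) {
        int result = 0;
        for (String a : leftRow.split(",")) {
            String[] ca = a.split("_");          //{column number, value}
            for (String b : rightRow.split(",")) {
                String[] cb = b.split("_");
                if (ca[0].equals(cb[0])) {       //same column number -> same component position
                    result += Integer.parseInt(ca[1]) * Integer.parseInt(cb[1]);
                }
            }
        }
        return result;
    }
    public static void main(String[] args) {
        //row 1 of the left matrix is (1, 2, -2, 0); row 3 of the transposed right matrix is (-1, 5, 4, -1)
        System.out.println(dot("1_1,2_2,3_-2,4_0", "4_-1,1_-1,3_4,2_5")); //prints 1
    }
}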
package hadoop;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
public class Step2 {
public static class Mapper2 extends Mapper<LongWritable,Text,Text,Text>
{
private Text outKey = new Text();
private Text outValue = new Text();
private List<String> cacheList = new ArrayList<String>();
/***
* Load the file holding the transposed right matrix into memory:
* each line is kept as one String, and all lines together form the list.
*/
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
//"matrix2" is the symlink name of the cached file, set up in the driver (run method)
FileReader fr = new FileReader("matrix2");
BufferedReader br = new BufferedReader(fr);
String line = null;
while((line = br.readLine()) != null)
{
cacheList.add(line);
}
br.close();
fr.close();
}
/* Left matrix, logical form
*  1  2 -2  0
*  3  3  4 -3
* -2  0  2  3
*  5  3 -1  2
* -4  2  0  2
*
* Left matrix, physical form (row number and components separated by a tab)
* 1	1_1,2_2,3_-2,4_0
* 2	1_3,2_3,3_4,4_-3
* 3	1_-2,2_0,3_2,4_3
* 4	1_5,2_3,3_-1,4_2
* 5	1_-4,2_2,3_0,4_2
*
* Right matrix (already transposed), physical form
* 1	3_0,1_0,4_-2,2_1
* 2	3_1,4_2,2_3,1_3
* 3	4_-1,1_-1,3_4,2_5
* 4	1_2,3_-1,4_1,2_-2
* 5	4_2,3_2,1_-3,2_-1
*
* Example map input (the key is the byte offset and is not used):
* value: "1	1_1,2_2,3_-2,4_0"
*/
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] rowAndline = value.toString().split("\t");
//split off the row number
//rowAndline : {"1","1_1,2_2,3_-2,4_0"}
String row_matrix1 = rowAndline[0];
//row_matrix1 : "1"
String[] column_value_array_matrix1 = rowAndline[1].split(",");
//split into the column components
//rowAndline[1] : "1_1,2_2,3_-2,4_0"
//column_value_array_matrix1 : {"1_1","2_2","3_-2","4_0"}
for(String line : cacheList)//take line = "3	4_-1,1_-1,3_4,2_5" as the running example
{
String[] rowAndline2 = line.split("\t");
//rowAndline2 : {"3","4_-1,1_-1,3_4,2_5"}
String row_matrix2 = rowAndline2[0];
//row number of this line of the transposed matrix (= column number of the original right matrix)
String[] column_value_array_matrix2 = rowAndline2[1].split(",");
//rowAndline2[1] : "4_-1,1_-1,3_4,2_5"
//column_value_array_matrix2 : {"4_-1","1_-1","3_4","2_5"}
int result = 0;
//accumulates the products, i.e. the dot product of the two rows
for(String column_value_matrix1 : column_value_array_matrix1)//each column component of the left-matrix row: "1_1","2_2","3_-2","4_0"
{
String column_matrix1 = column_value_matrix1.split("_")[0];
//column number
String value_matrix1 = column_value_matrix1.split("_")[1];
//value in that column
for(String column_value_matrix2 : column_value_array_matrix2)//each column component of the right-matrix line: "4_-1","1_-1","3_4","2_5"
{
String column_matrix2 = column_value_matrix2.split("_")[0];
//column number
String value_matrix2 = column_value_matrix2.split("_")[1];
//value in that column
if(column_matrix2.equals(column_matrix1))//this is why the column number is stored explicitly: only components with the same column number belong to the same position
{
result += Integer.valueOf(value_matrix1) * Integer.valueOf(value_matrix2);
//result += 1 * (-1)
//result += 2 * 5
//result += -2 * 4
//result += 0 * (-1)
}
}
}
outKey.set(row_matrix1);//output key: row number of the left matrix
outValue.set(row_matrix2 + "_" + result);//output value: row number of the transposed right matrix (= column number of the result), "_", and the value at that position
context.write(outKey, outValue);
//("1","3_1") for the running example
}
//all pairs emitted for the example matrices:
//("1","1_2")("1","2_7")("1","3_1")("1","4_0")("1","5_-9")
//("2","1_9")...
//...
}
}
public static class Reducer2 extends Reducer<Text,Text,Text,Text>
{
private Text outKey = new Text();
private Text outValue = new Text();
/**
* Combine the key-value pairs produced by the map phase into the physical form of the result matrix.
* ("1","1_2")("1","2_7")("1","3_1")("1","4_0")("1","5_-9")
* ("2","1_9")...
* ...
* Pairs sharing a key, e.g. ("1","1_2")("1","2_7")("1","3_1")("1","4_0")("1","5_-9"),
* are grouped together before reduce is called (the order of the values is not guaranteed):
* key    : "1"
* values : {"1_2","2_7","3_1","4_0","5_-9"}
*/
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
for(Text text : values)
{
sb.append(text + ",");
}
// sb : "1_2,2_7,3_1,4_0,5_-9,"
String line = "";
if(sb.toString().endsWith(","))
{
line = sb.substring(0,sb.length()-1);
//drop the trailing comma
}
//line : "1_2,2_7,3_1,4_0,5_-9"
outKey.set(key);
outValue.set(line);
context.write(outKey, outValue);
// ("1","1_2,2_7,3_1,4_0,5_-9")
}
}
private static final String INPATH = "input/matrix.txt";
private static final String OUTPATH = "hdfs://pc1:9000/output/step2_3";
private static final String CACHE = "hdfs://pc1:9000/cache/matrix.txt";
private static final String HDFS = "hdfs://pc1:9000";
public void run() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
Configuration conf = new Configuration();
//String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
String[] otherArgs = {INPATH,OUTPATH};
//the two arguments are the HDFS input and output paths
if (otherArgs.length != 2) {
System.err.println("Usage: step2 <in> <out>");
System.exit(2);
}
//conf.set("fs.defaultFS",HDFS);
Job job = Job.getInstance(conf, "step2");//create the job with the given configuration and job name
job.setJarByClass(Step2.class);
job.setMapperClass(Mapper2.class); //Mapper class for the job
job.setReducerClass(Reducer2.class); //Reducer class for the job
job.addCacheFile(new URI(CACHE + "#matrix2"));
//distribute the transposed right matrix (a plain text file) to every task; "#matrix2" is the symlink name read in setup()
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class); //type of the output key
job.setOutputValueClass(Text.class);//type of the output value
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); //input path of the job
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//output path of the job
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args)
{
try {
new Step2().run();
} catch (ClassNotFoundException | IOException | InterruptedException | URISyntaxException e) {
e.printStackTrace();
}
}
}
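To run the job, package the compiled classes into a jar and submit it with the hadoop command; the jar name below is only a placeholder:

hadoop jar hadoop_matrix.jar hadoop.Step2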
III. Results
Because the job writes a SequenceFile, view the result with: hadoop fs -text <output path>
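For example, with a single reduce task the result lands in the standard part-r-00000 file:

hadoop fs -text hdfs://pc1:9000/output/step2_3/part-r-00000

For the sample matrices above, the first row of the product is (2, 7, 1, 0, -9), so its line should read "1	1_2,2_7,3_1,4_0,5_-9" (key and value separated by a tab); the order of the column components within a line depends on the shuffle and may differ.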