1. The data (partial listing; the full dataset is too large to display here)
-161.8 -231 -279 -20 -268 -370 -50 -455 -105 -377 -479 -182 -46 -224 66 -305 -350 -211
245 489 374 681 183 406 438 171 380 167 33 270 213 396 198 282 60 492 446 541 4796.7
458 643 -51.6 -6 -114 -73 -311 -6 -32 -105 -10 5 -69 -83 -94 -91 -6 -59 -241 2 -28 -111
2. Preprocessing the data
(1) Add a row number to each line of the data with Java code
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;

public class AddLine
{
    public static void main(String[] args)
    {
        File file = new File("data.txt");
        try {
            // Read every line of data.txt and prefix it with its row number.
            BufferedReader br = new BufferedReader(new FileReader("data.txt"));
            String temp = null;
            StringBuffer sb = new StringBuffer();
            int i = 1;
            while ((temp = br.readLine()) != null)
            {
                temp = i + " " + temp;
                i++;
                sb.append(temp + "\r\n");
            }
            br.close();
            // Write the numbered lines back over the same file.
            FileOutputStream fos = new FileOutputStream(file);
            fos.write(sb.toString().getBytes());
            fos.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Processed result (again only partially shown; the full output is too large to display):
1 -161.8 -231 -279 -20 -268 -370 -50 -455 -105 -377 -479 -182 -46 -224 66 -305 -350 -211
2 245 489 374 681 183 406 438 171 380 167 33 270 213 396 198 282 60 492 446 541 4796.7
3 458 643 -51.6 -6 -114 -73 -311 -6 -32 -105 -10 5 -69 -83 -94 -91 -6 -59 -241 2 -28 -111
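For reference, the same numbering can be done more compactly with java.nio.file.Files. This is only a minimal sketch, not part of the original post: the class name AddLineNio is made up, and it assumes data.txt fits in memory and is UTF-8 encoded (Files.readAllLines decodes UTF-8).

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

public class AddLineNio {
    public static void main(String[] args) throws IOException {
        Path path = Paths.get("data.txt");
        List<String> numbered = new ArrayList<String>();
        int i = 1;
        // Prefix each line with its row number, then overwrite the file.
        for (String line : Files.readAllLines(path)) {
            numbered.add(i + " " + line);
            i++;
        }
        Files.write(path, numbered);
    }
}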
(2) Alternatively, leave the data file itself unmodified and add the row number in code, using a self-incrementing indexline counter when writing the key (this is what MyMapper approach 1 below does).
3. MapReduce code for the per-row average and standard deviation, also echoing the original data (input without row numbers added)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyMain {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(MyMain.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // The map output types differ from the reduce output types, so declare both
        // explicitly: the map emits <IntWritable, DoubleWritable>, the reduce emits <IntWritable, Text>.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        // Input and output are HDFS directories, not single files.
        FileInputFormat.setInputPaths(job, new Path("/lqc_Data_Avg/input"));
        FileOutputFormat.setOutputPath(job, new Path("/lqc_Data_Avg/output"));
        if (!job.waitForCompletion(true))
            return;
    }
}
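One practical note: FileOutputFormat fails if /lqc_Data_Avg/output is left over from an earlier run. If that is a concern, the driver can delete the directory before submitting the job. A minimal sketch using the standard FileSystem API; the helper class OutputCleaner is made up for illustration:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputCleaner {
    // Delete the job output directory if it already exists,
    // so that repeated runs of MyMain do not fail.
    public static void deleteIfExists(Configuration conf, String dir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(dir);
        if (fs.exists(output)) {
            fs.delete(output, true); // true = delete recursively
        }
    }
}

In MyMain this could be called as OutputCleaner.deleteIfExists(conf, "/lqc_Data_Avg/output") just before FileOutputFormat.setOutputPath.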
MyMapper, approach 1 (the row number is generated inside the Mapper)
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    // Row counter kept in the Mapper itself; the input file is NOT pre-numbered.
    // Note: the counter restarts at 1 in every map task, so the numbering is only
    // consistent when the whole file is processed by a single input split.
    int indexline = 1;

    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString();
        String[] datas = line.split(" ");
        // Emit every value of this row under the current row number.
        for (String data : datas) {
            context.write(new IntWritable(indexline), new DoubleWritable(Double.parseDouble(data)));
        }
        indexline++;
    }
}
MyMapper, approach 2 (the row number is read from the first token of each line, i.e. it expects the pre-numbered data from section 2)
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        // The first token of each line is read as the row number, so this variant
        // expects the pre-numbered data produced in section 2.
        String line = ivalue.toString();
        StringTokenizer token = new StringTokenizer(line);
        int index = Integer.parseInt(token.nextToken());
        // Emit every remaining value of the row under that row number.
        while (token.hasMoreTokens()) {
            context.write(new IntWritable(index), new DoubleWritable(Double.parseDouble(token.nextToken())));
        }
    }
}
Note: the values iterator in the reducer can only be traversed once, so cache the values first, e.g. List<Double> cache = new ArrayList<Double>(); cache val.get() rather than the DoubleWritable object itself, because Hadoop reuses the same Writable instance while iterating.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyReducer extends Reducer<IntWritable, DoubleWritable, IntWritable, Text> {
    public void reduce(IntWritable _key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sum = 0;
        int index = 0;
        // Cache the values: the iterator can only be traversed once,
        // and a second pass is needed for the standard deviation.
        List<Double> sourceData = new ArrayList<Double>();
        String strSourceData = "";
        for (DoubleWritable val : values) {
            sourceData.add(val.get());
            strSourceData += val.get() + " ";
            sum += val.get();
            index++;
        }
        // Mean of the row.
        double avg = sum / index;
        // Sum of squared deviations from the mean.
        double xi = 0;
        for (double val : sourceData) {
            xi += Math.pow(val - avg, 2);
        }
        // Population standard deviation.
        double sd = Math.sqrt(xi / index);
        String out = strSourceData + " " + "Avg: " + avg + " SD: " + sd;
        context.write(_key, new Text(out));
    }
}
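If the output did not need to echo the original row values, the average and standard deviation could also be computed in a single pass from the running sum and sum of squares, without caching anything. This is a sketch of an alternative, not the reducer used in this post; the class name MyStreamingReducer is made up:

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyStreamingReducer extends Reducer<IntWritable, DoubleWritable, IntWritable, Text> {
    public void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sum = 0;
        double sumSq = 0;
        int count = 0;
        // One pass: accumulate the sum and the sum of squares.
        for (DoubleWritable val : values) {
            double x = val.get();
            sum += x;
            sumSq += x * x;
            count++;
        }
        double avg = sum / count;
        // Population variance: E[x^2] - (E[x])^2.
        double sd = Math.sqrt(sumSq / count - avg * avg);
        context.write(key, new Text("Avg: " + avg + " SD: " + sd));
    }
}

The two-pass version above is numerically safer when the values are large compared with their spread, which is one reason to keep the cached-list approach.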
4. MapReduce code for the per-row average, for data that has already been preprocessed with row numbers added
MyMain and MyReducer are the same as above.
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString();
        String[] datas = line.split(" ");
        // The data has already been numbered: datas[0] is the row number,
        // the remaining fields are the row's values.
        for (int i = 1; i < datas.length; i++) {
            context.write(new IntWritable(Integer.parseInt(datas[0])), new DoubleWritable(Double.parseDouble(datas[i])));
        }
    }
}
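If the numbered file may contain irregular spacing or blank lines, the same mapper can be written a little more defensively. A sketch, not part of the original post; the class name MyDefensiveMapper is made up:

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyDefensiveMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString().trim();
        if (line.isEmpty()) {
            return; // skip blank lines
        }
        // Split on any run of whitespace instead of a single space.
        String[] datas = line.split("\\s+");
        int row = Integer.parseInt(datas[0]);
        for (int i = 1; i < datas.length; i++) {
            context.write(new IntWritable(row), new DoubleWritable(Double.parseDouble(datas[i])));
        }
    }
}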
5. Results (the leading part is omitted; the full source code can be downloaded)
1 xxx -17.0 -138.0 -109.0 Avg: -184.3861111111111 SD: 153.5995020477638
2 xxx 42.0 207.0 166.0 Avg: 53.99722222222223 SD: 115.17834235584182
3 xxx -114.0 9.0 113.0 28.0 Avg: 23.897222222222222 SD: 52.15638868990874