1. The data (partial listing; the full dataset is too large to display here)
-161.8 -231 -279 -20 -268 -370 -50 -455 -105 -377 -479 -182 -46 -224 66 -305 -350 -211
245 489 374 681 183 406 438 171 380 167 33 270 213 396 198 282 60 492 446 541 4796.7
458 643 -51.6 -6 -114 -73 -311 -6 -32 -105 -10 5 -69 -83 -94 -91 -6 -59 -241 2 -28 -111
2. Preprocessing the data
(1) Add a row number to each line of the data with Java code
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;

public class AddLine
{
    public static void main(String[] args)
    {
        File file = new File("data.txt");
        try {
            // Read every line of data.txt and prefix it with its row number.
            BufferedReader br = new BufferedReader(new FileReader("data.txt"));
            String temp = null;
            StringBuffer sb = new StringBuffer();
            int i = 1;
            while ((temp = br.readLine()) != null)
            {
                temp = i + " " + temp;
                i++;
                sb.append(temp + "\r\n");
            }
            br.close();
            // Write the numbered lines back over the same file.
            FileOutputStream fos = new FileOutputStream(file);
            fos.write(sb.toString().getBytes());
            fos.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Processed result (again only partially shown; the full output is too large to display):
1 -161.8 -231 -279 -20 -268 -370 -50 -455 -105 -377 -479 -182 -46 -224 66 -305 -350 -211
2 245 489 374 681 183 406 438 171 380 167 33 270 213 396 198 282 60 492 446 541 4796.7
3 458 643 -51.6 -6 -114 -73 -311 -6 -32 -105 -10 5 -69 -83 -94 -91 -6 -59 -241 2 -28 -111
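For reference, the same numbering can be done more compactly with java.nio.file.Files. This is only a minimal sketch, not part of the original post: the class name AddLineNio is made up, and it assumes data.txt fits in memory and is UTF-8 encoded (Files.readAllLines decodes UTF-8).

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

public class AddLineNio {
    public static void main(String[] args) throws IOException {
        Path path = Paths.get("data.txt");
        List<String> numbered = new ArrayList<String>();
        int i = 1;
        // Prefix each line with its row number, then overwrite the file.
        for (String line : Files.readAllLines(path)) {
            numbered.add(i + " " + line);
            i++;
        }
        Files.write(path, numbered);
    }
}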
(2) Alternatively, leave the data file itself unmodified and add the row number in code, using a self-incrementing indexline counter when writing the key (this is what MyMapper approach 1 below does).
3. MapReduce code for the per-row average and standard deviation, also echoing the original data (input without row numbers added)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyMain {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(MyMain.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // The map output types differ from the reduce output types, so declare both
        // explicitly: the map emits <IntWritable, DoubleWritable>, the reduce emits <IntWritable, Text>.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        // Input and output are HDFS directories, not single files.
        FileInputFormat.setInputPaths(job, new Path("/lqc_Data_Avg/input"));
        FileOutputFormat.setOutputPath(job, new Path("/lqc_Data_Avg/output"));
        if (!job.waitForCompletion(true))
            return;
    }
}
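One practical note: FileOutputFormat fails if /lqc_Data_Avg/output is left over from an earlier run. If that is a concern, the driver can delete the directory before submitting the job. A minimal sketch using the standard FileSystem API; the helper class OutputCleaner is made up for illustration:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputCleaner {
    // Delete the job output directory if it already exists,
    // so that repeated runs of MyMain do not fail.
    public static void deleteIfExists(Configuration conf, String dir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(dir);
        if (fs.exists(output)) {
            fs.delete(output, true); // true = delete recursively
        }
    }
}

In MyMain this could be called as OutputCleaner.deleteIfExists(conf, "/lqc_Data_Avg/output") just before FileOutputFormat.setOutputPath.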
MyMapper, approach 1 (the row number is generated inside the Mapper)
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    // Row counter kept in the Mapper itself; the input file is NOT pre-numbered.
    // Note: the counter restarts at 1 in every map task, so the numbering is only
    // consistent when the whole file is processed by a single input split.
    int indexline = 1;

    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString();
        String[] datas = line.split(" ");
        // Emit every value of this row under the current row number.
        for (String data : datas) {
            context.write(new IntWritable(indexline), new DoubleWritable(Double.parseDouble(data)));
        }
        indexline++;
    }
}
MyMapper, approach 2 (the row number is read from the first token of each line, i.e. it expects the pre-numbered data from section 2)
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        // The first token of each line is read as the row number, so this variant
        // expects the pre-numbered data produced in section 2.
        String line = ivalue.toString();
        StringTokenizer token = new StringTokenizer(line);
        int index = Integer.parseInt(token.nextToken());
        // Emit every remaining value of the row under that row number.
        while (token.hasMoreTokens()) {
            context.write(new IntWritable(index), new DoubleWritable(Double.parseDouble(token.nextToken())));
        }
    }
}
Note: the values iterator in the reducer can only be traversed once, so cache the values first, e.g. List<Double> cache = new ArrayList<Double>(); cache val.get() rather than the DoubleWritable object itself, because Hadoop reuses the same Writable instance while iterating.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyReducer extends Reducer<IntWritable, DoubleWritable, IntWritable, Text> {
    public void reduce(IntWritable _key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sum = 0;
        int index = 0;
        // Cache the values: the iterator can only be traversed once,
        // and a second pass is needed for the standard deviation.
        List<Double> sourceData = new ArrayList<Double>();
        String strSourceData = "";
        for (DoubleWritable val : values) {
            sourceData.add(val.get());
            strSourceData += val.get() + " ";
            sum += val.get();
            index++;
        }
        // Mean of the row.
        double avg = sum / index;
        // Sum of squared deviations from the mean.
        double xi = 0;
        for (double val : sourceData) {
            xi += Math.pow(val - avg, 2);
        }
        // Population standard deviation.
        double sd = Math.sqrt(xi / index);
        String out = strSourceData + " " + "Avg: " + avg + " SD: " + sd;
        context.write(_key, new Text(out));
    }
}
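If the output did not need to echo the original row values, the average and standard deviation could also be computed in a single pass from the running sum and sum of squares, without caching anything. This is a sketch of an alternative, not the reducer used in this post; the class name MyStreamingReducer is made up:

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyStreamingReducer extends Reducer<IntWritable, DoubleWritable, IntWritable, Text> {
    public void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sum = 0;
        double sumSq = 0;
        int count = 0;
        // One pass: accumulate the sum and the sum of squares.
        for (DoubleWritable val : values) {
            double x = val.get();
            sum += x;
            sumSq += x * x;
            count++;
        }
        double avg = sum / count;
        // Population variance: E[x^2] - (E[x])^2.
        double sd = Math.sqrt(sumSq / count - avg * avg);
        context.write(key, new Text("Avg: " + avg + " SD: " + sd));
    }
}

The two-pass version above is numerically safer when the values are large compared with their spread, which is one reason to keep the cached-list approach.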
4. MapReduce code for the per-row average, for data that has already been preprocessed with row numbers added
MyMain and MyReducer are the same as above.
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString();
        String[] datas = line.split(" ");
        // The data has already been numbered: datas[0] is the row number,
        // the remaining fields are the row's values.
        for (int i = 1; i < datas.length; i++) {
            context.write(new IntWritable(Integer.parseInt(datas[0])), new DoubleWritable(Double.parseDouble(datas[i])));
        }
    }
}
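If the numbered file may contain irregular spacing or blank lines, the same mapper can be written a little more defensively. A sketch, not part of the original post; the class name MyDefensiveMapper is made up:

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyDefensiveMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString().trim();
        if (line.isEmpty()) {
            return; // skip blank lines
        }
        // Split on any run of whitespace instead of a single space.
        String[] datas = line.split("\\s+");
        int row = Integer.parseInt(datas[0]);
        for (int i = 1; i < datas.length; i++) {
            context.write(new IntWritable(row), new DoubleWritable(Double.parseDouble(datas[i])));
        }
    }
}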
5. Results (the leading part is omitted; the full source code can be downloaded)
1 xxx -17.0 -138.0 -109.0 Avg: -184.3861111111111 SD: 153.5995020477638
2 xxx 42.0 207.0 166.0 Avg: 53.99722222222223 SD: 115.17834235584182
3 xxx -114.0 9.0 113.0 28.0 Avg: 23.897222222222222 SD: 52.15638868990874