To create the weblog project, make sure this machine has a vm folder on the E: drive, with a weblogs directory inside it holding the input log files.
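For orientation, a layout like the following is assumed (the file name access.log is hypothetical; the paths come from the Driver code below):

E:\vm\weblogs\access.log        <- input log files read by the job
E:\vm\weblog_out1112\           <- created by the job; must not exist beforehand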
package com.example.weblog;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WebLogDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // (Optional) set the user name for HDFS access here
        // 1. Get the Job object
        Configuration conf = new Configuration();
        // Uncomment to run against a remote HDFS cluster instead of the local file system:
        // conf.set("fs.defaultFS", "hdfs://hadoop100:8020");
        Job job = Job.getInstance(conf);

        // 2. Associate the jar containing the local Driver class
        job.setJarByClass(WebLogDriver.class);

        // 3. Associate the Mapper (this is a map-only job, so no Reducer is registered)
        job.setMapperClass(WeblogMapper.class);
        job.setNumReduceTasks(0);

        // 4. Set the Mapper's output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5. Set the job's final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // 6. Set the input data and output result paths
        FileInputFormat.setInputPaths(job, new Path("E:\\vm\\weblogs"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\vm\\weblog_out1112"));

        // 7. Submit the job and exit with its status
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
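One practical note: MapReduce refuses to start if the output directory already exists, failing with FileAlreadyExistsException on a rerun. A minimal sketch of clearing it before submission, assuming the same conf and output path as above (it would go just before FileOutputFormat.setOutputPath, and needs import org.apache.hadoop.fs.FileSystem;):

FileSystem fs = FileSystem.get(conf);
Path outPath = new Path("E:\\vm\\weblog_out1112");
if (fs.exists(outPath)) {
    fs.delete(outPath, true); // recursively remove a previous run's output
}
FileOutputFormat.setOutputPath(job, outPath);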
package com.example.weblog;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
// 1. Extend the Mapper class
// 2. Override the map method
public class WeblogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Debug output: print the raw input line
        System.out.println(value);

        // 1. Take one line of text and split it on whitespace to get the field array
        String[] words = value.toString().split("\\s+");
        System.out.println(words.length); // debug output: field count
        System.out.println("=========================");

        // 2. Keep the record only if it has exactly 9 fields
        if (words.length == 9) {
            context.write(value, NullWritable.get());
        }
    }
}
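To see what the 9-field filter keeps, here is a quick standalone check. The log line is hypothetical, not taken from the real weblogs data; it mimics an Apache-style access log line whose whitespace split yields exactly 9 fields:

package com.example.weblog;

public class SplitCheck {
    public static void main(String[] args) {
        // Hypothetical access-log line for illustration only
        String line = "194.237.142.21 - - [18/Sep/2013:06:49:18 +0000] \"GET /wp-content HTTP/1.1\" 304";
        String[] words = line.split("\\s+");
        System.out.println(words.length); // prints 9, so WeblogMapper would keep this line
    }
}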
package com.example.weblog;

// Map-only job: this Reducer is an empty placeholder and is never registered in the Driver
// (the Driver sets the number of reduce tasks to 0).
public class WeblogReducer {
}