// WCToMySQL
package a.b.c;
import java.beans.Statement;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
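// Suited to small result sets (e.g. PV/UV statistics) that are stored in MySQL for fast querying.
// A custom record class (DBWrite below) implements DBWritable and Writable to handle data serialization.
// DBOutputFormat: provides the interface for writing to the database.
// DBRecordWriter: provides the interface for writing data records into the database.
// DBConfiguration: provides the interface for database configuration and connection creation.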
/**
 * A (word, count) record that can be serialized by Hadoop ({@link Writable})
 * and bound to / read from JDBC statements and result sets ({@link DBWritable}).
 *
 * <p>Column order is fixed: column 1 is the word, column 2 is the count.
 */
class DBWrite implements Writable,DBWritable{
    /** The word this record describes. */
    String words;
    /** Number of occurrences recorded for {@link #words}. */
    int count;

    /**
     * Creates an empty record (empty word, zero count).
     *
     * <p>Hadoop creates Writable/DBWritable instances reflectively and then
     * fills them through {@code readFields}, which requires a no-arg constructor.
     */
    public DBWrite() {
        this("", 0);
    }

    /**
     * Creates a record for the given word and count.
     *
     * @param words the word
     * @param count the number of occurrences of the word
     */
    public DBWrite(String words, int count) {
        this.words=words;
        this.count=count;
    }

    /**
     * Binds this record to an INSERT statement: parameter 1 is the word,
     * parameter 2 is the count.
     *
     * @param statement the prepared statement to populate
     * @throws SQLException if a parameter cannot be set
     */
    @Override
    public void write(PreparedStatement statement) throws SQLException {
        statement.setString(1, this.words);
        statement.setInt(2, this.count);
    }

    /**
     * Loads this record from the current row of a result set: column 1 is the
     * word, column 2 is the count.
     *
     * @param resultSet the result set positioned on the row to read
     * @throws SQLException if a column cannot be read
     */
    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        this.words=resultSet.getString(1);
        this.count=resultSet.getInt(2);
    }

    /**
     * Serializes all fields to the binary stream (word as UTF, then count).
     *
     * @param output the stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeUTF(words);
        output.writeInt(count);
    }

    /**
     * Deserializes all fields from the binary stream, in the same order
     * {@link #write(DataOutput)} wrote them.
     *
     * @param input the stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        words=input.readUTF();
        count=input.readInt();
    }
}
/**
 * Word-count mapper: splits each input line on commas and emits
 * {@code (token, 1)} for every non-empty token.
 */
class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    /** Constant count emitted with every token. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key, reused across calls to avoid allocating one object per token. */
    private final Text word = new Text();

    /**
     * Emits one {@code (token, 1)} pair per comma-separated token of the line.
     *
     * @param key     the input key (not used)
     * @param value   one line of input text
     * @param context the context the pairs are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value,
            Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split(",")) {
            // "".split(",") yields [""] and "a,,b" yields an empty middle token;
            // skip those so blank lines and empty fields are not counted as a word.
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
        }
    }
}
/**
 * Word-count reducer: adds up the counts received for a word and emits the
 * result as a {@link DBWrite} record in the output key.
 */
class WCReducer extends Reducer<Text, IntWritable,DBWrite,DBWrite>{
    /**
     * Sums all counts for {@code key} and writes one {@code DBWrite} record.
     *
     * @param key     the word
     * @param val     the counts emitted for that word
     * @param context the context the record is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> val,Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable occurrence : val) {
            total = total + occurrence.get();
        }
        DBWrite row = new DBWrite(key.toString(), total);
        // The record travels in the output key; per the original author's note the
        // database insert is driven by the key alone, so the value is written as null.
        context.write(row, null);
    }
}
public class WCToMySQL {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration configuration=new Configuration();
DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver", "jdbc:mysql://192.168.2.10:3306/hadoop", "root", "root");
//新建任务
Job job=Job.getInstance(configuration,"WCToMySQL");
job.setJarByClass(WCToMySQL.class);
//输入格式
job.setInputFormatClass(TextInputFormat.class);
//Mapper
job.setMapperClass(WCMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
//Reducer
job.setReducerClass(WCReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//输入路径
FileInputFormat.addInputPath(job, new Path(args[0]));
//输出格式
// DBOutputFormat.setOutput(job, "数据库表名", "字段1","字段2");
job.setOutputFormatClass(DBOutputFormat.class);
DBOutputFormat.setOutput(job, "wc", "words", "count");
//设置
job.addArchiveToClassPath(new Path("/lib/mysql-connector-java-5.1.26-bin.jar"));
//提交任务
job.waitForCompletion(true);
}
}