MapReduce API

WCToMySQL

WCToMySQL
package a.b.c;

import java.beans.Statement;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

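// Suited to small result sets (e.g. PV/UV statistics) that are stored in MySQL for fast lookups.
// A custom record class (DBWrite below) implements both DBWritable and Writable so that it can be serialized.
// DBOutputFormat: provides the interface for writing to the database.
// DBRecordWriter: provides the interface for writing individual records to the database.
// DBConfiguration: provides the interface for database configuration and connection creation.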

/**
 * One row of the word-count result: a word and the number of times it occurred.
 *
 * <p>Implements {@link Writable} so the record can be serialized to and from
 * Hadoop's binary stream format, and {@link DBWritable} so it can be bound to a
 * JDBC {@link PreparedStatement} or populated from a {@link ResultSet}.
 */
class DBWrite implements Writable, DBWritable {
	/** The word; bound to column 1 of the statement / result set. */
	String words;
	/** Occurrence count of the word; bound to column 2. */
	int count;

	/**
	 * Creates an empty record.
	 *
	 * <p>Hadoop instantiates Writable types reflectively and then fills them in
	 * through {@code readFields}, which requires a no-argument constructor.
	 * The word defaults to the empty string so that {@link #write(DataOutput)}
	 * never sees a {@code null}.
	 */
	public DBWrite() {
		this("", 0);
	}

	/**
	 * Creates a record for the given word and count.
	 *
	 * @param words the word
	 * @param count how many times the word occurred
	 */
	public DBWrite(String words, int count) {
		this.words = words;
		this.count = count;
	}

	/**
	 * Binds this record to an insert statement: the word as parameter 1 and the
	 * count as parameter 2.
	 *
	 * @param statement the statement to populate
	 * @throws SQLException if a parameter cannot be set
	 */
	@Override
	public void write(PreparedStatement statement) throws SQLException {
		statement.setString(1, this.words);
		statement.setInt(2, this.count);
	}

	/**
	 * Populates this record from the current row of a result set, reading the
	 * word from column 1 and the count from column 2.
	 *
	 * @param resultSet the result set positioned on the row to read
	 * @throws SQLException if a column cannot be read
	 */
	@Override
	public void readFields(ResultSet resultSet) throws SQLException {
		this.words = resultSet.getString(1);
		this.count = resultSet.getInt(2);
	}

	/**
	 * Serializes all fields to the binary stream: the word (modified UTF-8)
	 * followed by the count.
	 *
	 * @param output the stream to write to
	 * @throws IOException if writing fails
	 */
	@Override
	public void write(DataOutput output) throws IOException {
		output.writeUTF(words);
		output.writeInt(count);
	}

	/**
	 * Deserializes all fields from the binary stream, in the same order that
	 * {@link #write(DataOutput)} wrote them.
	 *
	 * @param input the stream to read from
	 * @throws IOException if reading fails
	 */
	@Override
	public void readFields(DataInput input) throws IOException {
		words = input.readUTF();
		count = input.readInt();
	}

}

/**
 * Splits each input line on commas and emits {@code (word, 1)} for every
 * non-empty token.
 */
class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
	/** Constant count emitted with every word. */
	private static final IntWritable ONE = new IntWritable(1);

	/** Reused output key, so no new object is allocated per token. */
	private final Text word = new Text();

	/**
	 * Tokenizes one line of input and writes one pair per word.
	 *
	 * @param key     the input key supplied by the input format (unused)
	 * @param value   the line of text to tokenize
	 * @param context the task context the pairs are written to
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void map(LongWritable key, Text value,
			Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		for (String token : line.split(",")) {
			// A blank line or consecutive commas produce empty tokens; those
			// are not words and must not be counted.
			if (token.isEmpty()) {
				continue;
			}
			word.set(token);
			context.write(word, ONE);
		}
	}

}

/**
 * Sums the per-occurrence counts of each word and emits the total as a
 * {@link DBWrite} record in the output key position.
 */
class WCReducer extends Reducer<Text, IntWritable,DBWrite,DBWrite>{

	/**
	 * Adds up all counts received for one word and writes a single record.
	 *
	 * @param key     the word
	 * @param val     the counts emitted for that word
	 * @param context the task context the record is written to
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void reduce(Text key, Iterable<IntWritable> val,Context context)
			throws IOException, InterruptedException {
		final int total = sumOf(val);
		final DBWrite row = new DBWrite(key.toString(), total);
		// A null output value is fine: the database insert is carried entirely by the key.
		context.write(row, null);
	}

	/** Returns the sum of all the given counts. */
	private static int sumOf(Iterable<IntWritable> occurrences) {
		int running = 0;
		for (IntWritable occurrence : occurrences) {
			running = running + occurrence.get();
		}
		return running;
	}

}


/**
 * Driver for the word-count job whose results are written to the MySQL table
 * {@code wc} (columns {@code words}, {@code count}).
 */
public class WCToMySQL {

	/**
	 * Configures and submits the job, then exits with status 0 if the job
	 * succeeded and 1 if it failed. Exits with status 2 when no input path is
	 * given.
	 *
	 * @param args {@code args[0]} is the input path to read text from
	 * @throws IOException            if job setup or submission fails
	 * @throws ClassNotFoundException if a job class cannot be found
	 * @throws InterruptedException   if interrupted while waiting for the job
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		// Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 1) {
			System.err.println("Usage: WCToMySQL <input path>");
			System.exit(2);
		}

		Configuration configuration = new Configuration();
		// NOTE(review): JDBC URL and credentials are hard-coded; consider
		// supplying them through configuration rather than source code.
		DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver",  "jdbc:mysql://192.168.2.10:3306/hadoop", "root", "root");

		// Create the job.
		Job job = Job.getInstance(configuration, "WCToMySQL");
		job.setJarByClass(WCToMySQL.class);

		// Input format.
		job.setInputFormatClass(TextInputFormat.class);

		// Mapper.
		job.setMapperClass(WCMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// Reducer.
		// NOTE(review): WCReducer is declared with DBWrite output types while
		// Text is declared here — confirm whether these should match.
		job.setReducerClass(WCReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// Input path.
		FileInputFormat.addInputPath(job, new Path(args[0]));

		// Output format: DBOutputFormat.setOutput(job, <table name>, <column 1>, <column 2>).
		job.setOutputFormatClass(DBOutputFormat.class);
		DBOutputFormat.setOutput(job, "wc", "words", "count");

		// Add the MySQL JDBC driver jar to the job's classpath.
		job.addArchiveToClassPath(new Path("/lib/mysql-connector-java-5.1.26-bin.jar"));

		// Submit the job, wait for it, and report success or failure through the exit status.
		boolean succeeded = job.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);
	}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值