hadoop读取数据库信息的尝试


写一个简单的读取数据库某个表的信息的hadoop程序:

读取某个表,然后把信息输出到某个文件。


package db;


import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;




public class DbInputTest {


public static class Map extends MapReduceBase implements
Mapper<LongWritable, DBWritable, Text, IntWritable> {


@Override
public void map(LongWritable key, DBWritable value,
OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
System.out.println("--in map , key =" + key.toString() + ",value="
+ value.toString());
TreeNodeRecord v=(TreeNodeRecord)value;
output.collect(new Text(v.treeNodeName),new IntWritable(v.treeId));
}
}


public static class Reduce extends MapReduceBase implements
Reducer<Text, IntWritable, Text, IntWritable> {



@Override
public void reduce(Text arg0, Iterator<IntWritable> arg1,
OutputCollector<Text, IntWritable> collector, Reporter arg3)
throws IOException {


while(arg1.hasNext()){
collector.collect(arg0,  arg1.next());
}
}


}


public static class TreeNodeRecord implements Writable,DBWritable{


int treeId;
String treeNodeName;
@Override
public void write(PreparedStatement statement) throws SQLException {
// TODO Auto-generated method stub
statement.setInt(1, this.treeId);
statement.setString(2, this.treeNodeName);
}


@Override
public void readFields(ResultSet resultSet) throws SQLException {
// TODO Auto-generated method stub
this.treeId=resultSet.getInt(1);
this.treeNodeName=resultSet.getString(2);
}


@Override
public void readFields(DataInput arg0) throws IOException {
// TODO Auto-generated method stub
this.treeId=arg0.readInt();
this.treeNodeName=Text.readString(arg0);
}


@Override
public void write(DataOutput arg0) throws IOException {
// TODO Auto-generated method stub
arg0.write(this.treeId);
Text.writeString(arg0, this.treeNodeName);
}


public int getTreeId() {
return treeId;
}


public void setTreeId(int treeId) {
this.treeId = treeId;
}


public String getTreeNodeName() {
return treeNodeName;
}


public void setTreeNodeName(String treeNodeName) {
this.treeNodeName = treeNodeName;
}


}

public static void main(String[] args) throws IOException {
Configuration conf = new Configuration();
JobConf jobConf = new JobConf(conf);
jobConf.set("mapred.jdbc.driver.class", "com.mysql.jdbc.Driver");
jobConf.set("mapred.jdbc.url", "jdbc:mysql://192.168.1.239:3306/test");
jobConf.set("mapred.jdbc.username", "abcdefg");
jobConf.set("mapred.jdbc.password", "123456");
jobConf.set("mapred.jdbc.input.table.name", "treenode");




jobConf.setJobName("db input format test");


jobConf.setJarByClass(DbInputTest.class);
jobConf.setMapperClass(Map.class);
jobConf.setMapOutputKeyClass(Text.class);
jobConf.setMapOutputValueClass(IntWritable.class);


jobConf.setReducerClass(Reduce.class);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(IntWritable.class);


// jobConf.setCombinerClass(Reduce.class);


jobConf.setInputFormat(DBInputFormat.class);
jobConf.setOutputFormat(TextOutputFormat.class);


FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));



// DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",  
//          "jdbc:mysql://192.168.3.244:3306/hadoop", "hua", "hadoop");  
    String[] fields = { "treeId", "treeNodeName" };  
DBInputFormat.setInput(jobConf, TreeNodeRecord.class, "treenode",  
null, "treeId", fields);  
    
JobClient.runJob(jobConf);
}
}


将以上程序打成jar包,并把用到的MySQL JDBC驱动jar放到hadoop安装目录的lib下。

运行:

[hadoop@hadoop1 java]$ hadoop jar DbInputTest.jar  /home/hadoop/java  /ouptty2
Warning: $HADOOP_HOME is deprecated.


14/06/03 02:48:31 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. 


Applications should implement Tool for the same.
14/06/03 02:48:41 INFO mapred.JobClient: Running job: job_201405220115_0009
14/06/03 02:48:42 INFO mapred.JobClient:  map 0% reduce 0%
14/06/03 02:49:12 INFO mapred.JobClient:  map 50% reduce 0%
14/06/03 02:49:15 INFO mapred.JobClient:  map 100% reduce 0%
14/06/03 02:49:33 INFO mapred.JobClient:  map 100% reduce 100%
14/06/03 02:49:38 INFO mapred.JobClient: Job complete: job_201405220115_0009
14/06/03 02:49:39 INFO mapred.JobClient: Counters: 29
14/06/03 02:49:39 INFO mapred.JobClient:   Map-Reduce Framework
14/06/03 02:49:39 INFO mapred.JobClient:     Spilled Records=12
14/06/03 02:49:39 INFO mapred.JobClient:     Map output materialized bytes=167
14/06/03 02:49:39 INFO mapred.JobClient:     Reduce input records=6
14/06/03 02:49:39 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=945438720
14/06/03 02:49:39 INFO mapred.JobClient:     Map input records=6
14/06/03 02:49:39 INFO mapred.JobClient:     SPLIT_RAW_BYTES=150
14/06/03 02:49:39 INFO mapred.JobClient:     Map output bytes=143
14/06/03 02:49:39 INFO mapred.JobClient:     Reduce shuffle bytes=167
14/06/03 02:49:39 INFO mapred.JobClient:     Physical memory (bytes) snapshot=319463424
14/06/03 02:49:39 INFO mapred.JobClient:     Map input bytes=6
14/06/03 02:49:39 INFO mapred.JobClient:     Reduce input groups=6
14/06/03 02:49:39 INFO mapred.JobClient:     Combine output records=0
14/06/03 02:49:39 INFO mapred.JobClient:     Reduce output records=6
14/06/03 02:49:39 INFO mapred.JobClient:     Map output records=6
14/06/03 02:49:39 INFO mapred.JobClient:     Combine input records=0
14/06/03 02:49:39 INFO mapred.JobClient:     CPU time spent (ms)=4360
14/06/03 02:49:39 INFO mapred.JobClient:     Total committed heap usage (bytes)=415510528
14/06/03 02:49:39 INFO mapred.JobClient:   File Input Format Counters 
14/06/03 02:49:39 INFO mapred.JobClient:     Bytes Read=0
14/06/03 02:49:39 INFO mapred.JobClient:   FileSystemCounters
14/06/03 02:49:39 INFO mapred.JobClient:     HDFS_BYTES_READ=150
14/06/03 02:49:39 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=67245
14/06/03 02:49:39 INFO mapred.JobClient:     FILE_BYTES_READ=161
14/06/03 02:49:39 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=131
14/06/03 02:49:39 INFO mapred.JobClient:   File Output Format Counters 
14/06/03 02:49:39 INFO mapred.JobClient:     Bytes Written=131
14/06/03 02:49:39 INFO mapred.JobClient:   Job Counters 
14/06/03 02:49:39 INFO mapred.JobClient:     Launched map tasks=2
14/06/03 02:49:39 INFO mapred.JobClient:     Launched reduce tasks=1
14/06/03 02:49:39 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=18138
14/06/03 02:49:39 INFO mapred.JobClient:     Total time spent by all reduces waiting after 


reserving slots (ms)=0
14/06/03 02:49:39 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=47273
14/06/03 02:49:39 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving 


slots (ms)=0

可以查看到,输出目录的内容。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值