A simple Hadoop program that reads data from a database table:
it reads one table and writes the rows out to a file.
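To make the example self-contained, the treenode table can first be created and seeded through plain JDBC. This is only a sketch: the column types, the sample rows, and the SeedTreeNodeTable class name are illustrative assumptions; only the column names treeId and treeNodeName are taken from the job below.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class SeedTreeNodeTable {
    public static void main(String[] args) throws Exception {
        // Same MySQL instance and credentials as the job below.
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://192.168.1.239:3306/test", "abcdefg", "123456");
        Statement stmt = conn.createStatement();
        // Assumed types; adjust to the real schema.
        stmt.executeUpdate("CREATE TABLE IF NOT EXISTS treenode ("
                + "treeId INT NOT NULL, treeNodeName VARCHAR(64))");
        stmt.executeUpdate("INSERT INTO treenode VALUES (1, 'root')");
        stmt.executeUpdate("INSERT INTO treenode VALUES (2, 'left')");
        stmt.executeUpdate("INSERT INTO treenode VALUES (3, 'right')");
        stmt.close();
        conn.close();
    }
}

The job itself: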
package db;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;
public class DbInputTest {

    // With DBInputFormat, the map input key is the record offset
    // (LongWritable) and the value is our DBWritable record class, so the
    // Mapper can be typed against TreeNodeRecord directly instead of
    // receiving a raw DBWritable and casting it.
    public static class Map extends MapReduceBase implements
            Mapper<LongWritable, TreeNodeRecord, Text, IntWritable> {
        @Override
        public void map(LongWritable key, TreeNodeRecord value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            System.out.println("--in map, key=" + key + ", value="
                    + value.getTreeNodeName());
            output.collect(new Text(value.treeNodeName),
                    new IntWritable(value.treeId));
        }
    }

    // Identity reduce: pass every (treeNodeName, treeId) pair through
    // unchanged.
    public static class Reduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                output.collect(key, values.next());
            }
        }
    }
    // One row of the treenode table. Writable is needed so the record can be
    // shuffled between tasks; DBWritable maps it to/from a JDBC row.
    public static class TreeNodeRecord implements Writable, DBWritable {
        int treeId;
        String treeNodeName;

        // DBWritable: write this record into a prepared statement.
        @Override
        public void write(PreparedStatement statement) throws SQLException {
            statement.setInt(1, this.treeId);
            statement.setString(2, this.treeNodeName);
        }

        // DBWritable: populate this record from one row of the result set.
        // The column order must match the field list passed to
        // DBInputFormat.setInput() below.
        @Override
        public void readFields(ResultSet resultSet) throws SQLException {
            this.treeId = resultSet.getInt(1);
            this.treeNodeName = resultSet.getString(2);
        }

        // Writable: deserialize from the intermediate (shuffle) format.
        @Override
        public void readFields(DataInput in) throws IOException {
            this.treeId = in.readInt();
            this.treeNodeName = Text.readString(in);
        }

        // Writable: serialize to the intermediate format. Note this must be
        // writeInt(), not write(): DataOutput.write(int) emits a single byte
        // and would not round-trip with readInt() above.
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(this.treeId);
            Text.writeString(out, this.treeNodeName);
        }

        public int getTreeId() {
            return treeId;
        }

        public void setTreeId(int treeId) {
            this.treeId = treeId;
        }

        public String getTreeNodeName() {
            return treeNodeName;
        }

        public void setTreeNodeName(String treeNodeName) {
            this.treeNodeName = treeNodeName;
        }
    }
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        JobConf jobConf = new JobConf(conf);

        // Old-API DBConfiguration property names; DBConfiguration.configureDB
        // (commented out below) sets the same keys in a single call.
        jobConf.set("mapred.jdbc.driver.class", "com.mysql.jdbc.Driver");
        jobConf.set("mapred.jdbc.url", "jdbc:mysql://192.168.1.239:3306/test");
        jobConf.set("mapred.jdbc.username", "abcdefg");
        jobConf.set("mapred.jdbc.password", "123456");
        jobConf.set("mapred.jdbc.input.table.name", "treenode");

        jobConf.setJobName("db input format test");
        jobConf.setJarByClass(DbInputTest.class);
        jobConf.setMapperClass(Map.class);
        jobConf.setMapOutputKeyClass(Text.class);
        jobConf.setMapOutputValueClass(IntWritable.class);
        jobConf.setReducerClass(Reduce.class);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);
        // jobConf.setCombinerClass(Reduce.class);
        jobConf.setInputFormat(DBInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);

        // DBInputFormat builds its splits from the table, so the input path
        // is effectively ignored; only the output path matters here.
        FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

        // DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
        //         "jdbc:mysql://192.168.3.244:3306/hadoop", "hua", "hadoop");

        // Read treeId and treeNodeName from the treenode table, ordered by
        // treeId, with no WHERE condition.
        String[] fields = { "treeId", "treeNodeName" };
        DBInputFormat.setInput(jobConf, TreeNodeRecord.class, "treenode",
                null, "treeId", fields);

        JobClient.runJob(jobConf);
    }
}
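As the commented-out lines above hint, the five raw mapred.jdbc.* settings can instead go through the old API's DBConfiguration helper. A minimal sketch, assuming the same connection details as in main():

import org.apache.hadoop.mapred.lib.db.DBConfiguration;

// Writes the same mapred.jdbc.* properties into the job configuration as
// the five jobConf.set(...) calls above.
DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
        "jdbc:mysql://192.168.1.239:3306/test", "abcdefg", "123456");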
Package the program above into a jar, and put the MySQL JDBC driver jar under Hadoop's lib directory so it is on the task classpath.
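For example (the driver jar name and paths are only illustrative):
[hadoop@hadoop1 java]$ cp mysql-connector-java-5.1.31-bin.jar $HADOOP_HOME/lib/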
Run it:
[hadoop@hadoop1 java]$ hadoop jar DbInputTest.jar /home/hadoop/java /ouptty2
Warning: $HADOOP_HOME is deprecated.
14/06/03 02:48:31 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments.
Applications should implement Tool for the same.
14/06/03 02:48:41 INFO mapred.JobClient: Running job: job_201405220115_0009
14/06/03 02:48:42 INFO mapred.JobClient: map 0% reduce 0%
14/06/03 02:49:12 INFO mapred.JobClient: map 50% reduce 0%
14/06/03 02:49:15 INFO mapred.JobClient: map 100% reduce 0%
14/06/03 02:49:33 INFO mapred.JobClient: map 100% reduce 100%
14/06/03 02:49:38 INFO mapred.JobClient: Job complete: job_201405220115_0009
14/06/03 02:49:39 INFO mapred.JobClient: Counters: 29
14/06/03 02:49:39 INFO mapred.JobClient: Map-Reduce Framework
14/06/03 02:49:39 INFO mapred.JobClient: Spilled Records=12
14/06/03 02:49:39 INFO mapred.JobClient: Map output materialized bytes=167
14/06/03 02:49:39 INFO mapred.JobClient: Reduce input records=6
14/06/03 02:49:39 INFO mapred.JobClient: Virtual memory (bytes) snapshot=945438720
14/06/03 02:49:39 INFO mapred.JobClient: Map input records=6
14/06/03 02:49:39 INFO mapred.JobClient: SPLIT_RAW_BYTES=150
14/06/03 02:49:39 INFO mapred.JobClient: Map output bytes=143
14/06/03 02:49:39 INFO mapred.JobClient: Reduce shuffle bytes=167
14/06/03 02:49:39 INFO mapred.JobClient: Physical memory (bytes) snapshot=319463424
14/06/03 02:49:39 INFO mapred.JobClient: Map input bytes=6
14/06/03 02:49:39 INFO mapred.JobClient: Reduce input groups=6
14/06/03 02:49:39 INFO mapred.JobClient: Combine output records=0
14/06/03 02:49:39 INFO mapred.JobClient: Reduce output records=6
14/06/03 02:49:39 INFO mapred.JobClient: Map output records=6
14/06/03 02:49:39 INFO mapred.JobClient: Combine input records=0
14/06/03 02:49:39 INFO mapred.JobClient: CPU time spent (ms)=4360
14/06/03 02:49:39 INFO mapred.JobClient: Total committed heap usage (bytes)=415510528
14/06/03 02:49:39 INFO mapred.JobClient: File Input Format Counters
14/06/03 02:49:39 INFO mapred.JobClient: Bytes Read=0
14/06/03 02:49:39 INFO mapred.JobClient: FileSystemCounters
14/06/03 02:49:39 INFO mapred.JobClient: HDFS_BYTES_READ=150
14/06/03 02:49:39 INFO mapred.JobClient: FILE_BYTES_WRITTEN=67245
14/06/03 02:49:39 INFO mapred.JobClient: FILE_BYTES_READ=161
14/06/03 02:49:39 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=131
14/06/03 02:49:39 INFO mapred.JobClient: File Output Format Counters
14/06/03 02:49:39 INFO mapred.JobClient: Bytes Written=131
14/06/03 02:49:39 INFO mapred.JobClient: Job Counters
14/06/03 02:49:39 INFO mapred.JobClient: Launched map tasks=2
14/06/03 02:49:39 INFO mapred.JobClient: Launched reduce tasks=1
14/06/03 02:49:39 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=18138
14/06/03 02:49:39 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/06/03 02:49:39 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=47273
14/06/03 02:49:39 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
The contents of the output directory can now be inspected.
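For example, with the /ouptty2 output path used above and the job's single reducer, the result file can be printed with:
[hadoop@hadoop1 java]$ hadoop fs -cat /ouptty2/part-00000
Each line should hold one treeNodeName, a tab, and the corresponding treeId.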