A simple Hadoop program that reads data from a database table:
it reads one table and writes the rows out to a file.
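To make the example self-contained, the treenode table can first be created and seeded through plain JDBC. This is only a sketch: the column types, the sample rows, and the SeedTreeNodeTable class name are illustrative assumptions; only the column names treeId and treeNodeName are taken from the job below.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class SeedTreeNodeTable {
    public static void main(String[] args) throws Exception {
        // Same MySQL instance and credentials as the job below.
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://192.168.1.239:3306/test", "abcdefg", "123456");
        Statement stmt = conn.createStatement();
        // Assumed types; adjust to the real schema.
        stmt.executeUpdate("CREATE TABLE IF NOT EXISTS treenode ("
                + "treeId INT NOT NULL, treeNodeName VARCHAR(64))");
        stmt.executeUpdate("INSERT INTO treenode VALUES (1, 'root')");
        stmt.executeUpdate("INSERT INTO treenode VALUES (2, 'left')");
        stmt.executeUpdate("INSERT INTO treenode VALUES (3, 'right')");
        stmt.close();
        conn.close();
    }
}

The job itself: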
package db;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;
public class DbInputTest {

    // With DBInputFormat, the map input key is the record offset
    // (LongWritable) and the value is our DBWritable record class, so the
    // Mapper can be typed against TreeNodeRecord directly instead of
    // receiving a raw DBWritable and casting it.
    public static class Map extends MapReduceBase implements
            Mapper<LongWritable, TreeNodeRecord, Text, IntWritable> {
        @Override
        public void map(LongWritable key, TreeNodeRecord value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            System.out.println("--in map, key=" + key + ", value="
                    + value.getTreeNodeName());
            output.collect(new Text(value.treeNodeName),
                    new IntWritable(value.treeId));
        }
    }

    // Identity reduce: pass every (treeNodeName, treeId) pair through
    // unchanged.
    public static class Reduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                output.collect(key, values.next());
            }
        }
    }
    // One row of the treenode table. Writable is needed so the record can be
    // shuffled between tasks; DBWritable maps it to/from a JDBC row.
    public static class TreeNodeRecord implements Writable, DBWritable {
        int treeId;
        String treeNodeName;

        // DBWritable: write this record into a prepared statement.
        @Override
        public void write(PreparedStatement statement) throws SQLException {
            statement.setInt(1, this.treeId);
            statement.setString(2, this.treeNodeName);
        }

        // DBWritable: populate this record from one row of the result set.
        // The column order must match the field list passed to
        // DBInputFormat.setInput() below.
        @Override
        public void readFields(ResultSet resultSet) throws SQLException {
            this.treeId = resultSet.getInt(1);
            this.treeNodeName = resultSet.getString(2);
        }

        // Writable: deserialize from the intermediate (shuffle) format.
        @Override
        public void readFields(DataInput in) throws IOException {
            this.treeId = in.readInt();
            this.treeNodeName = Text.readString(in);
        }

        // Writable: serialize to the intermediate format. Note this must be
        // writeInt(), not write(): DataOutput.write(int) emits a single byte
        // and would not round-trip with readInt() above.
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(this.treeId);
            Text.writeString(out, this.treeNodeName);
        }

        public int getTreeId() {
            return treeId;
        }

        public void setTreeId(int treeId) {
            this.treeId = treeId;
        }

        public String getTreeNodeName() {
            return treeNodeName;
        }

        public void setTreeNodeName(String treeNodeName) {
            this.treeNodeName = treeNodeName;
        }
    }
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        JobConf jobConf = new JobConf(conf);

        // Old-API DBConfiguration property names; DBConfiguration.configureDB
        // (commented out below) sets the same keys in a single call.
        jobConf.set("mapred.jdbc.driver.class", "com.mysql.jdbc.Driver");
        jobConf.set("mapred.jdbc.url", "jdbc:mysql://192.168.1.239:3306/test");
        jobConf.set("mapred.jdbc.username", "abcdefg");
        jobConf.set("mapred.jdbc.password", "123456");
        jobConf.set("mapred.jdbc.input.table.name", "treenode");

        jobConf.setJobName("db input format test");
        jobConf.setJarByClass(DbInputTest.class);
        jobConf.setMapperClass(Map.class);
        jobConf.setMapOutputKeyClass(Text.class);
        jobConf.setMapOutputValueClass(IntWritable.class);
        jobConf.setReducerClass(Reduce.class);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);
        // jobConf.setCombinerClass(Reduce.class);
        jobConf.setInputFormat(DBInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);

        // DBInputFormat builds its splits from the table, so the input path
        // is effectively ignored; only the output path matters here.
        FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

        // DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
        //         "jdbc:mysql://192.168.3.244:3306/hadoop", "hua", "hadoop");

        // Read treeId and treeNodeName from the treenode table, ordered by
        // treeId, with no WHERE condition.
        String[] fields = { "treeId", "treeNodeName" };
        DBInputFormat.setInput(jobConf, TreeNodeRecord.class, "treenode",
                null, "treeId", fields);

        JobClient.runJob(jobConf);
    }
}
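As the commented-out lines above hint, the five raw mapred.jdbc.* settings can instead go through the old API's DBConfiguration helper. A minimal sketch, assuming the same connection details as in main():

import org.apache.hadoop.mapred.lib.db.DBConfiguration;

// Writes the same mapred.jdbc.* properties into the job configuration as
// the five jobConf.set(...) calls above.
DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
        "jdbc:mysql://192.168.1.239:3306/test", "abcdefg", "123456");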
Package the program above into a jar, and put the MySQL JDBC driver jar under Hadoop's lib directory so it is on the task classpath.
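For example (the driver jar name and paths are only illustrative):
[hadoop@hadoop1 java]$ cp mysql-connector-java-5.1.31-bin.jar $HADOOP_HOME/lib/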
Run it:
[hadoop@hadoop1 java]$ hadoop jar DbInputTest.jar /home/hadoop/java /ouptty2
Warning: $HADOOP_HOME is deprecated.
14/06/03 02:48:31 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments.
Applications should implement Tool for the same.
14/06/03 02:48:41 INFO mapred.JobClient: Running job: job_201405220115_0009
14/06/03 02:48:42 INFO mapred.JobClient: map 0% reduce 0%
14/06/03 02:49:12 INFO mapred.JobClient: map 50% reduce 0%
14/06/03 02:49:15 INFO mapred.JobClient: map 100% reduce 0%
14/06/03 02:49:33 INFO mapred.JobClient: map 100% reduce 100%
14/06/03 02:49:38 INFO mapred.JobClient: Job complete: job_201405220115_0009
14/06/03 02:49:39 INFO mapred.JobClient: Counters: 29
14/06/03 02:49:39 INFO mapred.JobClient: Map-Reduce Framework
14/06/03 02:49:39 INFO mapred.JobClient: Spilled Records=12
14/06/03 02:49:39 INFO mapred.JobClient: Map output materialized bytes=167
14/06/03 02:49:39 INFO mapred.JobClient: Reduce input records=6
14/06/03 02:49:39 INFO mapred.JobClient: Virtual memory (bytes) snapshot=945438720
14/06/03 02:49:39 INFO mapred.JobClient: Map input records=6
14/06/03 02:49:39 INFO mapred.JobClient: SPLIT_RAW_BYTES=150
14/06/03 02:49:39 INFO mapred.JobClient: Map output bytes=143
14/06/03 02:49:39 INFO mapred.JobClient: Reduce shuffle bytes=167
14/06/03 02:49:39 INFO mapred.JobClient: Physical memory (bytes) snapshot=319463424
14/06/03 02:49:39 INFO mapred.JobClient: Map input bytes=6
14/06/03 02:49:39 INFO mapred.JobClient: Reduce input groups=6
14/06/03 02:49:39 INFO mapred.JobClient: Combine output records=0
14/06/03 02:49:39 INFO mapred.JobClient: Reduce output records=6
14/06/03 02:49:39 INFO mapred.JobClient: Map output records=6
14/06/03 02:49:39 INFO mapred.JobClient: Combine input records=0
14/06/03 02:49:39 INFO mapred.JobClient: CPU time spent (ms)=4360
14/06/03 02:49:39 INFO mapred.JobClient: Total committed heap usage (bytes)=415510528
14/06/03 02:49:39 INFO mapred.JobClient: File Input Format Counters
14/06/03 02:49:39 INFO mapred.JobClient: Bytes Read=0
14/06/03 02:49:39 INFO mapred.JobClient: FileSystemCounters
14/06/03 02:49:39 INFO mapred.JobClient: HDFS_BYTES_READ=150
14/06/03 02:49:39 INFO mapred.JobClient: FILE_BYTES_WRITTEN=67245
14/06/03 02:49:39 INFO mapred.JobClient: FILE_BYTES_READ=161
14/06/03 02:49:39 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=131
14/06/03 02:49:39 INFO mapred.JobClient: File Output Format Counters
14/06/03 02:49:39 INFO mapred.JobClient: Bytes Written=131
14/06/03 02:49:39 INFO mapred.JobClient: Job Counters
14/06/03 02:49:39 INFO mapred.JobClient: Launched map tasks=2
14/06/03 02:49:39 INFO mapred.JobClient: Launched reduce tasks=1
14/06/03 02:49:39 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=18138
14/06/03 02:49:39 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/06/03 02:49:39 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=47273
14/06/03 02:49:39 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
The contents of the output directory can now be inspected.
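For example, with the /ouptty2 output path used above and the job's single reducer, the result file can be printed with:
[hadoop@hadoop1 java]$ hadoop fs -cat /ouptty2/part-00000
Each line should hold one treeNodeName, a tab, and the corresponding treeId.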