Hadoop/MapReduce(单词统计--读写数据库)

本文先在MySQL中创建单词表 word 并插入若干单词(每行的次数 count 默认为1),再利用MapReduce从该表读取数据、按单词汇总次数,并将统计结果写入新表 word2。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

单词统计

1.  MySQL     

-- Input table: one row per word occurrence; `count` defaults to 1 when only
-- `name` is inserted.
CREATE TABLE `word` (
   `id` int(11) NOT NULL AUTO_INCREMENT,
   `name` varchar(20) NOT NULL COMMENT '单词',
   `count` int(11) NOT NULL DEFAULT '1' COMMENT '次数',
   PRIMARY KEY (`id`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='单词表';

-- Result table: same columns as `word`; intended to hold one row per distinct
-- word with its summed count.
CREATE TABLE `word2` (
   `id` int(11) NOT NULL AUTO_INCREMENT,
   `name` varchar(20) NOT NULL COMMENT '单词',
   `count` int(11) NOT NULL DEFAULT '1' COMMENT '次数',
   PRIMARY KEY (`id`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='单词统计表';

-- Seed data: 16 rows, names only, so every row gets the default count of 1.
INSERT INTO word (name)
VALUES
  ('JAVA'), ('Hadoop'), ('Spring'), ('Hadoop'),
  ('JAVA'), ('JAVA'), ('JAVA'), ('Spring'),
  ('Mysql'), ('Hadoop'), ('Mysql'), ('Spring'),
  ('Hadoop'), ('Mysql'), ('Mysql'), ('Hadoop');
2. Java

  

public class Demo08 {
	public static class WordRecord implements Writable, DBWritable {
		int count;
		String name;
		public WordRecord() {
		}
		public void readFields(DataInput in) throws IOException {
			this.count = in.readInt();
			this.name = Text.readString(in);
		}		
		public void write(PreparedStatement stmt) throws SQLException {
			stmt.setInt(1, this.count);
			stmt.setString(2, this.name);
		}
		public void readFields(ResultSet result) throws SQLException {
			this.count = result.getInt(1);
			this.name = result.getString(2);
		}
		public void write(DataOutput out) throws IOException {
			out.writeInt(this.count);
			Text.writeString(out, this.name);
		}
	}

	public static class DBInputMapper extends MapReduceBase
			implements Mapper<LongWritable, WordRecord, Text, IntWritable> {
		public void map(LongWritable key, WordRecord value, OutputCollector<Text, IntWritable> collector,
				Reporter reporter) throws IOException {
			collector.collect(new Text(value.name), new IntWritable(value.count));
		}
	}

	public static class MyReducer extends MapReduceBase
			implements Reducer<Text, IntWritable, WordRecord, LongWritable> {
		public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<WordRecord, LongWritable> output,
				Reporter reporter) throws IOException {
			WordRecord r = new WordRecord();
			r.name = key.toString();
			while(values.hasNext()){		
				r.count += values.next().get();
			}
			output.collect(r, new LongWritable(r.count));
		}
	}

	@SuppressWarnings("deprecation")
	public static void main(String[] args) throws IOException {
		JobConf conf = new JobConf(Demo08.class);
               //加载mysql连接jar包
                DistributedCache.addFileToClassPath(new Path("/tool/lib/mysql.jar"), conf);

		conf.setMapOutputKeyClass(Text.class);
		conf.setMapOutputValueClass(IntWritable.class);
		conf.setOutputKeyClass(Text.class);
		conf.setOutputValueClass(IntWritable.class);

		conf.setOutputFormat(DBOutputFormat.class);
		conf.setInputFormat(DBInputFormat.class);

		DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", "jdbc:mysql://192.168.1.194:3306/test", "root", "root");
		String[] fields = { "count", "name" };
		DBInputFormat.setInput(conf, WordRecord.class, "word", null, null, fields);
		DBOutputFormat.setOutput(conf, "word2", "count", "name");
		conf.setMapperClass(DBInputMapper.class);
		conf.setReducerClass(MyReducer.class);

		JobClient.runJob(conf);
	}
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值