pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.imooc.bigdata</groupId>
  <artifactId>storm</artifactId>
  <version>1.0</version>
  <packaging>jar</packaging>
  <name>storm</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <storm.version>1.1.1</storm.version>
    <hadoop.version>2.9.0</hadoop.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>${storm.version}</version>
      <!-- Keep Storm's SLF4J artifacts off the classpath to avoid logging binding conflicts in local mode -->
      <exclusions>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>log4j-over-slf4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-hbase</artifactId>
      <version>${storm.version}</version>
      <!-- Exclude the transitive Guava; a compatible version is pinned explicitly below -->
      <exclusions>
        <exclusion>
          <groupId>com.google.guava</groupId>
          <artifactId>guava</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>16.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.curator</groupId>
      <artifactId>curator-client</artifactId>
      <version>2.12.0</version>
    </dependency>
  </dependencies>
</project>
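The exclusions above are deliberate: storm-hbase pulls in a conflicting transitive Guava, so the pom excludes it and pins 16.0.1 explicitly. To confirm which versions actually end up on the classpath, the standard maven-dependency-plugin tree goal can be filtered, for example:

mvn dependency:tree -Dincludes=com.google.guava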
Code
package com.imooc.bigdata.integration.hbase;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.hbase.bolt.HBaseBolt;
import org.apache.storm.hbase.bolt.mapper.SimpleHBaseMapper;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

/**
 * Word count with Storm, writing the results to HBase.
 */
public class LocalWordCountHBaseStormTopology {

    public static class DataSourceSpout extends BaseRichSpout {

        public static final String[] words = new String[]{"apple", "orange", "pineapple", "banana", "watermelon"};

        private SpoutOutputCollector collector;

        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }

        public void nextTuple() {
            // Emit one random word per second.
            Random random = new Random();
            String word = words[random.nextInt(words.length)];
            this.collector.emit(new Values(word));
            System.out.println("emit: " + word);
            Utils.sleep(1000);
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }

    /**
     * Splits the incoming data.
     */
    public static class SplitBolt extends BaseRichBolt {

        private OutputCollector collector;

        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        /**
         * Business logic: forward each word downstream.
         */
        public void execute(Tuple input) {
            String word = input.getStringByField("line");
            this.collector.emit(new Values(word));
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }

    /**
     * Word count aggregation bolt.
     */
    public static class CountBolt extends BaseRichBolt {

        private OutputCollector collector;

        private Map<String, Integer> map = new HashMap<String, Integer>();

        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        /**
         * Business logic:
         * 1) get each word
         * 2) aggregate the counts of all words
         * 3) emit
         */
        public void execute(Tuple input) {
            // 1) get each word
            String word = input.getStringByField("word");
            Integer count = map.get(word);
            if (count == null) {
                count = 0;
            }
            count++;
            // 2) aggregate the counts of all words
            map.put(word, count);
            // 3) emit
            this.collector.emit(new Values(word, map.get(word)));
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word", "count"));
        }
    }

    public static void main(String[] args) {
        Config config = new Config();
        Map<String, Object> hbaseConf = new HashMap<String, Object>();
        hbaseConf.put("hbase.rootdir", "hdfs://hadoopcluster/hbase");
        hbaseConf.put("hbase.zookeeper.quorum", "node1:2181,node2:2181,node3:2181");
        config.put("hbase.conf", hbaseConf);

        // Build the topology from the Spout and Bolts via TopologyBuilder.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("DataSourceSpout", new DataSourceSpout());
        builder.setBolt("SplitBolt", new SplitBolt()).shuffleGrouping("DataSourceSpout");
        builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitBolt");

        // Map the "word" field to the row key and the "count" field to an HBase
        // counter column, in column family "cf" of table "wc".
        SimpleHBaseMapper mapper = new SimpleHBaseMapper()
                .withRowKeyField("word")
                .withColumnFields(new Fields("word"))
                .withCounterFields(new Fields("count"))
                .withColumnFamily("cf");
        HBaseBolt hbaseBolt = new HBaseBolt("wc", mapper)
                .withConfigKey("hbase.conf"); // without withConfigKey the bolt fails at startup
        builder.setBolt("HBaseBolt", hbaseBolt).shuffleGrouping("CountBolt");

        // Create a local cluster and submit the topology to it.
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LocalWordCountHBaseStormTopology",
                config, builder.createTopology());
    }
}
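As written, main submits the topology and returns, and the local cluster keeps running until the process is killed. For a bounded local test it is common to sleep and then shut the cluster down; a minimal sketch to append at the end of main (the 30-second window is an arbitrary choice):

// Run locally for 30 seconds, then stop the topology and the local cluster.
Utils.sleep(30000);
cluster.killTopology("LocalWordCountHBaseStormTopology");
cluster.shutdown();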
Start ZooKeeper
Start Hadoop
Start HBase
Start the HBase shell (typical commands for all four steps below)
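Assuming the standard distribution scripts are on the PATH (install locations vary), the startup sequence typically looks like this, with hostnames matching the node1/node2/node3 quorum used in the code:

zkServer.sh start    # on each ZooKeeper node
start-dfs.sh         # HDFS must be up, since hbase.rootdir points at hdfs://hadoopcluster/hbase
start-hbase.sh
hbase shell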
Create the table:
create 'wc','cf'
list
Run the program from IDEA.
An error is thrown. A NoSuchMethodError like this means one of two things: either the method being called is not on the classpath at all, or two dependencies conflict and an incompatible version of the class is being loaded.
On inspection this is a dependency conflict: commenting out the hadoop-client dependency in pom.xml resolves it (which is why it no longer appears in the pom above).
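The otherwise-unused ${hadoop.version} property in the pom hints at what was removed; presumably the declaration looked like the standard form below, left commented out so that storm-hbase resolves one consistent set of Hadoop jars instead of mixing them with 2.9.0:

<!--
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>${hadoop.version}</version>
</dependency>
-->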
java.lang.NoSuchMethodError: org.apache.hadoop.security.authentication.util.KerberosUtil.hasKerberosTicket(Ljavax/security/auth/Subject;)Z
at org.apache.hadoop.security.UserGroupInformation.<init>(UserGroupInformation.java:666) ~[hadoop-common-2.9.0.jar:?]
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:861) ~[hadoop-common-2.9.0.jar:?]
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:820) ~[hadoop-common-2.9.0.jar:?]
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:689) ~[hadoop-common-2.9.0.jar:?]
at org.apache.hadoop.hbase.security.User$SecureHadoopUser.<init>(User.java:285) ~[hbase-common-1.1.0.jar:1.1.0]
at org.apache.hadoop.hbase.security.User$SecureHadoopUser.<init>(User.java:281) ~[hbase-common-1.1.0.jar:1.1.0]
at org.apache.hadoop.hbase.security.User.getCurrent(User.java:185) ~[hbase-common-1.1.0.jar:1.1.0]
at org.apache.hadoop.hbase.security.UserProvider.getCurrent(UserProvider.java:88) ~[hbase-common-1.1.0.jar:1.1.0]
at org.apache.storm.hbase.common.HBaseClient.<init>(HBaseClient.java:43) ~[storm-hbase-1.1.1.jar:1.1.1]
at org.apache.storm.hbase.bolt.AbstractHBaseBolt.prepare(AbstractHBaseBolt.java:75) ~[storm-hbase-1.1.1.jar:1.1.1]
at org.apache.storm.hbase.bolt.HBaseBolt.prepare(HBaseBolt.java:109) ~[storm-hbase-1.1.1.jar:1.1.1]
at org.apache.storm.daemon.executor$fn__5030$fn__5043.invoke(executor.clj:793) ~[storm-core-1.1.1.jar:1.1.1]
at org.apache.storm.util$async_loop$fn__557.invoke(util.clj:482) [storm-core-1.1.1.jar:1.1.1]
at clojure.lang.AFn.run(AFn.java:22) [clojure-1.7.0.jar:?]
at java.lang.Thread.run(Thread.java:745) [?:1.8.0_91]
Check HBase
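Back in the HBase shell, scanning the table shows one row per word. Since the count column is written through HBase counter increments (withCounterFields), scan renders its value as raw 8-byte counters; get_counter decodes a single one. The row key 'apple' below is just an example:

scan 'wc'
get_counter 'wc', 'apple', 'cf:count'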