access.log->kafka->storm
1.利用shellcrawler.sh脚本实时产生日志数据
shellcrawler.sh脚本
#!/bin/sh
# Generates a fake nginx access log: appends one fixed JSON log line
# to access.log every second, for use as a Kafka/Storm demo data source.
# start cmd:
# nohup sh shellcrawler.sh >> shellcrawler.log 2>&1 &

# Timestamp helper: stores "YYYYMMDD HH:MM:SS" in g_getTime (used only by
# the commented-out logging snippet below).
g_getTime=""
function getTime
{
    g_getTime=`date '+%Y%m%d %H:%M:%S'`
}
#getTime && echo "[$g_getTime] [$0:$LINENO:$FUNCNAME] - "

# crawler: loops (effectively forever) appending the sample log line to
# access.log once per second.
function crawler
{
    int=1
    while(( $int<=1000000000 ))
    do
        log="{ \"time_local\": \"01/Nov/2015:00:01:01 +0800\", \"remote_addr\": \"182.92.77.57\", \"remote_user\": \"-\", \"body_bytes_sent\": \"5760\", \"request_time\": \"0.005\", \"status\": \"200\", \"request\": \"GET /jiayouserver/www/index.php\", \"request_method\": \"GET\", \"http_referrer\": \"-\", \"body_bytes_sent\":\"5760\", \"http_x_forwarded_for\": \"-\", \"http_user_agent\": \"Wget/1.12 (linux-gnu)\" }"
        let "int++"
        echo $log >> access.log
        sleep 1s
        #usleep 1000
    done
}

# main
crawler
2.上传到kafka
bin/kafka-topics.sh --create --zookeeper 192.168.236.129:2181 --replication-factor 1 --partitions 1 --topic topic_access
将日志文件access.log上传到主题topic_access
查看上传结果
3.运行topology-0.0.1.jar
4.NginxTopology代码
package starter.topology.nginx;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.StormTopology;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import starter.topology.nginx.bolt.ExtractBolt;
import starter.topology.nginx.bolt.StatisticsBolt;
import starter.topology.nginx.bolt.WritedbBolt;
import starter.util.JConf;
import starter.util.JTimer;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
public class NginxTopology {
public static void main(String[] args) throws Exception {
/**
* 配置zk信息
*/
String zklist = JConf.getEV("topology", "zookeeper");
String kfka_topic = JConf.getEV("topology", "kfka_topic");
String db_host = JConf.getEV("topology", "db_host");
String db_user = JConf.getEV("topology", "db_user");
String db_passwd = JConf.getEV("topology", "db_passwd");
String db_name = JConf.getEV("topology", "db_name");
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.StormTopology;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import starter.topology.nginx.bolt.ExtractBolt;
import starter.topology.nginx.bolt.StatisticsBolt;
import starter.topology.nginx.bolt.WritedbBolt;
import starter.util.JConf;
import starter.util.JTimer;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
public class NginxTopology {
public static void main(String[] args) throws Exception {
/**
* 配置zk信息
*/
String zklist = JConf.getEV("topology", "zookeeper");
String kfka_topic = JConf.getEV("topology", "kfka_topic");
String db_host = JConf.getEV("topology", "db_host");
String db_user = JConf.getEV("topology", "db_user");
String db_passwd = JConf.getEV("topology", "db_passwd");
String db_name = JConf.getEV("topology", "db_name");
SpoutConfig kafkaConfig = new SpoutConfig(new ZkHosts(zklist), kfka_topic, "/bigdata/nginx", "nginxid");
kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
/**
* 一、创建 Builder
*/
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("kafkaspout", new KafkaSpout(kafkaConfig), 1);
builder.setBolt("extract", new ExtractBolt(),1).shuffleGrouping("kafkaspout");
//builder.setBolt("extract", new ExtractBolt(),2).shuffleGrouping("kafkaspout");
//builder.setBolt("statistics", new StatisticsBolt(),2).fieldsGrouping("extract", new Fields("date", "ip", "location", "status", "body_bytes_sent", "request", "http_referer", "http_user_agent", "http_x_forwarded_for"));
//builder.setBolt("writedb", new WritedbBolt(),10).fieldsGrouping("statistics",new Fields("date","location","status","request","stat_field","count") );
/**
* 二、创建 配置对象Config
*/
Config config = new Config();
config.put(Config.TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS, 1000);
config.setMaxSpoutPending(10); //限制整个流程中tuple待处理的数量,(没有ack/failed)
//config.setDebug(true);
config.setDebug(false);
// put paramters
config.put("zklist", zklist);
config.put("kfka_topic",kfka_topic);
config.put("db_host", db_host);
config.put("db_user", db_user);
config.put("db_passwd", db_passwd);
config.put("db_name", db_name);
/**
* 三、创建 Topology
*/
StormTopology stormTopology = builder.createTopology();
/**
* 四、提交任务
*/
//String topologyname = "Wei-Bo";
String topologyname = JConf.getEV("topology", "name");
String os = System.getProperty("os.name");
System.out.println(os);
if (os.contains("Window")) { // 本地模式
config.setNumWorkers(1);
LocalCluster cluster = new LocalCluster();
cluster.submitTopology(topologyname, config, stormTopology);
Thread.sleep(Integer.MAX_VALUE);
System.exit(0);
} else { // 集群模式
config.setNumWorkers(1);
StormSubmitter.submitTopology(topologyname, config, stormTopology);
}
}
}
5.编译
6.运行结果