Directory structure:
KafkaWriter.java
Description: reads data line by line from a local file and writes it to Kafka. A line of the form key=value is sent with that key; a line containing no "=" is sent with a random numeric key in [0, 100).
package cn.com.java.kafka;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import java.io.*;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class KafkaWriter {
    public static void main(String[] args) {
        if (args.length < 5) {
            System.err.println("Usage: KafkaWriter <brokerList> <topic> <threadNum> <intervalMs> <filePath>");
            System.exit(1);
        }
        String kafkaServer = args[0];
        String topicName = args[1];
        int threadNum = Integer.parseInt(args[2]);
        int interval = Integer.parseInt(args[3]);
        File file = new File(args[4]);
Properties props = new Properties();
props.put("metadata.broker.list", kafkaServer);
props.put("request.required.acks", "0");
props.put("serializer.class", "kafka.serializer.StringEncoder");
ExecutorService executor = Executors.newFixedThreadPool(threadNum);
        for (int i = 0; i < threadNum; i++) {
            executor.submit(new SQLDataProducerThread(props, topicName, interval, file));
        }
        // Stop accepting new tasks so the JVM can exit once all producer threads finish.
        executor.shutdown();
    }
    static class SQLDataProducerThread implements Runnable {
        private final String topic;
        private final Producer<String, String> producer;
        private final int interval;
        private final BufferedReader reader;

        public SQLDataProducerThread(Properties props, String topic, int interval, File file) {
            this.topic = topic;
            this.producer = new Producer<String, String>(new ProducerConfig(props));
            this.interval = interval;
            try {
                this.reader = new BufferedReader(new FileReader(file));
            } catch (FileNotFoundException e) {
                // Fail fast instead of leaving reader null and hitting an NPE in run().
                throw new RuntimeException("Input file not found: " + file, e);
            }
        }
        @Override
        public void run() {
            int count = 0;
            Random random = new Random();
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    String[] parts = line.split("=");
                    String key;
                    String value;
                    if (parts.length > 1) {
                        // "key=value" lines keep their own key.
                        key = parts[0];
                        value = parts[1];
                    } else {
                        // Lines without "=" get a random key in [0, 100).
                        key = String.valueOf(random.nextInt(100));
                        value = parts[0];
                    }
                    producer.send(new KeyedMessage<String, String>(topic, key, value));
                    Thread.sleep(interval);
                }
                // Grace period so the async producer can drain its queue;
                // larger inputs get a longer wait.
                if (count >= 100000) {
                    Thread.sleep(20000);
                } else if (count >= 10000) {
                    Thread.sleep(10000);
                } else {
                    Thread.sleep(5000);
                }
            } catch (IOException | InterruptedException e) {
                e.printStackTrace();
            } finally {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                producer.close();
                // Note: the first thread to finish terminates the whole JVM.
                System.exit(0);
            }
        }
}
}
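The class above uses the legacy Scala producer API (kafka.javaapi.producer), which was deprecated in 0.9 and removed in later Kafka releases. For reference, here is a minimal sketch of the same fire-and-forget send using the new Java client that ships in the same kafka_2.11 0.9.0.1 artifact; the class name NewApiWriter, the broker list, and the topic name are placeholders:

package cn.com.java.kafka;

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class NewApiWriter {
    public static void main(String[] args) {
        Properties props = new Properties();
        // bootstrap.servers replaces metadata.broker.list (placeholder hosts)
        props.put("bootstrap.servers", "Node1:9092,Node2:9092");
        // acks=0 is the same fire-and-forget setting as request.required.acks=0
        props.put("acks", "0");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<String, String>(props);
        // ProducerRecord(topic, key, value) replaces KeyedMessage
        producer.send(new ProducerRecord<String, String>("test-topic", "key1", "value1"));
        producer.close();
    }
}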
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>cn.com.java.kafka</groupId>
<artifactId>genData</artifactId>
<version>1.0.0</version>
<name>genData</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.9.0.1</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>1.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>cn.com.java.kafka.KafkaWriter</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Configure Maven
- In the local Maven settings.xml, add a mirror entry for the internal repository:
<mirror>
<id>mymaven.org</id>
<name>Mirror of maven</name>
<url>https://mynetwork.com/artifactory/public-maven-virtual/</url>
<mirrorOf>central</mirrorOf>
</mirror>
- Point the IDE at this Maven installation.
- Download the project dependencies and build the shaded jar (see the build command below).
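Dependencies are fetched automatically on the first build. Packaging runs the shade plugin configured above and produces the fat jar the launcher script below expects (the output path follows from the pom coordinates):

mvn clean package
# shaded jar: target/genData-1.0.0.jar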
kwriter.sh
#!/usr/bin/env bash
# Jar arguments: <TOPIC> <thread count: default 1> <interval (ms)> <file path>
# trap: kill the producer on Ctrl-C
KILLK(){
echo "Stop KafkaWriter..."
[ $(ps -ef|grep KafkaWriter|grep -v grep|wc -l) -ge 1 ] && ps -ef|grep KafkaWriter|grep -v grep|awk '{print $2}'|xargs kill -9
}
trap '{ echo "exit trap...."; KILLK; exit 99; }' INT
#INFO
DIR=$(cd `dirname $0`;pwd)
KAFKA_MASTER=Node1
KAFKA_HOME=/usr/local/kafka
JAVA_HOME=/home/java
ZOOKEEPER_HOSTS=Node1:2181,Node2:2181
BOOTSTRAP_SERVER=Node1:9092,Node2:9092
# Check whether the topic already exists (exact-match, not substring)
TOPICCOUNT=$(ssh ${KAFKA_MASTER} "${KAFKA_HOME}/bin/kafka-topics.sh --zookeeper ${ZOOKEEPER_HOSTS} --list | grep -cx $1")
if [ ${TOPICCOUNT} -eq 0 ];then
echo ">> Create TOPIC..."
ssh ${KAFKA_MASTER} "$KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper ${ZOOKEEPER_HOSTS} --topic $1 --replication-factor 1 --partitions 1"
fi
echo ">> start producer..."
$JAVA_HOME/bin/java -cp $DIR/genData-1.0.0.jar cn.com.java.kafka.KafkaWriter $BOOTSTRAP_SERVER $1 1 $2 $3
Usage:
bash ${DIR}/kwriter.sh ${TOPIC} 0 ${DataFile}
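To verify that messages are arriving, one option is the console consumer bundled with Kafka 0.9, pointed at the same ZooKeeper quorum the script assumes:

${KAFKA_HOME}/bin/kafka-console-consumer.sh --zookeeper Node1:2181,Node2:2181 --topic ${TOPIC} --from-beginning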