Kafka 0.8 changed a great deal compared with Kafka 0.7: both the usage patterns and the interface classes are different. This post briefly shows how to write data into a 0.7 Kafka cluster.
I happened to be working with a 0.7 deployment, so I had to use the 0.7 API to produce data. Since this targets a legacy version it is of limited general interest, and is offered mainly as a reference.
The project is managed with Maven.
The Maven POM looks like this:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.x</groupId>
  <artifactId>KafkaWriter</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hadoop.version>0.20.2-cdh3u6</hadoop.version>
  </properties>
  <dependencies>
    <!-- Apache Kafka, plus the Scala 2.8.0 runtime it is built against -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-compiler</artifactId>
      <version>2.8.0</version>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>2.8.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka</artifactId>
      <version>0.7.2</version>
      <!-- Exclude transitive dependencies that are not resolvable from
           public Maven repositories -->
      <exclusions>
        <exclusion>
          <artifactId>jmxri</artifactId>
          <groupId>com.sun.jmx</groupId>
        </exclusion>
        <exclusion>
          <artifactId>jms</artifactId>
          <groupId>javax.jms</groupId>
        </exclusion>
        <exclusion>
          <artifactId>jmxtools</artifactId>
          <groupId>com.sun.jdmk</groupId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.101tec</groupId>
      <artifactId>zkclient</artifactId>
      <version>0.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.3</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.9</version>
    </dependency>
    <dependency>
      <groupId>commons-cli</groupId>
      <artifactId>commons-cli</artifactId>
      <version>1.2</version>
    </dependency>
    <!-- Unit Test -->
    <dependency>
      <groupId>org.testng</groupId>
      <artifactId>testng</artifactId>
      <version>6.1.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <build>
    <finalName>${project.artifactId}</finalName>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.8.1</version>
        <configuration>
          <skipTests>true</skipTests>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.0</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
          <encoding>UTF-8</encoding>
          <compilerArguments>
            <extdirs>src${file.separator}main${file.separator}lib</extdirs>
          </compilerArguments>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>config</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
Writing to Kafka takes just one small Java class that sends data from the command line. It supports two modes: reading from a data file, or reading from standard input. You can use it as a starting point for a program that fits your own needs.
package com.x;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Properties;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import kafka.javaapi.producer.Producer;
import kafka.javaapi.producer.ProducerData;
import kafka.producer.ProducerConfig;

/**
 * Simple command-line writer that sends lines of text to a Kafka 0.7 cluster.
 */
public class KafkaSimpleWriter {
    private static final Logger logger = LoggerFactory.getLogger(KafkaSimpleWriter.class);

    private String topic;
    private String zkString;
    private Producer<String, String> producer;
    private ProducerConfig config;
    private long totalCnt = 0;

    public KafkaSimpleWriter(String configPath, String topic, String zkString) throws IOException {
        this.topic = topic;
        this.zkString = zkString;
        Properties properties = new Properties();
        // The optional config file is loaded from the classpath, not the file system.
        if (configPath != null && !configPath.isEmpty()) {
            properties.load(Thread.currentThread().getContextClassLoader().getResourceAsStream(configPath));
        }
        // Values from the config file take precedence; these are fallback defaults.
        setDefaultProperty(properties, "serializer.class", "kafka.serializer.StringEncoder");
        setDefaultProperty(properties, "zk.connect", this.zkString);
        setDefaultProperty(properties, "zk.connectiontimeout.ms", "65000");
        setDefaultProperty(properties, "producer.type", "async");
        setDefaultProperty(properties, "queue.time", "10000");
        setDefaultProperty(properties, "queue.size", "10000");
        this.config = new ProducerConfig(properties);
        this.producer = new Producer<String, String>(this.config);
    }

    private void setDefaultProperty(Properties prop, String key, String defaultValue) {
        if (prop.getProperty(key) == null) {
            prop.setProperty(key, defaultValue);
        }
    }

    public void sendMessage(String message) {
        this.producer.send(new ProducerData<String, String>(topic, message));
        totalCnt++;
        if (totalCnt % 1000000 == 0) {
            logger.info("Total sent messages:{} to topic:{}", totalCnt, this.topic);
        }
    }

    public void close() {
        // Flush anything still buffered by the async producer before exiting.
        this.producer.close();
    }

    public long getCount() {
        return this.totalCnt;
    }

    public static void main(String[] args) throws IOException, ParseException {
        Options opts = new Options();
        opts.addOption("h", false, "help message");
        opts.addOption("data", true, "data file path");
        opts.addOption("config", true, "config file path");
        opts.addOption("topic", true, "kafka topic");
        opts.addOption("zookeeper", true, "zookeeper list");
        BasicParser parser = new BasicParser();
        CommandLine cl = parser.parse(opts, args);
        if (cl.hasOption('h') || !cl.hasOption("topic") || !cl.hasOption("zookeeper")) {
            HelpFormatter hf = new HelpFormatter();
            hf.printHelp("OptionsTip", opts);
            return;
        }
        String topic = cl.getOptionValue("topic");
        String zkString = cl.getOptionValue("zookeeper");
        String configPath = cl.getOptionValue("config", null);
        String dataPath = cl.getOptionValue("data", null);
        logger.info("topic={}, zookeeper={}, config={}, data={}", topic, zkString, configPath, dataPath);

        KafkaSimpleWriter writer = new KafkaSimpleWriter(configPath, topic, zkString);
        // Without -data, read from standard input; otherwise read the given file.
        BufferedReader in = (dataPath == null || dataPath.isEmpty())
                ? new BufferedReader(new InputStreamReader(System.in))
                : new BufferedReader(new FileReader(dataPath));
        try {
            String s;
            while ((s = in.readLine()) != null) {
                writer.sendMessage(s);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            in.close();
            writer.close();
        }
        System.out.println("Successfully wrote records:" + writer.getCount());
    }
}
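The constructor also accepts an optional properties file via -config. Note that it is loaded through the classloader, so the file must be on the classpath (for example under src/main/resources, or in a directory added to -cp). Any key set there wins over the in-code defaults, because setDefaultProperty only fills in missing keys. A minimal sketch, with a hypothetical file name producer.properties and illustrative values:
# producer.properties (hypothetical) -- keys here override the constructor defaults
serializer.class=kafka.serializer.StringEncoder
producer.type=async
# queue.time: max ms a message may sit in the async queue before being flushed
queue.time=5000
# queue.size: capacity of the async queue
queue.size=20000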
Building is straightforward:
mvn clean package
This leaves the jars under the target directory; with the assembly plugin above, the self-contained one is named KafkaWriter-jar-with-dependencies.jar.
Running is just as straightforward:
java -cp $your_jar_file com.x.KafkaSimpleWriter -zookeeper localhost:2181 -topic test -data data.txt
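If you omit -data, the writer reads lines from standard input instead, so you can also pipe data in. Assuming the assembly jar name produced by the POM above:
cat data.txt | java -cp target/KafkaWriter-jar-with-dependencies.jar com.x.KafkaSimpleWriter -zookeeper localhost:2181 -topic test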
Reading from Kafka is likewise a single small Java class: it consumes data from Kafka via the command line and prints it to standard output. Again, you can use it as a starting point for your own program.
package com.x;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.Message;
import kafka.message.MessageAndMetadata;

/**
 * Simple command-line reader that consumes a Kafka 0.7 topic and prints each
 * message to standard output.
 */
public class KafkaSimpleReader {
    private static final Logger logger = LoggerFactory.getLogger(KafkaSimpleReader.class);

    private String topic;
    private String zkString;
    private ConsumerConfig consumerConfig;
    private ConsumerConnector consumerConnector;
    private KafkaStream<Message> stream;
    private long totalCnt = 0;

    public KafkaSimpleReader(String configPath, String topic, String zkString) throws IOException {
        this.topic = topic;
        this.zkString = zkString;
        Properties properties = new Properties();
        // The optional config file is loaded from the classpath, not the file system.
        if (configPath != null && !configPath.isEmpty()) {
            properties.load(Thread.currentThread().getContextClassLoader().getResourceAsStream(configPath));
        }
        setDefaultProperty(properties, "zk.connect", this.zkString);
        setDefaultProperty(properties, "zk.connectiontimeout.ms", "30000");
        setDefaultProperty(properties, "groupid", "commonreader");
        // Print the effective configuration for debugging.
        System.out.println(properties.toString());
        this.consumerConfig = new ConsumerConfig(properties);
        this.consumerConnector = Consumer.createJavaConsumerConnector(consumerConfig);
        // Ask for a single stream on the topic and keep a reference to it.
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        map.put(topic, 1);
        Map<String, List<KafkaStream<Message>>> topicMessageStreams =
                consumerConnector.createMessageStreams(map);
        stream = topicMessageStreams.get(topic).get(0);
    }

    private void setDefaultProperty(Properties prop, String key, String defaultValue) {
        if (prop.getProperty(key) == null) {
            prop.setProperty(key, defaultValue);
        }
    }

    public void readMessage() {
        for (MessageAndMetadata<Message> msgAndMetadata : stream) {
            System.out.println("topic: " + msgAndMetadata.topic());
            Message message = msgAndMetadata.message();
            // Copy the payload out of the ByteBuffer; calling buffer.array()
            // would also expose the header bytes backing the buffer.
            ByteBuffer buffer = message.payload();
            byte[] bytes = new byte[buffer.remaining()];
            buffer.get(bytes);
            System.out.println("message content: " + new String(bytes));
            totalCnt++;
            if (totalCnt % 1000000 == 0) {
                logger.info("Total received messages:{} from topic:{}", totalCnt, this.topic);
            }
        }
    }

    public void close() {
        this.consumerConnector.shutdown();
    }

    public long getCount() {
        return this.totalCnt;
    }

    public static void main(String[] args) throws IOException, ParseException {
        Options opts = new Options();
        opts.addOption("h", false, "help message");
        opts.addOption("config", true, "config file path");
        opts.addOption("topic", true, "kafka topic");
        opts.addOption("zookeeper", true, "zookeeper list");
        BasicParser parser = new BasicParser();
        CommandLine cl = parser.parse(opts, args);
        if (cl.hasOption('h') || !cl.hasOption("topic") || !cl.hasOption("zookeeper")) {
            HelpFormatter hf = new HelpFormatter();
            hf.printHelp("OptionsTip", opts);
            return;
        }
        String topic = cl.getOptionValue("topic");
        String zkString = cl.getOptionValue("zookeeper");
        String configPath = cl.getOptionValue("config", null);
        logger.info("topic={}, zookeeper={}, config={}", topic, zkString, configPath);

        KafkaSimpleReader reader = new KafkaSimpleReader(configPath, topic, zkString);
        try {
            reader.readMessage();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            reader.close();
        }
        System.out.println("Successfully read records:" + reader.getCount());
    }
}
Building is the same:
mvn clean package
which again produces the jars under the target directory.
Running is equally simple:
java -cp $your_jar_file com.x.KafkaSimpleReader -zookeeper localhost:2181 -topic test
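One thing to watch: the default groupid is commonreader, and Kafka 0.7 tracks consumed offsets per group in ZooKeeper, so two readers started with the same group will split the topic's messages between them rather than each seeing everything. To read under a different group, pass a -config properties file (loaded from the classpath, like the writer's). A minimal sketch with a hypothetical consumer.properties:
# consumer.properties (hypothetical)
groupid=myreader
# start from the earliest offset when this group has none recorded yet
autooffset.reset=smallest
java -cp .:target/KafkaWriter-jar-with-dependencies.jar com.x.KafkaSimpleReader -zookeeper localhost:2181 -topic test -config consumer.properties
Here the current directory is added to the classpath so the classloader can find the properties file.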