4 分区与自定义分区器
代码地址:https://github.com/luslin1711/kafka_demo/tree/master/kafka_demo_04
ProducerRecord 对象有多个构造方法,常用的有:
// Explicitly specifies the target partition; the two nulls passed through are
// the timestamp and headers, which are left unset here.
public ProducerRecord(String topic, Integer partition, K key, V value) {
this(topic, partition, null, key, value, null);
}
// No partition given: partition selection is deferred to the configured
// Partitioner, which will hash the key (see DefaultPartitioner below).
public ProducerRecord(String topic, K key, V value) {
this(topic, null, null, key, value, null);
}
// Neither partition nor key given: with the default partitioner the record
// goes to the sticky partition chosen for this topic.
public ProducerRecord(String topic, V value) {
this(topic, null, null, null, value, null);
}
partition 参数是想要发送到的分区;如果不传,则由分区器根据 key 是否传入来决定分区。KafkaProducer 内部的选择逻辑如下:
// KafkaProducer internal: if the record carries an explicit partition, use it
// as-is; otherwise delegate the choice to the configured Partitioner.
private int partition(ProducerRecord<K, V> record, byte[] serializedKey, byte[] serializedValue, Cluster cluster) {
Integer partition = record.partition();
return partition != null ?
partition :
partitioner.partition(
record.topic(), record.key(), serializedKey, record.value(), serializedValue, cluster);
}
DefaultPartitioner.partition
// DefaultPartitioner: records without a serialized key use the "sticky"
// partition (one partition per batch, rotated on new batches); records with a
// key are hashed so that equal keys always map to the same partition.
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster,
int numPartitions) {
if (keyBytes == null) {
return stickyPartitionCache.partition(topic, cluster);
}
// hash the keyBytes to choose a partition
return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
}
所以,拥有相同键的消息会被写入同一个分区。如果一个进程只从一个主题的某个分区读取数据,那么具有相同键的所有记录都会被该进程读取。
二、 自定义分区器
比如, 将key为null 的record 发送到最后一个分区, 其他的键散列到其他分区
需要实现Partitioner接口
// Kafka's partitioner SPI: implementations decide which partition each record
// is written to. Configurable lets the producer pass its config map in via
// configure(); Closeable allows releasing resources on producer shutdown.
public interface Partitioner extends Configurable, Closeable {
/**
 * Compute the partition for the given record.
 *
 * @param topic The topic name
 * @param key The key to partition on (or null if no key)
 * @param keyBytes The serialized key to partition on( or null if no key)
 * @param value The value to partition on or null
 * @param valueBytes The serialized value to partition on or null
 * @param cluster The current cluster metadata
 * @return The partition number the record should be sent to
 */
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster);
/**
 * This is called when partitioner is closed.
 */
public void close();
/**
 * Notifies the partitioner a new batch is about to be created. When using the sticky partitioner,
 * this method can change the chosen sticky partition for the new batch.
 * @param topic The topic name
 * @param cluster The current cluster metadata
 * @param prevPartition The partition previously selected for the record that triggered a new batch
 */
default public void onNewBatch(String topic, Cluster cluster, int prevPartition) {
}
}
例如 CustomizedPartitioner
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.utils.Utils;
import java.util.Map;
/**
 * Custom partitioner that routes records without a key to the LAST partition
 * of the topic, and hashes keyed records across the remaining partitions
 * (0 .. n-2), so equal keys always land on the same partition.
 */
public class CustomizedPartitioner implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        Integer countForTopic = cluster.partitionCountForTopic(topic);
        // Fall back to partition 0 when the partition count is unknown or there
        // is no spare partition to reserve for null keys. The original also
        // returned -1 (an invalid partition) when the count was 0 or negative;
        // "<= 1" closes that hole.
        if (countForTopic == null || countForTopic <= 1) {
            return 0;
        }
        // Check the SERIALIZED key, matching DefaultPartitioner's behavior:
        // the original tested `key == null`, which misroutes a non-null key
        // whose serializer produces null bytes (murmur2 would then NPE).
        if (keyBytes == null) {
            return countForTopic - 1;
        }
        // Hash keyed records over partitions [0, countForTopic - 2].
        return Utils.toPositive(Utils.murmur2(keyBytes)) % (countForTopic - 1);
    }

    @Override
    public void close() {
        // No resources to release.
    }

    @Override
    public void configure(Map<String, ?> configs) {
        // No configuration needed.
    }
}
然后在Producer中定义这个类
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
 * Demo producer using CustomizedPartitioner: the first loop sends keyed
 * records (hashed over partitions 0..n-2), the second sends null-key records
 * (all routed to the last partition).
 */
public class Producer {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "all");
        props.put("enable.idempotence", "true");
        props.put("retries", 5);
        props.put("max.in.flight.requests.per.connection", 1);
        // Register the custom partitioner by fully-qualified class name.
        props.put("partitioner.class", "com.luslin.demo.kakfa.producer.CustomizedPartitioner");
        // try-with-resources guarantees the producer is flushed and closed even
        // when a send/get throws; the original leaked the producer on exception.
        try (KafkaProducer<String, String> producer =
                new KafkaProducer<>(props, new StringSerializer(), new StringSerializer())) {
            for (int i = 0; i < 10; i++) {
                Future<RecordMetadata> future =
                        producer.send(new ProducerRecord<>("topic04", Integer.toString(i), Integer.toString(i)));
                RecordMetadata recordMetadata = future.get();
                System.out.println("offset: " + recordMetadata.offset() + ", partition: " + recordMetadata.partition());
            }
            for (int i = 0; i < 10; i++) {
                Future<RecordMetadata> future =
                        producer.send(new ProducerRecord<>("topic04", null, Integer.toString(i)));
                RecordMetadata recordMetadata = future.get();
                System.out.println("key null -> offset: " + recordMetadata.offset() + ", partition: " + recordMetadata.partition());
            }
        }
    }
}