因为一次上线事故,导致kafka里的数据取出后被丢弃,于是写了一个直接消费kafka的脚本
python2.7+kafka0.8+zookeeper
直接上代码
from pykafka import KafkaClient
from pykafka.common import OffsetType
class NewK_Client(object):
    """Thin wrapper around a pykafka ``KafkaClient`` bound to one topic."""

    def __init__(self, topic, host):
        # topic: kafka topic name as expected by pykafka's topic lookup.
        # host: broker host string passed straight to KafkaClient(hosts=...).
        self.topic = topic
        self.client = KafkaClient(hosts=host)

    def HaddleConsum(self, zookeeper_connect, consumer_group='my_group'):
        """Return a zookeeper-balanced consumer for this topic.

        :param zookeeper_connect: zookeeper connection string used for
            group coordination and offset storage.
        :param consumer_group: consumer group id (was hard-coded to
            ``'my_group'``; kept as the default for backward compatibility).

        NOTE: ``reset_offset_on_start=True`` combined with
        ``auto_offset_reset=OffsetType.EARLIEST`` makes the consumer ignore
        any committed offset and start from the earliest message still
        retained by the broker.
        """
        topic = self.client.topics[self.topic]
        consum = topic.get_balanced_consumer(
            consumer_group=consumer_group,
            auto_commit_enable=True,
            zookeeper_connect=zookeeper_connect,
            reset_offset_on_start=True,
            auto_offset_reset=OffsetType.EARLIEST,
        )
        return consum
class NewStoreEvent(object):
    """Consume the configured kafka topic forever, processing each message.

    Kafka connection settings are read from the module-level ``CONF``
    object (defined elsewhere in this file, not visible in this chunk).
    """

    def __init__(self, event_type=(), start_time="", stop_time="", event_id=""):
        # Bug fix: the original accepted these four arguments and silently
        # discarded them; store them so callers can actually use the filter
        # parameters they pass in.
        self.event_type = event_type
        self.start_time = start_time
        self.stop_time = stop_time
        self.event_id = event_id
        self.kafka_addr = CONF.get("kafka", "kafka_addr")
        self.kafka_topic = CONF.get("kafka", "kafka_topic")
        self.kafka_zp = CONF.get("kafka", "kafka_zp")

    def connection(self):
        """Build and return a zookeeper-balanced consumer for the topic."""
        kc = NewK_Client(self.kafka_topic, self.kafka_addr)
        consums = kc.HaddleConsum(self.kafka_zp)
        return consums

    def main(self):
        """Consume messages forever, logging (never crashing on) errors."""
        consums = self.connection()
        # NOTE(review): created but never used in the original — presumably
        # the intended redis sink for each message; confirm against callers.
        kfk_rds = kafka2redis()
        while True:
            try:
                if consums is not None:
                    for data in consums:
                        try:
                            print(data)
                        except Exception as e:
                            # Per-message failures are logged so one bad
                            # record does not stop the consumer loop.
                            LOG.error(e, exc_info=True)
            except Exception as e:
                # Consumer-level failures (rebalance, connection loss):
                # log and re-enter the loop rather than exit.
                LOG.error(e, exc_info=True)
关键在于 auto_offset_reset=OffsetType.EARLIEST 与 reset_offset_on_start=True 这两项配置的组合:消费者会忽略 zookeeper 中已提交的 offset,从 kafka 中保留的最早一条(未过期的)消息开始读取