Read data from Kafka: consume log messages from a Kafka topic and batch-insert them into MySQL, with helpers for email alerts and nginx/Laravel error-log detection.
import time
import json
import queue
import threading
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from kafka import KafkaConsumer, TopicPartition
import pymysql
import re
import logging
import os
from logging.handlers import TimedRotatingFileHandler
logger = logging.getLogger()
logger.setLevel(logging.INFO)
log_dir = './logs'
os.makedirs(log_dir, exist_ok=True)
handler = TimedRotatingFileHandler(os.path.join(log_dir, 'app.log'), when='midnight', interval=1, backupCount=7)
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
KAFKA_SERVER = "xxxx:9092"
KAFKA_TOPIC = "topic_log"
DB_HOST = "xxxx"
DB_USER = "root"
DB_PASSWORD = "000000"
DB_NAME = "maxwall"
SMTP_SERVER = "smtp.gmail.com"
SMTP_PORT = 587
SENDER_EMAIL = "xxx"
SENDER_PASSWORD = "xxx"
RECEIVER_EMAIL = "xxx"
BATCH_SIZE = 1000
TIMEOUT = 300
data_queue = queue.Queue()
def get_db_connection():
    return pymysql.connect(
        host=DB_HOST,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
        charset='utf8mb4'
    )
def send_email(subject, body):
    msg = MIMEMultipart()
    msg['From'] = SENDER_EMAIL
    msg['To'] = RECEIVER_EMAIL
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))
    try:
        server = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
        server.starttls()
        server.login(SENDER_EMAIL, SENDER_PASSWORD)
        text = msg.as_string()
        server.sendmail(SENDER_EMAIL, RECEIVER_EMAIL, text)
        server.quit()
        print(f"Email sent to {RECEIVER_EMAIL}")
    except Exception as e:
        print(f"Error sending email: {e}")
def consume_from_kafka():
    consumer = KafkaConsumer(
        KAFKA_TOPIC,
        bootstrap_servers=[KAFKA_SERVER],
        group_id="log-consumer-group",
        value_deserializer=lambda x: json.loads(x.decode('utf-8'))
    )
    for message in consumer:
        data = message.value
        data_queue.put(data)
        # Log how far consumption has progressed on this partition.
        tp = TopicPartition(message.topic, message.partition)
        current_offset = consumer.position(tp)
        latest_offset = consumer.end_offsets([tp])[tp]
        progress = (current_offset / latest_offset) * 100 if latest_offset > 0 else 0
        logger.info(f"Partition: {message.partition}, offset: {current_offset}/{latest_offset}, progress: {progress:.2f}%")
def validate_data(data):
    if 'field1' in data and isinstance(data['field1'], (int, float)) and data['field1'] > 0:
        return True
    return False
def batch_insert_to_db():
    while True:
        if data_queue.qsize() >= BATCH_SIZE:
            process_batch()
        else:
            # Fewer than BATCH_SIZE records queued: wait, then flush whatever has accumulated.
            time.sleep(TIMEOUT)
            if data_queue.qsize() > 0:
                process_batch()
def process_batch():
print("Processing batch...")
data_batch = []
while not data_queue.empty() and len(data_batch) < BATCH_SIZE:
data_batch.append(data_queue.get())
try:
connection = get_db_connection()
cursor = connection.cursor()
for data in data_batch:
sql = """
INSERT INTO log_table (field1, field2, field3)
VALUES (%s, %s, %s)
"""
cursor.execute(sql, (data['field1'], data['field2'], data['field3']))
connection.commit()
print(f"Inserted {len(data_batch)} records into the database.")
except Exception as e:
print(f"Error inserting data: {e}")
finally:
cursor.close()
connection.close()
def should_send_email(data):
    if 'field1' in data and data['field1'] > 100:
        return True
    return False
def start_kafka_consumer():
    consumer_thread = threading.Thread(target=consume_from_kafka)
    consumer_thread.daemon = True
    consumer_thread.start()
def start_batch_processor():
    processor_thread = threading.Thread(target=batch_insert_to_db)
    processor_thread.daemon = True
    processor_thread.start()
def is_error_log(log: str) -> bool:
    '''Detect error entries in the nginx error.log.'''
    pattern = r'\[\s*(error|warn|crit|alert)\s*\]'
    if re.search(pattern, log, re.IGNORECASE):
        return True
    return False
def is_laravel_error_log(log: str) -> bool:
    '''Detect whether a Laravel log entry is an ERROR.'''
    pattern = r'\.ERROR:'
    if re.search(pattern, log, re.IGNORECASE):
        return True
    return False
'''
Filebeat multiline rules for stitching Django logs together:
filebeat.inputs:
- type: log
  paths:
    - /path/to/your/django/logs/*.log  # change to your Django log path
  multiline.pattern: '^Internal Server Error:'  # regex matched against the start of each line
  multiline.negate: true  # negate the match, so non-matching lines are joined to the previous event
  multiline.match: after  # continuation lines are appended after the preceding log line
'''
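# A minimal sketch (not part of the original script): once Filebeat has joined a multiline
# Django traceback into a single event, an error entry can be detected by its prefix,
# mirroring the nginx/Laravel helpers above. The function name is illustrative only.
def is_django_error_log(log: str) -> bool:
    '''Detect whether a Django log entry (as joined by Filebeat) is an error.'''
    return log.startswith('Internal Server Error:')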
if __name__ == "__main__":
    start_kafka_consumer()
    start_batch_processor()
    # Keep the main thread alive; the consumer and batch-insert threads run as daemons.
    while True:
        time.sleep(5 * 60)
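The helpers validate_data, should_send_email, send_email, is_error_log and is_laravel_error_log are defined above but never called. A minimal sketch of how they could be wired into the consumer loop, assuming each Kafka record is a dict that also carries its raw log line under a hypothetical 'message' key:

# Sketch only: 'message' is an assumed field name; adjust to your actual record schema.
def handle_record(data):
    raw = data.get('message', '')
    if is_error_log(raw) or is_laravel_error_log(raw):
        send_email("Error log detected", raw)
    elif should_send_email(data):
        send_email("field1 threshold exceeded", json.dumps(data))
    # Only queue records that pass validation for the MySQL batch insert.
    if validate_data(data):
        data_queue.put(data)

# One way to use it: in consume_from_kafka(), call handle_record(data) instead of data_queue.put(data).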