短时间内,对业务的DML分析

本文介绍了一种使用Python分析MySQL二进制日志(Binlog)的方法,通过解析Binlog记录并将其存储到数据库中进行进一步分析。文章详细描述了如何创建用于存储Binlog分析结果的数据表,以及如何解析Binlog中的不同操作类型(如INSERT、UPDATE、DELETE),并统计每次事务涉及的行数。此外,还提供了分析事务成本时间分布和事务中变更行数分布的方法。
#coding:utf-8
#!/usr/bin/python3
import pymysql
from pymysql.cursors import DictCursor
import re
import os
import sys
import datetime
import time
import getpass
import logging
import importlib
importlib.reload(logging)
logging.basicConfig(level=logging.DEBUG,format='%(asctime)s %(levelname)s %(message)s ')

usage=''' usage: python [script's path] [option]
ALL options need to assign:

-h     : host, the database host,which database will store the results after analysis 
-u     : user, the db user
-p     : password, the db user's password
-P     : port, the db port
-f     : file path, the binlog file
-tr    : table name for record , the table name to store the row record
-tt    : table name for transaction, the table name to store transactions
Example: python queryanalyse.py -h=127.0.0.1 -P=3310 -u=root -p=password -f=/tmp/stock_binlog.log -tt=flashback.tbtran -tr=flashback.tbrow

'''
global host
global user
global port
global password
global tbrow
global tbtran

user = input("please write your db_user :")
host = getpass.getpass("please write your db_host :")
port = user = input("please write your db_user :")("please write your db_port :")
#fpath = input("please write your binlogfile_address :")
password = getpass.getpass("please write your db_password :")
tbrow = input("please write your db_tb_row_record (eq:db_name.tb_name):")
tbtran = input("please write your db_tbtran (eq:db_name.tb_name):")
#####--------------------------------------------------------------------------------------------------------------------------------------
#def bowen():


#####--------------------------------------------------------------------------------------------------------------------------------------

def _get_db():
	if len(sys.argv) == 1:
		print(usage)
		sys.exit(1)
	elif sys.argv[1] == '--help':
		print(usage)
		sys.exit()
	elif len(sys.argv) > 2:
		#print (2222222222222)
		for i in sys.argv[1:]:
			_argv = i.split('=')
			if _argv[0] == '-h':
				host = _argv[1]
			elif _argv[0] == '-h':
				user = _argv[1]
			elif _argv[0] == '-u':
				port = int(_argv[1])
			elif _argv[0] == '-p':
				password = _argv[1]
			elif _argv[0] == '-P':
				port = _argv[1]
			elif _argv[0] == '-tr':
				tbrow = _argv[1]
			elif _argv[0] == '-tt':
				tbtran = _argv[1]
			else:
				print(usage)
#####--------------------------------------------------------------------------------------------------------------------------------------				
#def create_tab():
sql_01 = """CREATE TABLE IF NOT EXISTS  {} (
	auto_id int(10) unsigned NOT NULL AUTO_INCREMENT,
	begin_time datetime NOT NULL,
	end_time datetime NOT NULL,
	PRIMARY KEY (auto_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(tbtran)
sql_02 = """
CREATE TABLE IF NOT EXISTS {} (
	auto_id int(10) unsigned NOT NULL AUTO_INCREMENT,
	sqltype int(11) NOT NULL COMMENT '1 is insert,2 is update,3 is delete',
	tran_num int(11) NOT NULL COMMENT 'the transaction number',
	dbname varchar(50) NOT NULL,
	tbname varchar(50) NOT NULL,
	PRIMARY KEY (auto_id),
	KEY sqltype (sqltype),
	KEY dbname (dbname),
	KEY tbname (tbname)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(tbrow)
sql_03 = """ truncate table {}	""".format(tbtran)
sql_04 = """ truncate table {}	""".format(tbrow)
conn = pymysql.connect(host=host,user=user,passwd=password,port=int(port))
cursor = conn.cursor()	
logging.info("Now,creating table ...")
cursor.execute(sql_01)
cursor.execute(sql_02)
cursor.execute(sql_03)
cursor.execute(sql_04)
logging.info('created table {} and {}'.format(tbtran,tbrow))
conn.commit()
#cursor.close()
#####--------------------------------------------------------------------------------------------------------------------------------------	
#def rowrecord():
tran_num=1
record_sql=''
tran_sql=''
log = '/tmp/binlog_test.log'
binlog_file = os.popen('cat /tmp/binlog_test.log').readlines()
for bline in binlog_file:
	if bline.find('Table_map:') != -1:
		l = bline.index('server')
		n = bline.index('Table_map')
		begin_time = bline[:l:].rstrip(' ').replace('#', '20')
		if record_sql=='':
			bowen_begin_time = begin_time[0:4] + '-' + begin_time[4:6] + '-' + begin_time[6:]
		
		db_name = bline[n::].split(' ')[1].replace('`', '').split('.')[0]
		tb_name = bline[n::].split(' ')[1].replace('`', '').split('.')[1]
		bline=''
	elif bline.find('Xid =') != -1:
		l = bline.index('server')	
		end_time = bline[:l:].rstrip(' ').replace('#', '20')
		bowen_end_time = end_time[0:4] + '-' + end_time[4:6] + '-' + end_time[6:]
		#print(bowen_end_time,'well')
		tran_sql=record_sql+"insert into {}(begin_time,end_time) VALUES ('{}','{}')".format(tbtran,bowen_begin_time,bowen_end_time)
#		conn = pymysql.connect(host=host,user=user,passwd=password,port=int(port))
#		cursor = conn.cursor()			
		cursor.execute(tran_sql)
		conn.commit()
		record_sql = ''
		tran_num += 1
	elif bline.startswith('### INSERT INTO'):
		sql_05 = "insert into {}(sqltype,tran_num,dbname,tbname) VALUES (1,{},'{}','{}');".format(tbrow,tran_num,db_name,tb_name)
#			conn = pymysql.connect(host=host,user=user,passwd=password,port=int(port))
#			cursor = conn.cursor()
		cursor.execute(sql_05)		
		conn.commit()		
	elif bline.startswith('### UPDATE'):
		sql_06 = record_sql+"insert into {}(sqltype,tran_num,dbname,tbname) VALUES (2,{},'{}','{}');".format(tbrow,tran_num,db_name,tb_name)
		cursor.execute(sql_06)
		conn.commit()
	elif bline.startswith('### DELETE FROM'):
		sql_07 = record_sql+"insert into {}(sqltype,tran_num,dbname,tbname) VALUES (3,{},'{}','{}');".format(tbrow,tran_num,db_name,tb_name)
		cursor.execute(sql_07)
		conn.commit()
#		cursor.close()
#####--------------------------------------------------------------------------------------------------------------------------------------
#def binlogdesc():
sql=''	
t_num=0
r_num=0
logging.info('Analysed result printing...\n')
sql_08 = "select 'tbtran' name,count(*) nums from {}  union all select 'tbrow' name,count(*) nums from {};".format(tbtran,tbrow)
cursor.execute(sql_08)
rows=cursor.fetchall()
for row in rows:
	if row['name']=='tbtran':
		t_num = row['nums']
	else:
		r_num = row['nums']
print('This binlog file has {} transactions, {} rows are changed '.format(t_num,r_num))

sql_09 = '''select 
				count(case when cost_sec between 0 and 1 then 1 end ) cos_1,
				count(case when cost_sec between 1.1 and 5 then 1 end ) cos_5,
				count(case when cost_sec between 5.1 and 10 then 1 end ) cos_10,
				count(case when cost_sec between 10.1 and 30 then 1 end ) cos_30,
				count(case when cost_sec >30.1 then 1 end ) cos_more,
				max(cost_sec) cos_max
		from 
		(
				select 
					auto_id,timestampdiff(second,begin_time,end_time) cost_sec
				from {}
		) a;'''.format(tbtran)
cursor.execute(sql_09)
rows=cursor.fetchall()

for row in rows:
	print('The most cost time : {} '.format(row['cos_max']))
	print('The distribution map of each transaction costed time: ')
	print('Cost time between    0 and  1 second : {} , {}%'.format(row['cos_1'],int(row['cos_1']*100/t_num)))
	print('Cost time between  1.1 and  5 second : {} , {}%'.format(row['cos_5'], int(row['cos_5'] * 100 / t_num)))
	print('Cost time between  5.1 and 10 second : {} , {}%'.format(row['cos_10'], int(row['cos_10'] * 100 / t_num)))
	print('Cost time between 10.1 and 30 second : {} , {}%'.format(row['cos_30'], int(row['cos_30'] * 100 / t_num)))
	print('Cost time                     > 30.1 : {} , {}%\n'.format(row['cos_more'], int(row['cos_more'] * 100 / t_num)))
	
sql_10='''select 
			count(case when nums between 0 and 10 then 1 end ) row_1,
			count(case when nums between 11 and 100 then 1 end ) row_2,
			count(case when nums between 101 and 1000 then 1 end ) row_3,
			count(case when nums between 1001 and 10000 then 1 end ) row_4,
			count(case when nums >10001 then 1 end ) row_5,
			max(nums) row_max
		from 
		(
			select 
				count(*) nums
			from {} group by tran_num
		) a;'''.format(tbrow)
cursor.execute(sql_10)
rows=cursor.fetchall()

for row in rows:
	print('The most changed rows for each row: {} '.format(row['row_max']))
	print('The distribution map of each transaction changed rows : ')
	print('Changed rows between    1 and    10 second : {} , {}%'.format(row['row_1'],int(row['row_1']*100/t_num)))
	print('Changed rows between   11 and   100 second : {} , {}%'.format(row['row_2'], int(row['row_2'] * 100 / t_num)))
	print('Changed rows between  101 and  1000 second : {} , {}%'.format(row['row_3'], int(row['row_3'] * 100 / t_num)))
	print('Changed rows between 1001 and 10000 second : {} , {}%'.format(row['row_4'], int(row['row_4'] * 100 / t_num)))
	print('Changed rows                       > 10001 : {} , {}%\n'.format(row['row_5'], int(row['row_5'] * 100 / t_num)))
sql_11='select sqltype ,count(*) nums from {} group by sqltype ;'.format(tbrow)
cursor.execute(sql_11)
rows=cursor.fetchall()

print('The distribution map of the {} changed rows : '.format(r_num))
for row in rows:
	if row['sqltype']==1:
		print('INSERT rows :{} , {}% '.format(row['nums'],int(row['nums']*100/r_num)))
	if row['sqltype']==2:
		print('UPDATE rows :{} , {}% '.format(row['nums'],int(row['nums']*100/r_num)))
	if row['sqltype']==3:
		print('DELETE rows :{} , {}%\n '.format(row['nums'],int(row['nums']*100/r_num)))

sql_12 = '''select 
			dbname,tbname ,
			count(*) ALL_rows,
			count(*)*100/{} per,
			count(case when sqltype=1 then 1 end) INSERT_rows,
			count(case when sqltype=2 then 1 end) UPDATE_rows,
			count(case when sqltype=3 then 1 end) DELETE_rows
		from {} 
		group by dbname,tbname 
		order by ALL_rows desc 
		limit 10;'''.format(r_num,tbrow)
cursor.execute(sql_12)
rows = cursor.fetchall()

print('The distribution map of the {} changed rows : '.format(r_num))
print('tablename'.ljust(50),
	'|','changed_rows'.center(15),
	'|','percent'.center(10),
	'|','insert_rows'.center(18),
	'|','update_rows'.center(18),
	'|','delete_rows'.center(18)
	)
print('---------------------------------------------------------------------------------------------------------------------------')
for row in rows:
	print((row['dbname']+'.'+row['tbname']).ljust(50),
		'|',str(row['ALL_rows']).rjust(15),
		'|',(str(int(row['per']))+'%').rjust(10),
		'|',str(row['INSERT_rows']).rjust(10)+' , '+(str(int(row['INSERT_rows']*100/row['ALL_rows']))+'%').ljust(5),
		'|',str(row['UPDATE_rows']).rjust(10)+' , '+(str(int(row['UPDATE_rows']*100/row['ALL_rows']))+'%').ljust(5),
		'|',str(row['DELETE_rows']).rjust(10)+' , '+(str(int(row['DELETE_rows']*100/row['ALL_rows']))+'%').ljust(5),
		)
print('\n')

logging.info('Finished to analyse the binlog file !!!')
cursor.close()
#####--------------------------------------------------------------------------------------------------------------------------------------




注意:

1、该脚本适合短时间内的业务分析
2、根据binlog截取的大小会影响脚本的执行时间
# ----------------------------------------
# 小玩意,建议在自己的环境跑
# ----------------------------------------

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值