Notes
First there should be a step that initializes the log database and log table. Then define a function that takes the time difference between two timestamps and renders it as %H:%M:%S text, where %H can be greater than 24 (i.e. the hours field is not capped at 23).
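Because strftime's %H wraps at 24 (and cannot be applied to a timedelta anyway), the hours have to be computed with divmod over the total number of elapsed seconds. A minimal standalone sketch of that idea (the name format_duration is only illustrative; the class below implements the same logic in get_duration):

import datetime

def format_duration(start_time: datetime.datetime, end_time: datetime.datetime) -> str:
    # fold whole days into the seconds so the hour field can exceed 24
    delta = end_time - start_time
    seconds = delta.days * 24 * 60 * 60 + delta.seconds
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}:{:02}:{:02}".format(hours, minutes, seconds)

print(format_duration(datetime.datetime(2023, 5, 1, 0, 0, 0),
                      datetime.datetime(2023, 5, 2, 2, 30, 15)))  # prints 26:30:15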
import datetime
import time
import uuid

import pyspark.sql
from pyspark.sql import Row, SparkSession

from conf.configuration import ControlDBConf
class SpLogger:
    def __init__(self, control_db_conf: ControlDBConf):
        self.datetime_fmt = "%Y-%m-%d %H:%M:%S"
        self.__spark = SparkSession.builder.appName("SpLogger").getOrCreate()
        # JDBC connection info for the SQL Server control/log database
        self.__jdbcUrl = "jdbc:sqlserver://{JdbcHostName}:{JdbcPort};database={JdbcDbName}".format(
            JdbcHostName=control_db_conf.JdbcHostName,
            JdbcPort=control_db_conf.JdbcPort,
            JdbcDbName=control_db_conf.JdbcDbName)
        self.__ConnectionProperties = {
            "user": control_db_conf.UserName,
            "password": control_db_conf.Password,
            "driver": control_db_conf.Driver
        }
        self.__tb_name = control_db_conf.JdbcTbName
        self.initial()
    def initial(self):
        '''
        create log database & table if not exists
        '''
        pass
    def get_duration(self, start_time: datetime.datetime, end_time: datetime.datetime):
        # Build the duration string by hand so the hours field is not capped at 23
        time_delta = end_time - start_time
        seconds = time_delta.seconds + time_delta.days * 24 * 60 * 60
        hours, remainder = divmod(seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        duration = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)
        return duration
    def log_sqlcmd(self, sqlcmd):
        uid = str(uuid.uuid1())
        status = "succ"
        errors = "None"
        try:
            start_time = datetime.datetime.now()
            time.sleep(2)  # only here to make the demo duration visible
            self.__spark.sql(sqlcmd)
        except Exception as err:
            status = "fail"
            errors = str(err)  # keep the error as a string so the Row column type stays consistent
        finally:
            finished_time = datetime.datetime.now()
            duration = self.get_duration(start_time=start_time, end_time=finished_time)
            print(
                f'{uid} ->: {sqlcmd[:10]} start from {start_time} to {finished_time},cost {duration},status is {status},detail error info is {errors}')
            df_log = self.__spark.createDataFrame([
                Row(uid=uid,
                    sqlcmd=sqlcmd[:200],
                    start_time=datetime.datetime.strftime(start_time, self.datetime_fmt),
                    finished_time=datetime.datetime.strftime(finished_time, self.datetime_fmt),
                    duration=duration,
                    status=status,
                    errors=errors)
            ])
            self.write_log(df_log)
    def write_log(self, df: pyspark.sql.DataFrame):
        df.write.jdbc(url=self.__jdbcUrl, table=self.__tb_name, mode="append",
                      properties=self.__ConnectionProperties)
if __name__ == '__main__':
    SL = SpLogger(control_db_conf=ControlDBConf)
    for i in range(2):
        SL.log_sqlcmd(sqlcmd="aaa" + str(i))
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
0d5dd096-e826-11ed-8022-145afc2ab96c ->: aaa0 start from 2023-05-01 21:42:44.900674 to 2023-05-01 21:42:46.904280,cost 00:00:02,status is succ,detail error info is None
0ff975f0-e826-11ed-812b-145afc2ab96c ->: aaa1 start from 2023-05-01 21:42:49.276158 to 2023-05-01 21:42:51.291866,cost 00:00:02,status is succ,detail error info is None
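The initial() method above is left as a pass placeholder. A minimal sketch of what it might do, assuming the database named in the JDBC URL already exists and the SQL Server JDBC driver on the Spark classpath can be reused through the Py4J gateway; the DDL and its column types/lengths are assumptions that simply mirror the Row written in log_sqlcmd:

    def initial(self):
        '''
        create log database & table if not exists
        '''
        # Illustrative T-SQL DDL; adjust column types and lengths as needed.
        ddl = """
        IF OBJECT_ID('{tb}', 'U') IS NULL
        CREATE TABLE {tb} (
            uid            VARCHAR(64),
            sqlcmd         VARCHAR(200),
            start_time     VARCHAR(32),
            finished_time  VARCHAR(32),
            duration       VARCHAR(16),
            status         VARCHAR(16),
            errors         VARCHAR(4000)
        )
        """.format(tb=self.__tb_name)
        # Run the DDL over JDBC via Spark's Py4J gateway, reusing the same driver.
        jvm = self.__spark.sparkContext._gateway.jvm
        jvm.java.lang.Class.forName(self.__ConnectionProperties["driver"])
        conn = jvm.java.sql.DriverManager.getConnection(
            self.__jdbcUrl,
            self.__ConnectionProperties["user"],
            self.__ConnectionProperties["password"])
        try:
            conn.createStatement().execute(ddl)
        finally:
            conn.close()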