使用 Python + MySQL + Nginx 做服务器日志分析。

本文介绍了一种使用Python解析Nginx访问日志的方法,包括如何提取IP地址、时间戳、请求URL等关键信息,并将这些数据存入MySQL数据库的过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

#!coding=utf-8
import re,time,struct,base64,linecache,glob,requests,json,pymysql,linecache


# Module-level MySQL connection and cursor used by the rest of this script.
# The arguments are passed by keyword because PyMySQL 1.0+ no longer accepts
# them positionally.
# NOTE(review): credentials are hard-coded; consider reading them from config.
db = pymysql.connect(host="localhost", user="root", password="root", database="demo", charset="utf8")
cursor = db.cursor()


def parsetime(date, month, year, log_time):
    """Parse the date/time pieces of a log entry into a ``time.struct_time``.

    The pieces are joined as ``<year><month><date> <log_time>`` and parsed
    with the format ``%Y%b%d %H:%M:%S``, so ``month`` must be an abbreviated
    month name (e.g. ``Oct``) and ``log_time`` must look like ``13:55:36``.
    """
    stamp = "{0}{1}{2} {3}".format(year, month, date, log_time)
    parsed = time.strptime(stamp, '%Y%b%d %H:%M:%S')
    return parsed

def parserequest(rqst):
    """Return the query strings of every ``/report?...`` occurrence in ``rqst``.

    The result is a list holding, for each match, the text that follows
    ``/report?`` up to the end of the line; it is empty when nothing matches.
    """
    report_query = re.compile(r"/report\?(?P<param>.*)", re.VERBOSE)
    return report_query.findall(rqst)


def geturlapi(apiurl, postvalue, timeout=10):
    """POST ``postvalue`` to ``apiurl`` as the form field ``ua``.

    Args:
        apiurl: URL of the endpoint to call.
        postvalue: Value sent in the ``ua`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            request would hang the whole import. Pass ``None`` to get the
            previous wait-forever behaviour.

    Returns:
        The ``requests`` response object, unmodified; the HTTP status is not
        checked here.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    return requests.post(apiurl, data={'ua': postvalue}, timeout=timeout)

def readline(path):
    """Return every line of the file at ``path`` as a list, via ``linecache``."""
    lines = linecache.getlines(path)
    return lines


def phonedata(UserAgent):
    """Condense a User-Agent lookup result into three display strings.

    ``UserAgent`` is indexed with the keys ``status`` and ``data``. When
    ``status`` is 0 and ``data`` is non-empty, the optional entries
    ``device``, ``os``, ``os_version``, ``browser`` and ``browser_version``
    are read from ``data``, with a missing entry counting as ``""``.

    Returns a dict with the keys ``phone_model``, ``phone_os`` (OS and
    version joined by one space) and ``browser`` (browser and version joined
    by one space). All three are empty strings when the lookup did not
    succeed or carried no data.
    """
    # Anything other than a successful, non-empty lookup yields blank fields.
    if UserAgent['status'] != 0 or len(UserAgent['data']) <= 0:
        return {'phone_model': '', 'phone_os': '', 'browser': ''}

    def pick(name):
        # Missing entries fall back to an empty string.
        details = UserAgent['data']
        return details[name] if name in details else ""

    return {
        'phone_model': pick('device'),
        'phone_os': pick('os') + ' ' + pick('os_version'),
        'browser': pick('browser') + ' ' + pick('browser_version'),
    }



def _compile_log_line_regex():
    """Compile the pattern that splits one access-log line into named groups."""
    ip = r"?P<ip>[\d.]*"
    date = r"?P<date>\d+"
    month = r"?P<month>\w+"
    year = r"?P<year>\d+"
    log_time = r"?P<time>\S+"
    method = r"?P<method>\S+"
    request = r"?P<request>\S+"
    status = r"?P<status>\d+"
    bodyBytesSent = r"?P<bodyBytesSent>\d+"
    refer = r"""?P<refer>
             [^\"]*
             """
    userAgent = r"""?P<userAgent>
                .*
               """
    # re.VERBOSE ignores the whitespace inside the multi-line fragments above,
    # which is why literal spaces in the template are backslash-escaped.
    return re.compile(
        r"(%s)\ -\ -\ \[(%s)/(%s)/(%s)\:(%s)\ [\S]+\]\ \"(%s)?[\s]?(%s)?.*?\"\ (%s)\ (%s)\ \"(%s)\"\ \"(%s).*?\"" % (
            ip, date, month, year, log_time, method, request, status, bodyBytesSent, refer, userAgent),
        re.VERBOSE)


# Compiled once at import time instead of on every call.
_LOG_LINE_RE = _compile_log_line_regex()

# The driver substitutes and escapes the %s placeholders, so quotes or SQL
# fragments inside a logged URL or User-Agent cannot alter the statement.
_INSERT_LOG_SQL = (
    "INSERT INTO `nginxlog` (`logid`, `ip`, `timestamp`, `year`, `month`, `day`, `url`, "
    "`url_parameter`, `response_code`, `response_size`, `methon`, `phone_model`, `phone_os`, "
    "`browser`) VALUES (NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)


def getLogin(logstr):
    """Parse one access-log line and insert it into the ``nginxlog`` table.

    The line is expected to look like::

        <ip> - - [<day>/<month>/<year>:<time> <tz>] "<method> <request> ..."
        <status> <bytes> "<referer>" "<user agent>"

    The User-Agent is sent to the lookup API through ``geturlapi`` and the
    reply is condensed by ``phonedata``. The row is written through the
    module-level ``cursor``; this function does not commit.

    Args:
        logstr: A single line of the access log.

    Returns:
        None. Lines that do not match the expected format (blank lines, or
        the empty string ``linecache`` returns past the end of the file) are
        skipped without touching the API or the database.

    Raises:
        ValueError: If the matched date cannot be parsed by ``parsetime``.
        Errors raised by the HTTP call or by the database driver propagate
        to the caller.
    """
    match = _LOG_LINE_RE.search(logstr)
    if match is None:
        # Previously this case crashed with IndexError on the empty match list.
        return
    # Optional groups that did not take part in the match become ''.
    fields = match.groupdict(default='')

    # The timezone offset in the log line is not captured, and time.mktime()
    # interprets the parsed time as local time.
    parsed_time = parsetime(fields['date'], fields['month'], fields['year'], fields['time'])
    timestamp = int(time.mktime(parsed_time))

    # Split path and query string at the first '?' only, so a '?' inside the
    # query string no longer truncates it.
    url, _, url_parameter = fields['request'].partition('?')

    # Find a User-Agent library yourself, or analyse the string yourself.
    UserAgent = geturlapi('http://www.demo.com/xx/xx', fields['userAgent']).json()
    UserAgentData = phonedata(UserAgent)

    cursor.execute(_INSERT_LOG_SQL, (
        fields['ip'],                   # client IP address
        timestamp,                      # Unix timestamp
        fields['year'],
        fields['month'],
        fields['date'],                 # day of month
        url,
        url_parameter,
        fields['status'],               # HTTP response code
        fields['bodyBytesSent'],        # response size in bytes
        fields['method'],               # stored in the `methon` column
        UserAgentData['phone_model'],   # phone model
        UserAgentData['phone_os'],      # phone OS and version
        UserAgentData['browser'],       # browser and version
    ))



# Script entry: import lines FIRST_LINE .. LAST_LINE-1 of access.log.
FIRST_LINE = 9        # line number (1-based) of the first line to import
LAST_LINE = 100000    # line number at which importing stops (exclusive)
COMMIT_EVERY = 100    # commit after this many line numbers

try:
    # Stream the file instead of using linecache: linecache loads the whole
    # file into memory, and clearing its cache every 100 lines forced a full
    # re-read each time. Iterating also stops at end of file instead of
    # feeding empty strings to getLogin() up to line 100000.
    with open('access.log', encoding='utf-8', errors='replace') as log_file:
        for i, w in enumerate(log_file, 1):
            if i < FIRST_LINE:
                continue
            if i >= LAST_LINE:
                break
            getLogin(w)
            if i % COMMIT_EVERY == 0:
                db.commit()
            # Progress output: number of the next line, as before.
            print(i + 1)
    # PyMySQL does not autocommit by default; without this the inserted rows
    # are discarded when the connection closes.
    db.commit()
finally:
    db.close()



代码写得有点简陋,大家可以自行优化一下。

转载于:https://my.oschina.net/jishuge/blog/877183

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值