进入 PyCharm 终端:
pip install pyyaml ua-parser user-agents
完整代码:
from pathlib import Path
import datetime
import re
from user_agents import parse
from queue import Queue
import threading
from collections import defaultdict
#192.168.56.1 - - [18/Mar/2019:10:55:04 +0800] "POST /zabbix/jsrpc.php?output=json-rpc HTTP/1.1" 200 64 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36"
#日志格式处理(数据清洗)
# Compiled once at import time instead of on every call. Raw-string literals
# keep the regex escapes (\d, \., \[, \]) out of Python's own string-escape
# processing, which warns about them in a normal string literal.
_LOG_LINE_RE = re.compile(
    r'(?P<remote>[\d\.]{7,}) - - \[(?P<datetime>[^\[\]]+)\] '
    r'"(?P<request>[^"]+)" (?P<status>\d+) (?P<size>\d+) '
    r'"(?P<reference>[^"]+)" "(?P<useragent>[^"]+)"'
)


def extract(line: str) -> dict:
    """Parse one access-log line into a dict of named fields.

    The line is matched from its start against a pattern with the named
    groups ``remote``, ``datetime``, ``request``, ``status``, ``size``,
    ``reference`` and ``useragent``. Each captured string is then passed
    through the callable registered under the same name in the module-level
    ``ops`` mapping (defined outside this block); fields without an entry
    in ``ops`` are kept as the raw captured string.

    Args:
        line: A single log line.

    Returns:
        A dict keyed by group name holding the (possibly converted) values,
        or ``None`` when the line does not match the expected format.
    """
    matcher = _LOG_LINE_RE.match(line)
    if matcher is None:
        # Unparseable line: signal it explicitly so callers can skip it.
        return None
    return {
        name: ops.get(name, lambda x: x)(value)
        for name, value in matcher.groupdict().items()
    }
def convers_time(src:str):
ret= datetime.datetime.strptime(