Shell: take the files older than 3 days in a log directory (date-prefixed .log and .err files), pack and compress them grouped by date, and store the compressed archives in a backup directory under the parent directory.

This post describes a way to automate log file backup with a Shell script. The script selects the .log and .err files in a given directory, groups the ones older than three days into folders named after the date in each file name, compresses every folder into a .zip archive, and finally removes the intermediate date folders so only the archives remain.
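As a hypothetical illustration (the file names and dates are my own example, not from the original post), suppose the script runs on 2021-07-25 and /home/web/yy/ contains:

/home/web/yy/20210719.log    # older than three days, gets archived
/home/web/yy/20210719.err    # older than three days, goes into the same archive
/home/web/yy/20210724.log    # within the last three days, left untouched

After the run, the parent directory holds backup/20210719.zip containing both 20210719 files, while 20210724.log is not archived.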
This was an interview question a classmate of mine was asked; below is my solution.
path="/home/web/yy/" # fixed path of the log directory
f=`ls $path*.log $path*.err -1 -c` # collect the .log and .err files under that path
mkdir -p "${path}../backup" # create the backup folder under the parent directory of the log path
cd "${path}../backup" # work inside the backup folder from here on
d2=`date -d "3 days ago" +%Y%m%d` # date string for three days ago, e.g. 20210722
for name in ${f};do # loop over the log files collected above
	name=`basename $name`  # the entries are full paths, keep only the file name: /home/web/yy/20210722.log ==> 20210722.log
    if expr "${name%.*}" "<" "$d2" >/dev/null;then # if the date in the file name is earlier than three days ago
	    if [ ! -d "${name%.*}" ]; then # if a folder for that date does not exist yet
		   mkdir "${name%.*}" # create the date folder
	    fi
	    cp -fa "$path$name" "${name%.*}"  # copy that day's log file into the date folder; note that we are already inside backup (cd above)
    fi
done # done copying files

f2=`ls -1 -c` # list everything under the backup folder
for name in ${f2};do # loop over the entries
	[ -d "${name}" ] || continue # skip anything that is not a date folder (e.g. zip files from a previous run)
	echo ${name}
	zip -r ${name}.zip ${name} # pack the date folder into a zip archive
	rm -rf ${name} # remove the date folder, keeping only the zip file
done

exit 0 # done
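A more compact alternative, which is not from the original post but follows the same requirements, derives the date folder directly from each file name and compares the dates numerically. This is only a sketch under assumed conditions (GNU date and zip available, file names of the form YYYYMMDD.log / YYYYMMDD.err); the paths mirror the script above.

#!/bin/bash
# Alternative sketch (assumptions: GNU date, zip installed, date-named log files).
path="/home/web/yy"                      # log directory, same as above
backup="$path/../backup"                 # backup folder under the parent directory
cutoff=`date -d "3 days ago" +%Y%m%d`    # date string for three days ago
mkdir -p "$backup"

for file in "$path"/*.log "$path"/*.err; do
	[ -e "$file" ] || continue           # skip when the glob matches nothing
	base=`basename "$file"`              # e.g. 20210722.log
	day="${base%.*}"                     # e.g. 20210722
	if [ "$day" -lt "$cutoff" ] 2>/dev/null; then
		mkdir -p "$backup/$day"
		cp -fa "$file" "$backup/$day/"
	fi
done

# Zip each date folder and drop the folder, keeping only the archive.
for dir in "$backup"/*/; do
	[ -d "$dir" ] || continue
	day=`basename "$dir"`
	(cd "$backup" && zip -r "$day.zip" "$day" && rm -rf "$day")
done

To run either version unattended, a cron entry such as 0 2 * * * /home/web/backup_logs.sh would handle the daily archiving (the script path and schedule here are assumptions).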
