要求：只截取从 rule id 到 exit 之间的部分，例如：
rule id 169
**
exit
import re
import pandas as pd
from openpyxl import Workbook
# Column headers of the output sheet, in the order they should appear.
columns_order = [
    # rule identity and verdict
    "Rule ID",
    "action",
    "log",
    # source side
    "src-zone",
    "src-ip",
    "src-range",
    "src-addr",
    "src-host",
    # destination side
    "dst-zone",
    "dst-ip",
    "dst-range",
    "dst-addr",
    "dst-host",
    # remaining rule attributes
    "service",
    "schedule",
    "status",
    "name",
    "im",
    "application",
]

# Accumulator for the parsed rule records (one dict per rule).
data = []

# Workbook to write, and the configuration file to read.
output_file = r"D:\PythonProject\hillstoneSW\output\output2.xlsx"
input_file = r"D:\PythonProject\hillstoneSW\input\SHTEST-FW-SG-6000-M3108-CONFIG.Startup-202501241438.DAT"
# Matches the "rule id <N>" line that opens a policy rule section.
_RULE_HEADER = re.compile(r"rule\s+id\s+(\d+)\b")

# Matches "<key> <value>". The value is either one double-quoted string or the
# rest of the line, so two-token values such as "src-range <start> <end>" stay
# in one piece. The separator is mandatory (\s+): with an optional separator the
# regex backtracks and splits a bare word like "disable" into "disabl" + "e".
_KEY_VALUE = re.compile(r"(\w[\w-]*)\s+(\"[^\"]*\"|\S+(?:\s+\S+)*)")


def parse_rules(lines, columns):
    """Extract policy rules from configuration lines.

    Only the text between a ``rule id <N>`` line and the next ``exit`` line is
    parsed. Lines outside such a section, including the ``exit`` lines that
    close other sections, are ignored.

    Args:
        lines: Iterable of text lines (a list of strings or an open text file).
        columns: Field names every rule record starts with, each set to "".

    Returns:
        A list with one dict per rule that was closed by ``exit``. Each dict
        holds the given columns plus "Rule ID" (int). A key that occurs several
        times in one rule has its values joined with "; ". A bare ``disable``
        line sets "status" to "disable". Keys that are not in ``columns`` are
        dropped. A rule that is never closed by ``exit`` is discarded.
    """
    rules = []
    current = None  # rule being filled, or None while outside a rule section

    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue

        header = _RULE_HEADER.match(line)
        if header:
            current = dict.fromkeys(columns, "")
            current["Rule ID"] = int(header.group(1))
            continue

        # Anything outside a rule section is not ours to parse.
        if current is None:
            continue

        if line.split()[0] == "exit":
            rules.append(current)
            current = None
            continue

        if line.lower() == "disable":
            current["status"] = "disable"
            continue

        match = _KEY_VALUE.match(line)
        if not match:
            continue
        key, value = match.groups()
        if key.lower() == "log":
            # "log" is matched case-insensitively.
            key = "log"
        if key not in current:
            continue

        value = value.strip('"')
        previous = current[key]
        current[key] = f"{previous}; {value}" if previous else value

    return rules


if __name__ == "__main__":
    # The file object is iterated lazily; no need to hold every line in memory.
    with open(input_file, "r", encoding="utf-8") as f:
        data = parse_rules(f, columns_order)

    # columns= fixes the column order and keeps the headers even when the
    # configuration contains no rule at all.
    df = pd.DataFrame(data, columns=columns_order)
    df.to_excel(output_file, index=False)
    print(f"数据已成功保存到 {output_file}")