python写入csv/xlsx文件--三元组

本文介绍如何使用Python将实体关系数据写入CSV和XLSX文件,包括打开文件、读取数据、使用正则表达式提取实体、判断关系类型并写入表格的具体步骤。

写入csv文件

import re
import csv
def get_entity_and_ralation_csv(src_preds, src_preds_outputs, tgt, encoding=None):
    """Extract (entity, relation, entity) triples and write them to a CSV file.

    The two input files are read in lockstep: line N of ``src_preds`` is a
    sentence in which the two entities are wrapped in ``<e1>...</e1>`` and
    ``<e2>...</e2>`` tags, and line N of ``src_preds_outputs`` is the relation
    label for that sentence. (The misspelling "ralation" in the function name
    is kept so existing callers continue to work.)

    Args:
        src_preds: Path to the UTF-8 sentence file.
        src_preds_outputs: Path to the UTF-8 relation-label file.
        tgt: Path of the CSV file to create (overwritten if it exists).
        encoding: Encoding of the output file. ``None`` (the default) keeps
            the platform default encoding, as the original code did.

    Rows whose relation is ``'Other'`` are not written. Lines that do not
    contain both an ``<e1>`` and an ``<e2>`` entity (for example a trailing
    blank line) are skipped. If the files differ in length, processing stops
    at the end of the shorter one.
    """
    # Compile once; the patterns are reused for every line.
    e1_pattern = re.compile(r"<e1>(.+?)</e1>")
    e2_pattern = re.compile(r"<e2>(.+?)</e2>")

    with open(src_preds, 'r', encoding='utf-8') as sentences, \
            open(src_preds_outputs, 'r', encoding='utf-8') as relations, \
            open(tgt, 'w', newline='', encoding=encoding) as out:
        csv_writer = csv.writer(out)
        csv_writer.writerow(["实体", "关系", "实体"])

        for raw_text, raw_relation in zip(sentences, relations):
            # Drop a possible BOM and the line break; spaces inside the
            # sentence are removed before the entities are extracted.
            text = raw_text.strip('\ufeff\n').replace(' ', '')
            relation = raw_relation.strip('\ufeff\n')

            e1_matches = e1_pattern.findall(text)
            e2_matches = e2_pattern.findall(text)
            if not e1_matches or not e2_matches:
                continue

            if relation != 'Other':
                # As in the original ``pop()`` call, the last match is used.
                csv_writer.writerow([e1_matches[-1], relation, e2_matches[-1]])

写入xlsx文件

import os
import openpyxl as xl
def get_entity_and_ralation_csv(src_preds, src_preds_outputs, tgt):
    """Extract (entity, relation, entity) triples and append them to an XLSX file.

    Despite the ``_csv`` suffix (kept so existing callers continue to work),
    this variant writes to an Excel workbook through openpyxl.

    The two input files are read in lockstep: line N of ``src_preds`` is a
    sentence in which the two entities are wrapped in ``<e1>...</e1>`` and
    ``<e2>...</e2>`` tags, and line N of ``src_preds_outputs`` is the relation
    label for that sentence.

    Args:
        src_preds: Path to the UTF-8 sentence file.
        src_preds_outputs: Path to the UTF-8 relation-label file.
        tgt: Path of the workbook. If it exists, rows are appended to its
            active sheet; otherwise a new workbook is created with a header
            row.

    Rows whose relation is ``'Other'`` are not written. Lines that do not
    contain both an ``<e1>`` and an ``<e2>`` entity (for example a trailing
    blank line) are skipped. If the files differ in length, processing stops
    at the end of the shorter one.
    """
    # Local import: this snippet's own import block only brings in os and
    # openpyxl, so the function must not rely on ``re`` being imported
    # elsewhere.
    import re

    e1_pattern = re.compile(r"<e1>(.+?)</e1>")
    e2_pattern = re.compile(r"<e2>(.+?)</e2>")

    if os.path.exists(tgt):
        workbook = xl.load_workbook(tgt)
        sheet = workbook.active
    else:
        workbook = xl.Workbook()
        sheet = workbook.active
        # Only a freshly created workbook gets the header; re-running against
        # an existing file must not insert a second header row among the data.
        sheet.append(["实体", "关系", "实体"])

    with open(src_preds, 'r', encoding='utf-8') as sentences, \
            open(src_preds_outputs, 'r', encoding='utf-8') as relations:
        for raw_text, raw_relation in zip(sentences, relations):
            # Drop a possible BOM and the line break; spaces inside the
            # sentence are removed before the entities are extracted.
            text = raw_text.strip('\ufeff\n').replace(' ', '')
            relation = raw_relation.strip('\ufeff\n')

            e1_matches = e1_pattern.findall(text)
            e2_matches = e2_pattern.findall(text)
            if not e1_matches or not e2_matches:
                continue

            if relation != 'Other':
                # As in the original ``pop()`` call, the last match is used.
                sheet.append([e1_matches[-1], relation, e2_matches[-1]])

    workbook.save(tgt)

1、src_preds中每行是一句话,其中两个实体分别用<e1>…</e1>和<e2>…</e2>标签标记,如:

一种<e1>育苗纸容器机</e1>,包括<e2>导向机构</e2>、分切机构、切虚线机构、印胶机构、甩刀机构和送纸机构。

2、src_preds_outputs每行为一个关系标签,与src_preds中的句子逐行对应,表示该句中两个实体之间的关系(关系为Other的行不会写入结果),如:

Whole-Component(e1,e2)

3、tgt为输出文件(CSV或XLSX)的路径,用于存储提取出的(实体,关系,实体)三元组。

import numpy as np import json import os import sys import csv #权重文件位置 WEIGHTS_JSON="../result/weight.json" #采样次数 SAMPLE_TIMES=100 #权重方法 WEIGHTMETHOD="fuzzy_bwm" #排序方法 RANKMETHOD="mabac" #结果目录 RESULT_DIR="../result/analysis/mc/c_out_tfn/" #攻击数量 E_NUMBER=20 #初始化结果目录 os.makedirs(RESULT_DIR,exist_ok=True)#递归创建,已存在则跳过 #清空目录文件 for file in os.listdir(RESULT_DIR): file_path=os.path.join(RESULT_DIR,file) try: os.remove(file_path) except OSError as e: print(f"无法删除旧文件{file_path},原因{e}") #逆变换采样函数 def c_tfn(tfn): l,m,u=tfn[0],tfn[1],tfn[2] if not (l<=m<=u): raise ValueError("tfn格式错误") r=np.random.rand()#生成[0,1]之间的随机数 if not(l==m and m==u): p=(m-l)/(u-l) if r<p: return l+sqrt(r*(u-l)*(m-l)) else: return u-sqrt((1-r)*(u-m)*(u-l)) elif l==m and u==m: return l#恒定值 elif l==m and u!=m: return u-sqrt((1-r)*(u-m)*(u-l)) elif u==m and l!=m: return l+sqrt(r*(u-l)*(m-l)) #原始权重计算 weightmethod="fuzzy_bwm" os.system('/usr/bin/time -f"%e %M" -a -o ../result/tmp/time.log python3 ../lib/Weight/{}/{}.py {} {}'.format(weightmethod , weightmethod , '../input/fuzzy_bwm_input/c4_c5_fuzzy_bwm.xlsx', '../result/weight.json')) try: with open(WEIGHTS_JSON,"r") as f: weight_data=json.load(f)#读取权重文件 base_tfn=weight_data["TFN"]#基础模糊tfn分布 except AssertionError as e: raise AssertionError(f"权重数量有误:{e}") result={} #蒙特卡洛循环 for sample_idx in range(SAMPLE_TIMES): #处理后的权重 processed_weight=[c_tfn(i) for i in base_tfn] #归一化 total=sum(processed_weight) if total == 0: raise ValueError("处理后权重和为零,无法归一化,检查基础权重或者扰动幅度") normalized_weights=[v/total for v in processed_weight] weight_data["weight"]=normalized_weights#扰动后 weight_output_path=os.path.join(RESULT_DIR,"processed_tfn_weights.json") with open(weight_output_path,"w") as f: json.dump(weight_data,f,indent=2)#处理后的权重 #用扰动后权重排序 os.system('/usr/bin/time -f"%e %M" -a -o ../result/tmp/time.log python3 ../lib/Rank/{}/{}.py {} {} {} {} {}'.format(RANKMETHOD, RANKMETHOD , '../result/tmp/topo.json',weight_output_path, '../result/tmp/data.csv', '../result/rank.json', 
'../result/path_metrics.csv')) try: with open('../result/rank.json',"r") as f: rank=json.load(f)#读取排名文件 except Exception as e: print(f"排名读取出错:{e}") #输出 output_path=os.path.join(RESULT_DIR,'') result[sample_idx]={ "processed_weight":processed_weight, "normalized_weights":normalized_weights, "rank":rank["ranking"] } output_path=os.path.join(RESULT_DIR,f"monte_carlo_tfn_fbwmout.json") with open(output_path,"w") as f: json.dump(result,f,indent=2) #计算排名的平均值和标准差 #提取排名 edge_rank={} for i in range(SAMPLE_TIMES): edge_rank[i]=[result[i]["ranking"][j][0] for j in range(E_NUMBER)]#排序 #print(edge_rank) #获取扰动前的前十名 p=os.system('/usr/bin/time -f"%e %M" -a -o ../result/tmp/time.log python3 ../lib/Rank/{}/{}.py {} {} {} {} {}'.format(RANKMETHOD , RANKMETHOD , '../result/tmp/topo.json',WEIGHTS_JSON, '../result/tmp/data.csv', '../result/rank.json', '../result/path_metrics.csv')) try: with open('../result/rank.json',"r") as f: rank_before=json.load(f)#读取排名文件 except Exception as e: print(f"排名读取出错:{e}") edge_Ten_rank=[rank_before["ranking"][i][0] for i in range(10)]#取前十名 #print(edge_Ten_rank) #统计 rank_rank={} for edge in edge_Ten_rank: rank_rank[edge]=[]#边的排名统计 for i in range(SAMPLE_TIMES): for edge in edge_Ten_rank: for j in range(E_NUMBER): if edge_rank[i][j]==edge: rank_rank[edge].append(j+1) #得到排名的结果rank_rank #print(rank_rank) #计算均值与标准差 mean_rank=[] std_rank=[] for edge in edge_Ten_rank: mean_rank.append(np.mean(rank_rank[edge])) std_rank.append(np.std(rank_rank[edge],ddof=0)) mean_std_output_path=os.path.join(RESULT_DIR,"mean_std_rank_tfn.csv") with open(mean_std_output_path,"w",newline="") as f: writer=csv.writer(f) writer.writerow(edge_Ten_rank)#排名 writer.writerow(mean_rank)#均值 writer.writerow(std_rank)#标准差。。。。程序功能可以实现嘛
最新发布
08-29
from math import * import numpy as np import pandas as pd import os import json import sys import csv #权重文件位置 WEIGHTS_JSON="../result/weight.json" #采样次数 SAMPLE_TIMES=100 #Fbwm输入目录 FBWM_INPUT_DIR="../input/fuzzy_bwm_input/" #Fbwm输入文件 FBWM_INPUT_PATH=os.path.join(FBWM_INPUT_DIR,"c4_c5_fuzzy_bwm.xlsx") #权重方法 WEIGHTMETHOD="bwm" #结果目录 RESULT_DIR="../result/analysis/mc/tri_dis/" #攻击数量 #初始化结果目录 os.makedirs(RESULT_DIR,exist_ok=True)#递归创建,已存在则跳过 #清空目录文件 for file in os.listdir(RESULT_DIR): file_path=os.path.join(RESULT_DIR,file) try: os.remove(file_path) except OSError as e: print(f"无法删除旧文件{file_path},原因{e}") #逆变换采样函数 def c_tfn(tfn): l,m,u=tfn[0],tfn[1],tfn[2] if not (l<=m<=u): raise ValueError("tfn格式错误") r=np.random.rand()#生成[0,1]之间的随机数 if not(l==m and m==u): p=(m-l)/(u-l) if r<p: return l+sqrt(r*(u-l)*(m-l)) else: return u-sqrt((1-r)*(u-m)*(u-l)) elif l==m and u==m: return l#恒定值 elif l==m and u!=m: return u-sqrt((1-r)*(u-m)*(u-l)) elif u==m and l!=m: return l+sqrt(r*(u-l)*(m-l)) #读取权重评估数据 address=FBWM_INPUT_PATH df=pd.read_excel(address, sheet_name = "Sheet1", skiprows = 2, nrows= 9, usecols=[1,2,3],header=None) df.columns=["Criteria","Best","Worst"] Best = df["Criteria"][df[df['Best'] == "Equally importance"].index.tolist()[0]] Worst = df["Criteria"][df[df['Worst'] == "Equally importance"].index.tolist()[0]] df.columns = ["Criteria", "Best", "Worst"] Cnum = df.shape[0] df.set_index(df["Criteria"], inplace=True) #print(df) # Fuzzification Fuzzy = {"Equally importance": [1, 1, 1], "Weakly important": [2 / 3, 1, 3 / 2], "Fairly important": [3 / 2, 2, 5 / 2], "Very important": [5 / 2, 3, 7 / 2], "Absolutely important": [7 / 2, 4, 9 / 2]} #输出作为后续输入 mc_bwminput_output_path=os.path.join(RESULT_DIR,"mc_bwm_input.xlsx") result={} #蒙特卡洛循环 for sample_idx in range(SAMPLE_TIMES): #print("原始列名:",df.columns) if "Number" not in df.columns: df.insert(loc=0,column="Number",value=range(1,10)) df.columns=["Number","Criteria","Best","Worst"] #print("改后列名:",df.columns) #print(df) for i in 
["Best", "Worst"]: for j in range(df.shape[0]): df[i][j] =c_tfn(Fuzzy[df[i][j]])#随机采样 df.to_excel(mc_bwminput_output_path,index=False)#随机采样后的bwm输入 weightmethod=WEIGHTMETHOD #权重计算 os.system('/usr/bin/time -f"%e %M" -a -o ../result/tmp/time.log python3 ../lib/Weight/{}/{}.py {} {}'.format(weightmethod , weightmethod , mc_bwminput_output_path, '../result/weight.json')) #指标计算 #os.system('/usr/bin/time -f"%e %M" -a -o ../result/tmp/time.log python3 ../lib/cal_metrics.py {} {}'.format('../result/tmp/topo.json', '../result/path_metrics.csv')) #排名计算 os.system('/usr/bin/time -f"%e %M" -a -o ../result/tmp/time.log python3 ../lib/Rank/{}/{}.py {} {} {} {} {}'.format('mabac' , 'mabac' , '../result/tmp/topo.json',WEIGHTS_JSON, '../result/tmp/data.csv', '../result/rank.json', '../result/path_metrics.csv')) try: with open('../result/rank.json',"r") as f: rank=json.load(f)#读取排名文件 except Exception as e: print(f"排名读取出错:{e}") try: with open(WEIGHTS_JSON,"r") as m: weight=json.load(m)#读取权重文件 except Exception as e: print(f"权重读取出错:{e}") #整合输出 result[sample_idx]={ "weight":weight, "ranking":rank["ranking"] } #输出排序结果 output_path=os.path.join(RESULT_DIR,"monte_carlo_tfn.json") with open(output_path,"w") as f: json.dump(result,f,indent=2) 为什么运行的时候会有报错:File "/root/Module_3/pack_v2.4/analysis/monte_carlo_tfn.py", line 85, in <module> df[i][j] =c_tfn(Fuzzy[df[i][j]])#随机采样 ~~~~~^^^^^^^^^^ KeyError: 1.9530171225014494,,,并且在结果目录下只生成了一个excel文件,这个文件的结构是我需要的,但另外一个json文件,没有生成
08-29
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值