神经网络权重处理(记录)
对已生成的权重处理(txt文件)
原文件
观察到虽然权重数量庞大,但是很多数据是重复的,大体处理过程如下:
- 去除权重文件中重复的数据;
- 查看新文件,手动去除备注(这些备注原本写在权重文件中,用于方便记忆网络结构);
- 分析权重分布,并绘制分布图;
权重处理参考代码片
***********************权重处理部分代码(供参考)********
def parse_rows(lines):
    """Split every text line into its space-separated fields.

    Args:
        lines: Iterable of strings, for example an open text file.

    Returns:
        A list with one entry per input line. Each entry is the list of
        strings obtained by stripping surrounding whitespace from the line
        and splitting the remainder on single spaces.
    """
    return [line.strip().split(" ") for line in lines]


if __name__ == "__main__":
    # The context manager closes the file, and the builtin ``list`` is no
    # longer rebound to the raw lines, so ``list(...)`` stays callable.
    with open("test.txt", "r") as weight_file:
        lists = parse_rows(weight_file)
    print(lists[0][0])
    print(lists[3][4], lists[3][5])
#转换为list
########################################
def split_lines(lines):
    """Turn text lines into lists of space-separated tokens.

    Blank lines are skipped. Surrounding whitespace (including the trailing
    newline) is removed before splitting, so the last token of a row never
    carries a newline character.

    Args:
        lines: Iterable of strings, for example an open text file.

    Returns:
        A list of token lists, one per non-blank input line.
    """
    rows = []
    for line in lines:
        text = line.strip()
        if text:
            rows.append(text.split(" "))
    return rows


if __name__ == "__main__":
    with open("new_file_1.txt", "r") as f:
        lista = split_lines(f)
    # The first four rows are exposed under individual names.
    list1 = lista[0]
    list2 = lista[1]
    list3 = lista[2]
    list4 = lista[3]
#转换为list 方案二
#######################################################
readDir = "./original_file.txt"
writeDir = "./new_file.txt"


def unique_lines(lines):
    """Yield each distinct line once, in first-seen order.

    Newline characters are stripped from both ends of a line before it is
    compared, so a final line without a trailing newline still counts as a
    duplicate of an earlier identical line.

    Args:
        lines: Iterable of strings, for example an open text file.

    Yields:
        The stripped text of every line that has not been seen before.
    """
    lines_seen = set()  # membership test is O(1) per line
    for line in lines:
        line = line.strip('\n')
        if line not in lines_seen:
            lines_seen.add(line)
            yield line


if __name__ == "__main__":
    # Both handles are closed on exit, which guarantees the output is flushed.
    with open(readDir, "r") as f, open(writeDir, "w") as outfile:
        outfile.writelines(line + '\n' for line in unique_lines(f))
################去除权重文件中重复的行#####################################
##############################统计满足条件的数量 ############################
def bucket_indices(values):
    """Return the positions of the values that fall into four fixed ranges.

    Every value is converted with ``float`` first, so numeric strings are
    accepted as well as numbers.

    Args:
        values: Iterable of numbers or numeric strings.

    Returns:
        A tuple of four index lists, in this order:
        ``0.5 <= v <= 1``, ``0 <= v < 0.5``, ``-0.5 <= v < 0``,
        ``-1 <= v < -0.5``.

    Raises:
        ValueError: If a value cannot be converted to ``float``.
    """
    numbers = [float(v) for v in values]
    return (
        [k for k, v in enumerate(numbers) if 0.5 <= v <= 1],
        # Lower bound is ">= 0"; the earlier "<= 0" test contradicted the
        # printed label and also matched every negative value.
        [k for k, v in enumerate(numbers) if 0 <= v < 0.5],
        [k for k, v in enumerate(numbers) if -0.5 <= v < 0],
        [k for k, v in enumerate(numbers) if -1 <= v < -0.5],
    )


if __name__ == "__main__":
    # NOTE(review): ``lists`` is not created here; it is expected to already
    # hold the parsed rows of the weight file when this section runs.
    max_value_index, mid_value_index, big_value_index, min_value_index = (
        bucket_indices(lists[0])
    )
    len_max = len(max_value_index)
    len_mid = len(mid_value_index)
    len_big = len(big_value_index)
    len_min = len(min_value_index)
    print("0.5<=x<=1的次数:", len_max, ",对应第几次满足:", max_value_index)
    print("0<=x<0.5的次数:", len_mid, ",对应第几次满足:", mid_value_index)
    print("-0.5<=x<0的次数:", len_big, ",对应第几次满足:", big_value_index)
    print("-1<=x<-0.5的次数:", len_min, ",对应第几次满足:", min_value_index)
################################################################
###############################统计满足条件的数量 方案二####################################
import numpy as np


def count_in_range(values, low, high):
    """Count the entries ``v`` of *values* with ``low <= v < high``.

    The input is converted to a float array first, so numeric strings are
    accepted, and the comparison is done element-wise by NumPy. No call to
    the builtin ``list`` is needed.

    Args:
        values: Sequence of numbers or numeric strings.
        low: Inclusive lower bound.
        high: Exclusive upper bound.

    Returns:
        The number of matching entries as a plain ``int``.
    """
    arr = np.asarray(values, dtype=float)
    return int(np.count_nonzero((arr >= low) & (arr < high)))


if __name__ == "__main__":
    # NOTE(review): ``lists`` is not created here; it is expected to already
    # hold the parsed rows of the weight file when this section runs.
    fir = count_in_range(lists[0], 0, 0.5)
    print("x>=0 and x<0.5", fir)
###############################################################
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 21:05:08 2020
"""
#!/usr/bin/env python
#coding=utf-8
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ordinal labels for the (at most) four rows that are reported.
ROW_LABELS = ("第一行", "第二行", "第三行", "第四行")


def parse_rows(lines):
    """Split every text line into its space-separated fields.

    Args:
        lines: Iterable of strings.

    Returns:
        One list of string tokens per input line (surrounding whitespace is
        stripped before splitting on single spaces).
    """
    return [line.strip().split(" ") for line in lines]


def bucket_counts(values):
    """Count how many values fall into each of four fixed ranges.

    Args:
        values: Sequence of numbers (or numeric strings).

    Returns:
        A tuple of four ints, in this order: count of ``0.5 <= v <= 1``,
        ``0 <= v < 0.5``, ``-0.5 <= v < 0`` and ``-1 <= v < -0.5``.
    """
    arr = np.asarray(values, dtype=float)
    return (
        int(np.count_nonzero((arr >= 0.5) & (arr <= 1))),
        int(np.count_nonzero((arr >= 0) & (arr < 0.5))),
        int(np.count_nonzero((arr >= -0.5) & (arr < 0))),
        int(np.count_nonzero((arr >= -1) & (arr < -0.5))),
    )


def print_distribution(title, row, *tail):
    """Print the range counts and the extrema of one row of weights.

    Args:
        title: Heading printed before the statistics.
        row: Numeric sequence holding the weights of one row.
        *tail: Extra arguments appended to the final ``print`` call (used to
            emit a blank separator line after the block).
    """
    print(title)
    len_max, len_mid, len_big, len_min = bucket_counts(row)
    print("0.5<=x<=1的次数:", len_max)
    print("0<=x<0.5的次数:", len_mid)
    print("-0.5<=x<0的次数:", len_big)
    print("-1<=x<-0.5的次数:", len_min)
    print("max=", max(row))
    print("min=", min(row), *tail)


if __name__ == "__main__":
    print("。。。读取文件中。。。\n")
    # Read once inside a context manager so the handle is always closed.
    with open("new_file_1.txt", "r") as weight_file:
        mylist = weight_file.readlines()
    lines = len(mylist)  # number of lines in the file
    print("修改后行数: %s" % (lines))
    for label, raw_line in zip(ROW_LABELS, mylist):
        # Character count of the raw line, newline included.
        print("%s字符数: %s" % (label, len(raw_line)))

    lists = parse_rows(mylist)
    print("元素[0][0]=" + lists[0][0])
    # Convert the reported rows from string tokens to float arrays.
    for index in range(min(len(ROW_LABELS), len(lists))):
        lists[index] = np.array(lists[index], dtype=float)
    print("\n")
    print("。。。字符处理为数字。。。\n")

    # Column counts come straight from the parsed rows; this replaces a second
    # pass over the file that subtracted one for a trailing newline token and
    # was therefore off by one for lines without a trailing space.
    for label, row in zip(ROW_LABELS, lists):
        tail = ("\n",) if label == ROW_LABELS[-1] else ()
        print("%s列数: %s" % (label, len(row)), *tail)

    print_distribution("第一行数据分布", lists[0], "\n")
    print_distribution("第二行数据分布", lists[1])
###################################################################################################################################
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 21:05:08 2020
"""
#!/usr/bin/env python
#coding=utf-8
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ordinal labels for the (at most) four rows that are reported.
ROW_LABELS = ("第一行", "第二行", "第三行", "第四行")

# Per-row keyword arguments for the histograms; the fourth row keeps the
# default face colour.
HIST_STYLES = ({"facecolor": "r"}, {"facecolor": "g"}, {"facecolor": "y"}, {})


def parse_rows(lines):
    """Split every text line into its space-separated fields.

    Args:
        lines: Iterable of strings.

    Returns:
        One list of string tokens per input line (surrounding whitespace is
        stripped before splitting on single spaces).
    """
    return [line.strip().split(" ") for line in lines]


def bucket_counts(values):
    """Count how many values fall into each of four fixed ranges.

    Args:
        values: Sequence of numbers (or numeric strings).

    Returns:
        A tuple of four ints, in this order: count of ``0.5 <= v <= 1``,
        ``0 <= v < 0.5``, ``-0.5 <= v < 0`` and ``-1 <= v < -0.5``.
    """
    arr = np.asarray(values, dtype=float)
    return (
        int(np.count_nonzero((arr >= 0.5) & (arr <= 1))),
        int(np.count_nonzero((arr >= 0) & (arr < 0.5))),
        int(np.count_nonzero((arr >= -0.5) & (arr < 0))),
        int(np.count_nonzero((arr >= -1) & (arr < -0.5))),
    )


def print_distribution(title, row, *tail):
    """Print the range counts and the extrema of one row of weights.

    Args:
        title: Heading printed before the statistics.
        row: Numeric sequence holding the weights of one row.
        *tail: Extra arguments appended to the final ``print`` call (used to
            emit a blank separator line after the block).
    """
    print(title)
    len_max, len_mid, len_big, len_min = bucket_counts(row)
    print("0.5<=x<=1的次数:", len_max)
    print("0<=x<0.5的次数:", len_mid)
    print("-0.5<=x<0的次数:", len_big)
    print("-1<=x<-0.5的次数:", len_min)
    print("max=", max(row))
    print("min=", min(row), *tail)


def plot_histograms(rows):
    """Draw one 200-bin filled-step histogram per row, each in its own figure.

    Args:
        rows: Sequence of numeric rows; at most the first four are plotted.
    """
    for row, style in zip(rows, HIST_STYLES):
        plt.figure()
        plt.hist(row, histtype='stepfilled', bins=200, **style)
    # show() opens the figure windows; draw() alone only re-renders figures
    # that are already displayed, so a plain script would exit without
    # showing anything.
    plt.show()


if __name__ == "__main__":
    print("。。。读取文件中。。。\n")
    # Read once inside a context manager so the handle is always closed.
    with open("new_file_1.txt", "r") as weight_file:
        mylist = weight_file.readlines()
    lines = len(mylist)  # number of lines in the file
    print("修改后行数: %s" % (lines))
    for label, raw_line in zip(ROW_LABELS, mylist):
        # Character count of the raw line, newline included.
        print("%s字符数: %s" % (label, len(raw_line)))

    lists = parse_rows(mylist)
    print("元素[0][0]=" + lists[0][0])
    # Convert the reported rows from string tokens to float arrays.
    for index in range(min(len(ROW_LABELS), len(lists))):
        lists[index] = np.array(lists[index], dtype=float)
    print("\n")
    print("。。。字符处理为数字。。。\n")

    # Column counts come straight from the parsed rows; this replaces a second
    # pass over the file that subtracted one for a trailing newline token and
    # was therefore off by one for lines without a trailing space.
    for label, row in zip(ROW_LABELS, lists):
        tail = ("\n",) if label == ROW_LABELS[-1] else ()
        print("%s列数: %s" % (label, len(row)), *tail)

    print_distribution("第一行数据分布", lists[0], "\n")
    print_distribution("第二行数据分布", lists[1])

    plot_histograms(lists)
权重分析参考代码片
***********************权重分析代码(供参考)********
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 21:05:08 2020
"""
#!/usr/bin/env python
#coding=utf-8
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ordinal labels for the (at most) four rows that are reported.
ROW_LABELS = ("第一行", "第二行", "第三行", "第四行")

# Per-row keyword arguments for the histograms; the fourth row keeps the
# default face colour.
HIST_STYLES = ({"facecolor": "r"}, {"facecolor": "g"}, {"facecolor": "y"}, {})


def parse_rows(lines):
    """Split every text line into its space-separated fields.

    Args:
        lines: Iterable of strings.

    Returns:
        One list of string tokens per input line (surrounding whitespace is
        stripped before splitting on single spaces).
    """
    return [line.strip().split(" ") for line in lines]


def bucket_counts(values):
    """Count how many values fall into each of four fixed ranges.

    Args:
        values: Sequence of numbers (or numeric strings).

    Returns:
        A tuple of four ints, in this order: count of ``0.5 <= v <= 1``,
        ``0 <= v < 0.5``, ``-0.5 <= v < 0`` and ``-1 <= v < -0.5``.
    """
    arr = np.asarray(values, dtype=float)
    return (
        int(np.count_nonzero((arr >= 0.5) & (arr <= 1))),
        int(np.count_nonzero((arr >= 0) & (arr < 0.5))),
        int(np.count_nonzero((arr >= -0.5) & (arr < 0))),
        int(np.count_nonzero((arr >= -1) & (arr < -0.5))),
    )


def print_distribution(title, row, *tail):
    """Print the range counts and the extrema of one row of weights.

    Args:
        title: Heading printed before the statistics.
        row: Numeric sequence holding the weights of one row.
        *tail: Extra arguments appended to the final ``print`` call (used to
            emit a blank separator line after the block).
    """
    print(title)
    len_max, len_mid, len_big, len_min = bucket_counts(row)
    print("0.5<=x<=1的次数:", len_max)
    print("0<=x<0.5的次数:", len_mid)
    print("-0.5<=x<0的次数:", len_big)
    print("-1<=x<-0.5的次数:", len_min)
    print("max=", max(row))
    print("min=", min(row), *tail)


def plot_histograms(rows):
    """Draw one 200-bin filled-step histogram per row, each in its own figure.

    Args:
        rows: Sequence of numeric rows; at most the first four are plotted.
    """
    for row, style in zip(rows, HIST_STYLES):
        plt.figure()
        plt.hist(row, histtype='stepfilled', bins=200, **style)
    # show() opens the figure windows; draw() alone only re-renders figures
    # that are already displayed, so a plain script would exit without
    # showing anything.
    plt.show()


if __name__ == "__main__":
    print("。。。读取文件中。。。\n")
    # Read once inside a context manager so the handle is always closed.
    with open("new_file_1.txt", "r") as weight_file:
        mylist = weight_file.readlines()
    lines = len(mylist)  # number of lines in the file
    print("修改后行数: %s" % (lines))
    for label, raw_line in zip(ROW_LABELS, mylist):
        # Character count of the raw line, newline included.
        print("%s字符数: %s" % (label, len(raw_line)))

    lists = parse_rows(mylist)
    print("元素[0][0]=" + lists[0][0])
    # Convert the reported rows from string tokens to float arrays.
    for index in range(min(len(ROW_LABELS), len(lists))):
        lists[index] = np.array(lists[index], dtype=float)
    print("\n")
    print("。。。字符处理为数字。。。\n")

    # Column counts come straight from the parsed rows; this replaces a second
    # pass over the file that subtracted one for a trailing newline token and
    # was therefore off by one for lines without a trailing space.
    for label, row in zip(ROW_LABELS, lists):
        tail = ("\n",) if label == ROW_LABELS[-1] else ()
        print("%s列数: %s" % (label, len(row)), *tail)

    print_distribution("第一行数据分布", lists[0], "\n")
    print_distribution("第二行数据分布", lists[1])

    plot_histograms(lists)
结果
-
第一层权重
-
第二层权重
-
第三层权重
-
第四层权重
-
总权重