805. Split Array With Same Average

本文探讨了将数组A划分为两个非空子集B和C,使得B和C的元素平均值相等的问题。提出了两种解决方案,一是通过深度优先搜索(DFS)进行剪枝优化,二是采用中间相遇法,通过将数组元素减去平均值实现固定和的匹配。

解法

解法一:转化成是否存在k个数使得和为target

很明显,B和C那相等的均值一定是A的均值
所以给定B的长度l,我们可以计算出B的总和应该是 $\frac{sum(A) \cdot l}{n}$(只有当这个值是整数时才可能有解)
所以问题就转化成数组里是不是有l个数的和加起来刚好等于这个值,dfs求解
但是会超时,需要剪下枝:

  1. 如果所有候选的数加起来也组不够target,pass
  2. 如果前一个数为a,它失败了,这个数也是a,那也不用遍历【瞬间快了】
class Solution(object):
    def splitArraySameAverage(self, A):
        """Decide whether A can be split into two non-empty parts with equal averages.

        Both parts must share the average of the whole list, so a part of
        size l must sum to sum(A) * l / n.  For every feasible size l (only
        sizes up to n // 2 are needed, because the other part covers the
        rest) a depth-first search looks for l values with that exact sum.

        The caller's list is left untouched; the search works on a sorted copy.

        :type A: List[int]
        :rtype: bool
        """
        nums = sorted(A)
        n = len(nums)
        # Two non-empty parts need at least two elements (this also covers []).
        if n < 2:
            return False
        total = sum(nums)

        # prefix[k] is the sum of the k smallest values (prefix[0] == 0).
        prefix = [0] * (n + 1)
        for idx, value in enumerate(nums):
            prefix[idx + 1] = prefix[idx] + value

        def check(i, l, target):
            """Return True if l values chosen from nums[0..i] sum to target.

            Callers always keep i >= l - 1, i.e. enough candidates remain.
            """
            if l == 0:
                return target == 0
            # Prune: the l smallest candidates give the minimum reachable sum
            # and the l largest give the maximum; outside that window no
            # choice can hit the target.
            smallest = prefix[l]
            largest = prefix[i + 1] - prefix[i + 1 - l]
            if target < smallest or target > largest:
                return False
            # j is the index of the largest chosen value; it cannot go below
            # l - 1 because l - 1 more values must still fit beneath it.
            for j in range(i, l - 2, -1):
                # Skip a value equal to the one just tried at this level:
                # it would explore the same multiset again.
                if j != i and nums[j] == nums[j + 1]:
                    continue
                if check(j - 1, l - 1, target - nums[j]):
                    return True
            return False

        for l in range(1, n // 2 + 1):
            # The required sum must be an integer for a part of size l to exist.
            if total * l % n == 0 and check(n - 1, l, total * l // n):
                return True
        return False

解法二:中间相遇

我觉得大概这种和刚好等于多少多少的类型都可以用中间相遇来解
但是B的均值虽然是确定的,和却不确定(取决于B的长度),如何得到一个固定的和呢?
我们把A里的每个数都减去平均值aver,那么A的均值就变成了0,B的均值也是0,和就也固定为0了
使用中间相遇法,把数组分成左右两部分,如果左边有和为x并且右边为-x,那么把两部分的数组拿起来就可以拼成B了
这里有两个需要注意的:

  1. x和-x不能刚好用完左右两边全部的数字,否则C会是空的
  2. 但是如果只用某一边的数字就可以凑成0了,那么就不用再考虑后面的了
class Solution(object):
    def splitArraySameAverage(self, A):
        """Decide whether A can be split into two non-empty parts with equal averages.

        Meet in the middle: after subtracting the overall average from every
        element, a valid part is exactly a non-empty, proper subset whose
        shifted values sum to zero.  The list is cut into two halves, all
        subset sums of each half are enumerated, and a sum x from the left
        half is matched with -x from the right half.

        To stay in exact integer arithmetic each element a is represented as
        n * a - sum(A), which is n times (a - average); a subset sums to zero
        in one representation exactly when it does in the other.

        :type A: List[int]
        :rtype: bool
        """
        n = len(A)
        # Two non-empty parts need at least two elements; checking this first
        # also keeps an empty list from reaching any average computation.
        if n < 2:
            return False
        total = sum(A)
        shifted = [a * n - total for a in A]
        half = n // 2
        left_part = shifted[:half]
        right_part = shifted[half:]

        def all_sums(values):
            """Return the set of sums of every non-empty subset of values."""
            sums = set()
            for v in values:
                # The right-hand side is built from the old contents before
                # the in-place union, so v is added to each old sum once.
                sums |= {v} | {s + v for s in sums}
            return sums

        left = all_sums(left_part)
        # A zero-sum subset inside one half is already a valid part: the
        # other half is non-empty, so the complement is non-empty too.
        if 0 in left:
            return True
        right = all_sums(right_part)
        if 0 in right:
            return True

        # Pairing x with -x must not consume every element of both halves,
        # otherwise the complement would be empty.  Since neither half has a
        # zero-sum subset, the full-half sum can only come from the entire
        # half, so rejecting x == sum(left_part) excludes exactly that case.
        full_left = sum(left_part)
        return any(x != full_left and -x in right for x in left)
parser = argparse.ArgumentParser(description= “DATA ANALYSE”) parser.add_argument(‘-mode’, default = “uncore”, help = “uncore core”) parser.add_argument(‘-mode’, default = “core”, help = “uncore core”) parser.add_argument(‘-fit_mode’, default = “singlevariate”, help = “singlevariate/multivariate 多个变量”) parser.add_argument(‘-linear_mode’, default = “linear”, help = “linear or non-linear 线性或者非线性”) parser.add_argument(‘-input_path’, default = “vmin_20240812”, help = “input_path”) opts = parser.parse_args() def opts_init(): opts.hpm_select = “” opts.input_log_path = “…/{}/{}/”.format(opts.input_path,opts.mode) opts.output_dir = “./{}{}{}”.format(opts.mode,opts.linear_mode,opts.fit_mode) opts.output_log_path = os.path.join(opts.output_dir,“log”) if not os.path.exists(opts.output_dir): os.makedirs(opts.output_dir) opts.output_ckp= os.path.join(opts.output_dir,“data.ckp”) if opts.mode == “core”: opts.freq_list = [“1200”,“1550”,“1800”,“2000”,“2200”] if opts.mode == “uncore”: opts.freq_list = [“1200”,“1550”,“2000”] if opts.fit_mode == "singlevariate": # opts.hpm_select_list = ["sum137"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] opts.hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] # opts.hpm_select_list = ["avg"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] # opts.hpm_select_list = ["avg137"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] elif opts.fit_mode == "multivariate": opts.hpm_select_list = ["multivariate_137"] def print_file(*args, mode = “info”, config = opts): filename = opts.output_log_path def same_print(*args, filename = filename, end = “\n” ): # def same_print(*args, filename = filename, end = os.linesep ): with open(filename,“a+”,encoding=“utf-8”) as f: for index , single_args in enumerate(args): if index < len(args)-1: print(single_args, end = " ") print(single_args, end = " ", file=f) else: print(single_args, end = end) print(single_args, end = end, file=f) if mode == "info": same_print(*args, filename = 
filename) elif mode == "info_noend": same_print(*args,end=" ", filename = filename) elif mode == "sep_start": # sep_start same_print("===="*5, filename = filename, end = " ") same_print(*args, filename = filename, end = " ") same_print("===="*5, filename = filename) elif mode == "sep_end": # Delimiter Separated end same_print("===="*5, filename = filename, end = " ") count = 0 for single_args in args: count += len(str(single_args)) same_print("="*count, filename = filename, end = " ") same_print("===="*5, filename = filename) else: raise ValueError("Mode Wrong") def plot_error(data, info=“”): # 设置matplotlib正常显示中文和负号 # matplotlib.rcParams[‘font.sans-serif’]=[‘SimHei’] # 用黑体显示中文 # matplotlib.rcParams[‘axes.unicode_minus’]=False # 正常显示负号 # 随机生成(10000,)服从正态分布的数据 # data = np.random.randn(10000) “”" 绘制直方图 data:必选参数,绘图数据 bins:直方图的长条形数目,可选项,默认为10 normed:是否将得到的直方图向量归一化,可选项,默认为0,代表不归一化,显示频数。normed=1,表示归一化,显示频率。 facecolor:长条形的颜色 edgecolor:长条形边框的颜色 alpha:透明度 “”" plt.hist(data, bins=40, density=0, facecolor=“blue”, edgecolor=“black”, alpha=0.7) # 显示横轴标签 plt.xlabel(“Error Distribution”) # # 显示纵轴标签 plt.ylabel(“Count”) # # 显示图标题 # plt.title(“频数/频率分布直方图”) plt.title(‘Error Distribution: {}’.format(opts.key_mark )) plt.savefig(os.path.join(opts.output_dir,“{}_error_dist.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) def get_average(records): “”" 平均值 “”" return sum(records) / len(records) def get_variance(records): “”" 方差 反映一个数据集的离散程度 “”" average = get_average(records) return sum([(x - average) ** 2 for x in records]) / len(records) def Evaluate(Y, Y_fit, info ): sse = np.sum((Y_fit - Y) ** 2) mse = sse / Y.shape[0] rmse = np.sqrt(mse) normal_rmse = rmse / (np.max(Y) - np.min(Y)) var = np.sum((Y - np.mean(Y)) ** 2) r2 = 1 - sse / var # adjr2 = 1 - (1 - r2) * (n - 1)/(n - p - 1) error = Y_fit - Y plot_error(error) error = abs(Y_fit - Y) error_var = get_variance(error) error_dev = math.sqrt(error_var) print_file("{}:{:<15} {}:{:<15} {}:{:<15} 
{}:{:<15}".format("RMSE",round(rmse,4),"Normal RMSE",round(normal_rmse,4),"ERR_Dev",round(error_dev,2), "R2",round(r2,2))) # print_file("{},{},{},{}".format(round(rmse,2),round(normal_rmse,2),round(error_dev,2),round(r2,2))) class chip_info_parser: def init(self, result_path, soc_path , opts = opts): self.result_path = result_path self.soc_path = soc_path self.mode = opts.mode self.hpm_select = opts.hpm_select self.vmin_dict = {} self.chip_id = “” self.hpm_dict = { “DIE0_Core_max”:[], “DIE0_Core_avg”:[], “DIE0_Core_min”:[], “DIE1_Core_max”:[], “DIE1_Core_avg”:[], “DIE1_Core_min”:[], “DIE0_UnCore_min”:[], “DIE0_UnCore_avg”:[], “DIE0_UnCore_max”:[], “DIE1_UnCore_min”:[], “DIE1_UnCore_avg”:[], “DIE1_UnCore_max”:[], } self.parser_soc() self.parser_vim() self.chip_info_sum() def parser_vim(self): # with open(self.result_path, encoding='utf-8-sig') as f: # print(self.result_path) with open(self.result_path) as f: for index, row in enumerate( csv.reader(f, skipinitialspace=True)): # print(row) # input() if index == 0 : for index,i in enumerate(row): if i == "Target_Dieid": index_die_id = index if self.mode == "core": if i == "Cur Core Freq": index_die_freq = index if self.mode == "uncore": if i == "Cur Uncore Freq": index_die_freq = index if i == "VMIN": index_die_vmin = index continue if len(row) == 0 : continue # print(index_die_id) # print(index_die_freq) # print(index_die_vmin) # input() if "CORE" in row: if row[index_die_freq] not in self.vmin_dict: #判断频点在不在 self.vmin_dict[row[index_die_freq]] = {} if row[index_die_id] == "0": self.vmin_dict[row[index_die_freq]]["DIE0_Core"] = (float(row[index_die_vmin]), self.get_worst_hpm(0,"core") , self.chip_id ) if row[index_die_id] == "1": self.vmin_dict[row[index_die_freq]]["DIE1_Core"] = (float(row[index_die_vmin]), self.get_worst_hpm(1,"core"), self.chip_id) if "UNCORE" in row: if row[index_die_freq] not in self.vmin_dict: self.vmin_dict[row[index_die_freq]] = {} if row[index_die_id] == "0": 
self.vmin_dict[row[index_die_freq]]["DIE0_UnCore"] = (float(row[index_die_vmin]), self.get_worst_hpm(0,"uncore"), self.chip_id) if row[index_die_id] == "1": self.vmin_dict[row[index_die_freq]]["DIE1_UnCore"] = (float(row[index_die_vmin]), self.get_worst_hpm(1,"uncore"), self.chip_id) # print(self.vmin_dict) # input() def parser_soc(self): hpm_mode = "" openFile = open(self.soc_path) for line in openFile.readlines(): if "CHIP ID " in line: line = line.replace(" ", "") line = line.replace("\n", "") self.chip_id = line.split(":")[1] # 解析一下HPM if "CHIP HPM Efuse" in line: hpm_mode = "Open" if hpm_mode: line.replace(" ", "") if "CORE" in line: hpm_mode = "CORE" if hpm_mode: line.replace(" ", "") if "UNCORE" in line : hpm_mode = "UNCORE" if hpm_mode: line.replace(" ", "") if "CHIP HPM Summary" in line : hpm_mode = "" if hpm_mode == "CORE" and ("min" in line) and ("max" in line) and ("stdev" in line): if "TA" in line and "\'volt\': 900" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE0_Core_max"].append(max_data) self.hpm_dict["DIE0_Core_avg"].append(avg_data) self.hpm_dict["DIE0_Core_min"].append(min_data) if "TB" in line and "\'volt\': 900" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE1_Core_max"].append(max_data) self.hpm_dict["DIE1_Core_avg"].append(avg_data) self.hpm_dict["DIE1_Core_min"].append(min_data) if hpm_mode == "UNCORE" and ("min" in line) and ("max" in line) and ("stdev" in line): if "TA" in line 
and "\'volt\': 700" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE0_UnCore_max"].append(max_data) self.hpm_dict["DIE0_UnCore_avg"].append(avg_data) self.hpm_dict["DIE0_UnCore_min"].append(min_data) if "TB" in line and "\'volt\': 700" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE1_UnCore_max"].append(max_data) self.hpm_dict["DIE1_UnCore_avg"].append(avg_data) self.hpm_dict["DIE1_UnCore_min"].append(min_data) openFile.close() def get_worst_hpm(self, die, mode): hpm_select_list1 = [0,1,2,3,4,5,6,7,8,9] hpm_select_list2 = [str(i) for i in hpm_select_list1] if self.hpm_select in hpm_select_list1 or self.hpm_select in hpm_select_list2: return self.get_worst_hpm_single(die, mode, self.hpm_select) if self.hpm_select == "sumall" : return self.get_worst_hpm_all(die, mode) if self.hpm_select == "sum137" : return self.get_worst_hpm_137(die, mode) if self.hpm_select == "multivariate_137" : return self.get_worst_hpm_duoyuan137(die, mode) if self.hpm_select == "avg" : return self.get_worst_hpm_avg_all(die, mode) if self.hpm_select == "avg137" : return self.get_worst_hpm_avg_137(die, mode) def get_worst_hpm_single(self, die, mode = "core", hpm_select = 0): hpm_select = int(hpm_select) # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if 
worst_hpm > hpm[hpm_select]: worst_hpm = hpm[hpm_select] return worst_hpm def get_worst_hpm_all(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > sum(hpm): worst_hpm = sum(hpm) return worst_hpm def get_worst_hpm_137(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > hpm[1]+hpm[3]+hpm[7]: worst_hpm = hpm[1]+hpm[3]+hpm[7] return worst_hpm def get_worst_hpm_duoyuan137(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm1 = 1E10 for hpm in hpm_list: if worst_hpm1 > hpm[1]: worst_hpm1 = hpm[1] worst_hpm3 = 1E10 for hpm in hpm_list: if worst_hpm3 > hpm[3]: worst_hpm3 = hpm[3] worst_hpm7 = 1E10 for hpm in hpm_list: if worst_hpm7 > hpm[7]: worst_hpm7 = hpm[7] return worst_hpm1,worst_hpm3,worst_hpm7 def get_worst_hpm_avg_all(self, die, mode = "core"): # 当前思路 获得hpm平均值 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_avg".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_avg".format(die)] else: raise ValueError hpm_avg = [] for i in hpm_list[0]: hpm_avg.append(0) for hpm in hpm_list: for i in range(len(hpm)): hpm_avg[i] += hpm[i]/4 return sum(hpm_avg) def get_worst_hpm_avg_137(self, die, mode = "core"): # 当前思路 获得hpm平均值 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_avg".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_avg".format(die)] else: raise ValueError hpm_avg = [] for i in hpm_list[0]: 
hpm_avg.append(0) for hpm in hpm_list: for i in range(len(hpm)): hpm_avg[i] += hpm[i]/4 return hpm_avg[1]+hpm_avg[3]+hpm_avg[7] def chip_info_sum(self): pass def save_to_pkl(opts, data_dict): with open(os.path.join(opts.output_dir,“data.ckp”), ‘wb’) as f: pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL) print_file(“===”*2,“Saving ckp at {} successfully”.format(opts.output_ckp)) def data_pre(): # 根据输入日志文件夹获取日志文件夹 chip_info_list =[] # if os.path.exists( opts.output_ckp): # with open(opts.output_ckp, ‘rb’) as f: # chip_info_list = pickle.load(f) # print_file(“===”*2,“Loading ckp at {} successfully”.format(opts.output_ckp)) # return chip_info_list single_chip_log_list = [] for single_chip_log_temp in os.listdir(opts.input_log_path): single_chip_log = os.path.join(opts.input_log_path, single_chip_log_temp) single_chip_log_list.append(single_chip_log) for single_chip_log in single_chip_log_list: # print_file("Processing ", single_chip_log) # 获取result路径 for file_path in os.listdir(single_chip_log): if "result" in file_path: result_path = os.path.join(single_chip_log, file_path) if "Soc_Manager" in file_path: soc_path = os.path.join(single_chip_log, file_path) chip_info = chip_info_parser(result_path, soc_path ,opts) chip_info_list.append(chip_info) # save_to_pkl(opts, chip_info_list) return chip_info_list def data_pross_linear(X,Y): if isinstance(X[0],tuple): X = np.array(X).reshape(-1, 3) else: X = np.array(X).reshape(-1, 1) Y = np.array(Y).reshape(-1, 1) from sklearn.linear_model import LinearRegression lin = LinearRegression() lin.fit(X, Y) Evaluate(Y, lin.predict(X), “线性”) print_file(“K:{}, B:{}”.format(lin.coef_,lin.intercept_) ) plt.scatter(X[:,0].reshape(-1, 1), Y , color = ‘blue’) plt.scatter(X[:,0].reshape(-1, 1), lin.predict(X), color = ‘red’) for i in range(len(chip_id)): if “10_544” in chip_id[i]: # plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) # if “10_1242” in chip_id[i]: plt.annotate(chip_id[i], xy = ( hpm[i], 
vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) plt.title(‘Linear Regression: {}’.format(opts.key_mark )) plt.xlabel(‘HPM Select’) plt.ylabel(‘Vmin’) plt.savefig(os.path.join(opts.output_dir,“{}_fitting.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) def data_pross_poly(X, Y): from numpy import polyfit, poly1d X = np.array(X) Y = np.array(Y) coeff = polyfit(X, Y, 2) Evaluate(Y, coeff[0] * X * X + coeff[1] * X + coeff[2], “非线性”) print_file(“coeff:{}”.format(coeff) ) plt.scatter(X, Y, color = ‘blue’) plt.scatter(X, coeff[0] * X * X + coeff[1] * X + coeff[2], color = ‘red’) plt.title(‘Poly Regression: {}’.format(opts.key_mark )) plt.xlabel(‘HPM Select’) plt.ylabel(‘Vmin’) plt.savefig(os.path.join(opts.output_dir,“{}_fitting.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) def plot_scatter(X, Y): if isinstance(X[0],tuple): X = np.array(X).reshape(-1, 3) X = X[:,0] else: X = np.array(X).reshape(-1, 1) plt.scatter(X, Y, color = ‘blue’) # for i in range(len(chip_id)): # if “10_1613” in chip_id[i]: # plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) plt.title(‘Scatter: {}’.format(opts.key_mark )) plt.xlabel(‘HPM Select’) plt.ylabel(‘Vmin’) plt.savefig(os.path.join(opts.output_dir,“{}_scatter.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) if name == “main”: opts_init() for hpm_select in opts.hpm_select_list: print("**********************hpm选择方式",hpm_select) for freq in opts.freq_list: opts.hpm_select = hpm_select opts.freq = freq chip_info_list = data_pre() chip_id,hpm,vmin = [],[],[] for chip_info in chip_info_list: # print(chip_info.vmin_dict[opts.freq]) try: if opts.mode == "core": if "DIE0_Core" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][2]) if "DIE1_Core" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][0]) 
hpm.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][2]) elif opts.mode == "uncore": if "DIE0_UnCore" in chip_info.vmin_dict[opts.freq] : vmin.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][2]) if "DIE1_UnCore" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][2]) except: pass opts.key_mark = "{}_{}_{}_{}_{}".format(opts.mode, opts.freq, opts.linear_mode, opts.fit_mode, opts.hpm_select) # print(hpm) # print(vmin) if opts.linear_mode == "linear": # 线性拟合 data_pross_linear(hpm,vmin) elif opts.linear_mode == "non-linear": # 非线性拟合 data_pross_poly(hpm,vmin) plot_scatter(hpm,vmin) print_file("频点:{}, HPM选择方式:{}_{}, Core/Uncore:{:<10}, 样本量共计 {}".format(opts.freq, opts.fit_mode, opts.hpm_select, opts.mode,len(hpm))) print_file("====="*10) import os import argparse import pickle import csv import matplotlib matplotlib.use(‘Agg’) import matplotlib.pyplot as plt plt.rcParams[‘figure.figsize’]=(19.2, 10.8) import numpy as np import math 默认配置 parser = argparse.ArgumentParser(description= “DATA ANALYSE”) parser.add_argument(‘-mode’, default = “uncore”, help = “uncore core”) parser.add_argument(‘-mode’, default = “core”, help = “uncore core”) parser.add_argument(‘-fit_mode’, default = “singlevariate”, help = “singlevariate/multivariate 多个变量”) parser.add_argument(‘-linear_mode’, default = “linear”, help = “linear or non-linear 线性或者非线性”) parser.add_argument(‘-input_path’, default = “vmin_20240812”, help = “input_path”) opts = parser.parse_args() def opts_init(): opts.hpm_select = “” opts.input_log_path = “…/{}/{}/”.format(opts.input_path,opts.mode) opts.output_dir = “./{}{}{}”.format(opts.mode,opts.linear_mode,opts.fit_mode) 
opts.output_log_path = os.path.join(opts.output_dir,“log”) if not os.path.exists(opts.output_dir): os.makedirs(opts.output_dir) opts.output_ckp= os.path.join(opts.output_dir,“data.ckp”) if opts.mode == “core”: opts.freq_list = [“1200”,“1550”,“1800”,“2000”,“2200”] if opts.mode == “uncore”: opts.freq_list = [“1200”,“1550”,“2000”] if opts.fit_mode == "singlevariate": # opts.hpm_select_list = ["sum137"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] opts.hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] # opts.hpm_select_list = ["avg"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] # opts.hpm_select_list = ["avg137"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] elif opts.fit_mode == "multivariate": opts.hpm_select_list = ["multivariate_137"] def print_file(*args, mode = “info”, config = opts): filename = opts.output_log_path def same_print(*args, filename = filename, end = “\n” ): # def same_print(*args, filename = filename, end = os.linesep ): with open(filename,“a+”,encoding=“utf-8”) as f: for index , single_args in enumerate(args): if index < len(args)-1: print(single_args, end = " ") print(single_args, end = " ", file=f) else: print(single_args, end = end) print(single_args, end = end, file=f) if mode == "info": same_print(*args, filename = filename) elif mode == "info_noend": same_print(*args,end=" ", filename = filename) elif mode == "sep_start": # sep_start same_print("===="*5, filename = filename, end = " ") same_print(*args, filename = filename, end = " ") same_print("===="*5, filename = filename) elif mode == "sep_end": # Delimiter Separated end same_print("===="*5, filename = filename, end = " ") count = 0 for single_args in args: count += len(str(single_args)) same_print("="*count, filename = filename, end = " ") same_print("===="*5, filename = filename) else: raise ValueError("Mode Wrong") def plot_error(data, info=“”): # 设置matplotlib正常显示中文和负号 # matplotlib.rcParams[‘font.sans-serif’]=[‘SimHei’] # 用黑体显示中文 # 
matplotlib.rcParams[‘axes.unicode_minus’]=False # 正常显示负号 # 随机生成(10000,)服从正态分布的数据 # data = np.random.randn(10000) “”" 绘制直方图 data:必选参数,绘图数据 bins:直方图的长条形数目,可选项,默认为10 normed:是否将得到的直方图向量归一化,可选项,默认为0,代表不归一化,显示频数。normed=1,表示归一化,显示频率。 facecolor:长条形的颜色 edgecolor:长条形边框的颜色 alpha:透明度 “”" plt.hist(data, bins=40, density=0, facecolor=“blue”, edgecolor=“black”, alpha=0.7) # 显示横轴标签 plt.xlabel(“Error Distribution”) # # 显示纵轴标签 plt.ylabel(“Count”) # # 显示图标题 # plt.title(“频数/频率分布直方图”) plt.title(‘Error Distribution: {}’.format(opts.key_mark )) plt.savefig(os.path.join(opts.output_dir,“{}_error_dist.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) def get_average(records): “”" 平均值 “”" return sum(records) / len(records) def get_variance(records): “”" 方差 反映一个数据集的离散程度 “”" average = get_average(records) return sum([(x - average) ** 2 for x in records]) / len(records) def Evaluate(Y, Y_fit, info ): sse = np.sum((Y_fit - Y) ** 2) mse = sse / Y.shape[0] rmse = np.sqrt(mse) normal_rmse = rmse / (np.max(Y) - np.min(Y)) var = np.sum((Y - np.mean(Y)) ** 2) r2 = 1 - sse / var # adjr2 = 1 - (1 - r2) * (n - 1)/(n - p - 1) error = Y_fit - Y plot_error(error) error = abs(Y_fit - Y) error_var = get_variance(error) error_dev = math.sqrt(error_var) print_file("{}:{:<15} {}:{:<15} {}:{:<15} {}:{:<15}".format("RMSE",round(rmse,4),"Normal RMSE",round(normal_rmse,4),"ERR_Dev",round(error_dev,2), "R2",round(r2,2))) # print_file("{},{},{},{}".format(round(rmse,2),round(normal_rmse,2),round(error_dev,2),round(r2,2))) class chip_info_parser: def init(self, result_path, soc_path , opts = opts): self.result_path = result_path self.soc_path = soc_path self.mode = opts.mode self.hpm_select = opts.hpm_select self.vmin_dict = {} self.chip_id = “” self.hpm_dict = { “DIE0_Core_max”:[], “DIE0_Core_avg”:[], “DIE0_Core_min”:[], “DIE1_Core_max”:[], “DIE1_Core_avg”:[], “DIE1_Core_min”:[], “DIE0_UnCore_min”:[], “DIE0_UnCore_avg”:[], “DIE0_UnCore_max”:[], “DIE1_UnCore_min”:[], “DIE1_UnCore_avg”:[], “DIE1_UnCore_max”:[], } 
self.parser_soc() self.parser_vim() self.chip_info_sum() def parser_vim(self): # with open(self.result_path, encoding='utf-8-sig') as f: # print(self.result_path) with open(self.result_path) as f: for index, row in enumerate( csv.reader(f, skipinitialspace=True)): # print(row) # input() if index == 0 : for index,i in enumerate(row): if i == "Target_Dieid": index_die_id = index if self.mode == "core": if i == "Cur Core Freq": index_die_freq = index if self.mode == "uncore": if i == "Cur Uncore Freq": index_die_freq = index if i == "VMIN": index_die_vmin = index continue if len(row) == 0 : continue # print(index_die_id) # print(index_die_freq) # print(index_die_vmin) # input() if "CORE" in row: if row[index_die_freq] not in self.vmin_dict: #判断频点在不在 self.vmin_dict[row[index_die_freq]] = {} if row[index_die_id] == "0": self.vmin_dict[row[index_die_freq]]["DIE0_Core"] = (float(row[index_die_vmin]), self.get_worst_hpm(0,"core") , self.chip_id ) if row[index_die_id] == "1": self.vmin_dict[row[index_die_freq]]["DIE1_Core"] = (float(row[index_die_vmin]), self.get_worst_hpm(1,"core"), self.chip_id) if "UNCORE" in row: if row[index_die_freq] not in self.vmin_dict: self.vmin_dict[row[index_die_freq]] = {} if row[index_die_id] == "0": self.vmin_dict[row[index_die_freq]]["DIE0_UnCore"] = (float(row[index_die_vmin]), self.get_worst_hpm(0,"uncore"), self.chip_id) if row[index_die_id] == "1": self.vmin_dict[row[index_die_freq]]["DIE1_UnCore"] = (float(row[index_die_vmin]), self.get_worst_hpm(1,"uncore"), self.chip_id) # print(self.vmin_dict) # input() def parser_soc(self): hpm_mode = "" openFile = open(self.soc_path) for line in openFile.readlines(): if "CHIP ID " in line: line = line.replace(" ", "") line = line.replace("\n", "") self.chip_id = line.split(":")[1] # 解析一下HPM if "CHIP HPM Efuse" in line: hpm_mode = "Open" if hpm_mode: line.replace(" ", "") if "CORE" in line: hpm_mode = "CORE" if hpm_mode: line.replace(" ", "") if "UNCORE" in line : hpm_mode = "UNCORE" if hpm_mode: 
line.replace(" ", "") if "CHIP HPM Summary" in line : hpm_mode = "" if hpm_mode == "CORE" and ("min" in line) and ("max" in line) and ("stdev" in line): if "TA" in line and "\'volt\': 900" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE0_Core_max"].append(max_data) self.hpm_dict["DIE0_Core_avg"].append(avg_data) self.hpm_dict["DIE0_Core_min"].append(min_data) if "TB" in line and "\'volt\': 900" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE1_Core_max"].append(max_data) self.hpm_dict["DIE1_Core_avg"].append(avg_data) self.hpm_dict["DIE1_Core_min"].append(min_data) if hpm_mode == "UNCORE" and ("min" in line) and ("max" in line) and ("stdev" in line): if "TA" in line and "\'volt\': 700" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE0_UnCore_max"].append(max_data) self.hpm_dict["DIE0_UnCore_avg"].append(avg_data) self.hpm_dict["DIE0_UnCore_min"].append(min_data) if "TB" in line and "\'volt\': 700" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = 
[float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE1_UnCore_max"].append(max_data) self.hpm_dict["DIE1_UnCore_avg"].append(avg_data) self.hpm_dict["DIE1_UnCore_min"].append(min_data) openFile.close() def get_worst_hpm(self, die, mode): hpm_select_list1 = [0,1,2,3,4,5,6,7,8,9] hpm_select_list2 = [str(i) for i in hpm_select_list1] if self.hpm_select in hpm_select_list1 or self.hpm_select in hpm_select_list2: return self.get_worst_hpm_single(die, mode, self.hpm_select) if self.hpm_select == "sumall" : return self.get_worst_hpm_all(die, mode) if self.hpm_select == "sum137" : return self.get_worst_hpm_137(die, mode) if self.hpm_select == "multivariate_137" : return self.get_worst_hpm_duoyuan137(die, mode) if self.hpm_select == "avg" : return self.get_worst_hpm_avg_all(die, mode) if self.hpm_select == "avg137" : return self.get_worst_hpm_avg_137(die, mode) def get_worst_hpm_single(self, die, mode = "core", hpm_select = 0): hpm_select = int(hpm_select) # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > hpm[hpm_select]: worst_hpm = hpm[hpm_select] return worst_hpm def get_worst_hpm_all(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > sum(hpm): worst_hpm = sum(hpm) return worst_hpm def get_worst_hpm_137(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > hpm[1]+hpm[3]+hpm[7]: worst_hpm = 
hpm[1]+hpm[3]+hpm[7] return worst_hpm def get_worst_hpm_duoyuan137(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm1 = 1E10 for hpm in hpm_list: if worst_hpm1 > hpm[1]: worst_hpm1 = hpm[1] worst_hpm3 = 1E10 for hpm in hpm_list: if worst_hpm3 > hpm[3]: worst_hpm3 = hpm[3] worst_hpm7 = 1E10 for hpm in hpm_list: if worst_hpm7 > hpm[7]: worst_hpm7 = hpm[7] return worst_hpm1,worst_hpm3,worst_hpm7 def get_worst_hpm_avg_all(self, die, mode = "core"): # 当前思路 获得hpm平均值 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_avg".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_avg".format(die)] else: raise ValueError hpm_avg = [] for i in hpm_list[0]: hpm_avg.append(0) for hpm in hpm_list: for i in range(len(hpm)): hpm_avg[i] += hpm[i]/4 return sum(hpm_avg) def get_worst_hpm_avg_137(self, die, mode = "core"): # 当前思路 获得hpm平均值 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_avg".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_avg".format(die)] else: raise ValueError hpm_avg = [] for i in hpm_list[0]: hpm_avg.append(0) for hpm in hpm_list: for i in range(len(hpm)): hpm_avg[i] += hpm[i]/4 return hpm_avg[1]+hpm_avg[3]+hpm_avg[7] def chip_info_sum(self): pass def save_to_pkl(opts, data_dict): with open(os.path.join(opts.output_dir,“data.ckp”), ‘wb’) as f: pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL) print_file(“===”*2,“Saving ckp at {} successfully”.format(opts.output_ckp)) def data_pre(): # 根据输入日志文件夹获取日志文件夹 chip_info_list =[] # if os.path.exists( opts.output_ckp): # with open(opts.output_ckp, ‘rb’) as f: # chip_info_list = pickle.load(f) # print_file(“===”*2,“Loading ckp at {} successfully”.format(opts.output_ckp)) # return chip_info_list single_chip_log_list = [] for single_chip_log_temp in os.listdir(opts.input_log_path): 
single_chip_log = os.path.join(opts.input_log_path, single_chip_log_temp) single_chip_log_list.append(single_chip_log) for single_chip_log in single_chip_log_list: # print_file("Processing ", single_chip_log) # 获取result路径 for file_path in os.listdir(single_chip_log): if "result" in file_path: result_path = os.path.join(single_chip_log, file_path) if "Soc_Manager" in file_path: soc_path = os.path.join(single_chip_log, file_path) chip_info = chip_info_parser(result_path, soc_path ,opts) chip_info_list.append(chip_info) # save_to_pkl(opts, chip_info_list) return chip_info_list def data_pross_linear(X,Y): if isinstance(X[0],tuple): X = np.array(X).reshape(-1, 3) else: X = np.array(X).reshape(-1, 1) Y = np.array(Y).reshape(-1, 1) from sklearn.linear_model import LinearRegression lin = LinearRegression() lin.fit(X, Y) Evaluate(Y, lin.predict(X), “线性”) print_file(“K:{}, B:{}”.format(lin.coef_,lin.intercept_) ) plt.scatter(X[:,0].reshape(-1, 1), Y , color = ‘blue’) plt.scatter(X[:,0].reshape(-1, 1), lin.predict(X), color = ‘red’) for i in range(len(chip_id)): if “10_544” in chip_id[i]: # plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) # if “10_1242” in chip_id[i]: plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) plt.title(‘Linear Regression: {}’.format(opts.key_mark )) plt.xlabel(‘HPM Select’) plt.ylabel(‘Vmin’) plt.savefig(os.path.join(opts.output_dir,“{}_fitting.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) def data_pross_poly(X, Y): from numpy import polyfit, poly1d X = np.array(X) Y = np.array(Y) coeff = polyfit(X, Y, 2) Evaluate(Y, coeff[0] * X * X + coeff[1] * X + coeff[2], “非线性”) print_file(“coeff:{}”.format(coeff) ) plt.scatter(X, Y, color = ‘blue’) plt.scatter(X, coeff[0] * X * X + coeff[1] * X + coeff[2], color = ‘red’) plt.title(‘Poly Regression: {}’.format(opts.key_mark )) plt.xlabel(‘HPM Select’) plt.ylabel(‘Vmin’) 
plt.savefig(os.path.join(opts.output_dir,“{}_fitting.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) def plot_scatter(X, Y): if isinstance(X[0],tuple): X = np.array(X).reshape(-1, 3) X = X[:,0] else: X = np.array(X).reshape(-1, 1) plt.scatter(X, Y, color = ‘blue’) # for i in range(len(chip_id)): # if “10_1613” in chip_id[i]: # plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) plt.title(‘Scatter: {}’.format(opts.key_mark )) plt.xlabel(‘HPM Select’) plt.ylabel(‘Vmin’) plt.savefig(os.path.join(opts.output_dir,“{}_scatter.png”.format(opts.key_mark))) plt.cla() plt.close(“all”) if name == “main”: opts_init() for hpm_select in opts.hpm_select_list: print("**********************hpm选择方式",hpm_select) for freq in opts.freq_list: opts.hpm_select = hpm_select opts.freq = freq chip_info_list = data_pre() chip_id,hpm,vmin = [],[],[] for chip_info in chip_info_list: # print(chip_info.vmin_dict[opts.freq]) try: if opts.mode == "core": if "DIE0_Core" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][2]) if "DIE1_Core" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][2]) elif opts.mode == "uncore": if "DIE0_UnCore" in chip_info.vmin_dict[opts.freq] : vmin.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][2]) if "DIE1_UnCore" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][2]) except: pass opts.key_mark = 
"{}_{}_{}_{}_{}".format(opts.mode, opts.freq, opts.linear_mode, opts.fit_mode, opts.hpm_select) # print(hpm) # print(vmin) if opts.linear_mode == "linear": # 线性拟合 data_pross_linear(hpm,vmin) elif opts.linear_mode == "non-linear": # 非线性拟合 data_pross_poly(hpm,vmin) plot_scatter(hpm,vmin) print_file("频点:{}, HPM选择方式:{}_{}, Core/Uncore:{:<10}, 样本量共计 {}".format(opts.freq, opts.fit_mode, opts.hpm_select, opts.mode,len(hpm))) print_file("====="*10)代码运行的目录结构
09-03
# HPM-vs-Vmin regression script.
#
# Reads one folder per chip (a "result" CSV with Vmin measurements and a
# "Soc_Manager" log with HPM statistics), reduces the HPM statistics to a
# single predictor per die according to the selected strategy, fits Vmin
# against it, and writes plots plus a text log into the output directory.
import os
import argparse
import pickle
import csv
import matplotlib
matplotlib.use('Agg')  # select the non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (19.2, 10.8)
import numpy as np
import math

# Default configuration
parser = argparse.ArgumentParser(description="DATA ANALYSE")
parser.add_argument('-mode', default="core", help="uncore core")
parser.add_argument('-fit_mode', default="singlevariate", help="singlevariate/multivariate 多个变量")
parser.add_argument('-linear_mode', default="linear", help="linear or non-linear 线性或者非线性")
parser.add_argument('-input_path', default="vmin_20240812", help="input_path")
opts = parser.parse_args()


def opts_init():
    """Derive paths, frequency list and HPM selector list from the CLI options.

    Creates the output directory if needed and stores every derived value as
    an attribute on the module-level ``opts`` namespace.

    Raises:
        ValueError: if ``-mode`` or ``-fit_mode`` has an unsupported value
            (previously this surfaced later as an AttributeError).
    """
    opts.hpm_select = ""
    opts.input_log_path = "../{}/{}/".format(opts.input_path, opts.mode)
    opts.output_dir = "./{}_{}_{}".format(opts.mode, opts.linear_mode, opts.fit_mode)
    opts.output_log_path = os.path.join(opts.output_dir, "log")
    os.makedirs(opts.output_dir, exist_ok=True)
    opts.output_ckp = os.path.join(opts.output_dir, "data.ckp")

    if opts.mode == "core":
        opts.freq_list = ["1200", "1550", "1800", "2000", "2200"]
    elif opts.mode == "uncore":
        opts.freq_list = ["1200", "1550", "2000"]
    else:
        raise ValueError("-mode must be 'core' or 'uncore', got {!r}".format(opts.mode))

    if opts.fit_mode == "singlevariate":
        opts.hpm_select_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, "sumall", "sum137"]
    elif opts.fit_mode == "multivariate":
        opts.hpm_select_list = ["multivariate_137"]
    else:
        raise ValueError(
            "-fit_mode must be 'singlevariate' or 'multivariate', got {!r}".format(opts.fit_mode))


def print_file(*args, mode="info", config=opts):
    """Print ``args`` to stdout and append the same text to the log file.

    Args:
        *args: values to print, separated by single spaces.
        mode: ``"info"`` (newline-terminated), ``"info_noend"`` (terminated by
            a space), ``"sep_start"`` (args framed by ``=`` banners) or
            ``"sep_end"`` (a closing banner as wide as the args).
        config: namespace providing ``output_log_path``; defaults to ``opts``.

    Raises:
        ValueError: if ``mode`` is not one of the values above.
    """
    def render(items, end="\n"):
        # An empty item list produces no output at all, not even the terminator.
        return " ".join(str(item) for item in items) + end if items else ""

    banner = "====" * 5
    if mode == "info":
        text = render(args)
    elif mode == "info_noend":
        text = render(args, end=" ")
    elif mode == "sep_start":
        text = render((banner,), " ") + render(args, " ") + render((banner,))
    elif mode == "sep_end":
        width = sum(len(str(item)) for item in args)
        text = render((banner,), " ") + render(("=" * width,), " ") + render((banner,))
    else:
        raise ValueError("Mode Wrong")

    with open(config.output_log_path, "a+", encoding="utf-8") as log_file:
        print(text, end="")
        log_file.write(text)


def plot_error(data, info=""):
    """Save a 40-bin histogram of ``data`` as ``<key_mark>_error_dist.png``.

    Args:
        data: values to histogram (the fit residuals when called by Evaluate).
        info: unused; kept for interface compatibility.
    """
    plt.hist(data, bins=40, density=0, facecolor="blue", edgecolor="black", alpha=0.7)
    plt.xlabel("Error Distribution")
    plt.ylabel("Count")
    plt.title('Error Distribution: {}'.format(opts.key_mark))
    plt.savefig(os.path.join(opts.output_dir, "{}_error_dist.png".format(opts.key_mark)))
    plt.cla()
    plt.close("all")


def get_average(records):
    """Return the arithmetic mean of ``records``."""
    return sum(records) / len(records)


def get_variance(records):
    """Return the population variance of ``records`` (divides by ``len``)."""
    average = get_average(records)
    return sum([(x - average) ** 2 for x in records]) / len(records)


def Evaluate(Y, Y_fit, info):
    """Log fit-quality metrics and plot the residual histogram.

    Logs RMSE, RMSE normalised by the range of ``Y``, the standard deviation
    of the absolute error, and R^2.

    Args:
        Y: measured values (numpy array).
        Y_fit: fitted values, same shape as ``Y``.
        info: unused; kept for interface compatibility.
    """
    sse = np.sum((Y_fit - Y) ** 2)
    mse = sse / Y.shape[0]
    rmse = np.sqrt(mse)
    normal_rmse = rmse / (np.max(Y) - np.min(Y))
    var = np.sum((Y - np.mean(Y)) ** 2)
    r2 = 1 - sse / var

    plot_error(Y_fit - Y)
    # Population standard deviation of |error|.  np.std replaces
    # math.sqrt(get_variance(...)), which handed math.sqrt a 1-element array
    # when Y is a column vector (array-to-scalar conversion is deprecated).
    error_dev = float(np.std(abs(Y_fit - Y)))
    print_file("{}:{:<15} {}:{:<15} {}:{:<15} {}:{:<15}".format(
        "RMSE", round(rmse, 4), "Normal RMSE", round(normal_rmse, 4),
        "ERR_Dev", round(error_dev, 2), "R2", round(r2, 2)))


class chip_info_parser:
    """Parse one chip's SoC log and result CSV into HPM and Vmin tables.

    After construction:
      * ``hpm_dict`` maps ``"DIE<n>_<Core|UnCore>_<max|avg|min>"`` to a list of
        float lists, one per matching SoC log line.
      * ``vmin_dict`` maps frequency string -> ``"DIE<n>_<Core|UnCore>"`` ->
        ``(vmin, hpm_predictor, chip_id)``.
    """

    # SoC log section -> (label used in hpm_dict keys, voltage marker of the rows kept)
    _SOC_SECTIONS = {
        "CORE": ("Core", "'volt': 900"),
        "UNCORE": ("UnCore", "'volt': 700"),
    }
    # Substring identifying the die on a statistics line -> die number
    _DIE_MARKERS = (("TA", 0), ("TB", 1))

    def __init__(self, result_path, soc_path, opts=opts):
        self.result_path = result_path
        self.soc_path = soc_path
        self.mode = opts.mode
        self.hpm_select = opts.hpm_select
        self.vmin_dict = {}
        self.chip_id = ""
        self.hpm_dict = {
            "DIE0_Core_max": [],
            "DIE0_Core_avg": [],
            "DIE0_Core_min": [],
            "DIE1_Core_max": [],
            "DIE1_Core_avg": [],
            "DIE1_Core_min": [],
            "DIE0_UnCore_min": [],
            "DIE0_UnCore_avg": [],
            "DIE0_UnCore_max": [],
            "DIE1_UnCore_min": [],
            "DIE1_UnCore_avg": [],
            "DIE1_UnCore_max": [],
        }
        # The SoC log must be parsed first: parser_vim() calls get_worst_hpm(),
        # which reads hpm_dict and chip_id.
        self.parser_soc()
        self.parser_vim()
        self.chip_info_sum()

    def parser_vim(self):
        """Fill ``vmin_dict`` from the result CSV.

        The header row locates the die-id, frequency and VMIN columns; every
        later row containing a ``CORE`` / ``UNCORE`` cell is stored under its
        frequency and die.

        Raises:
            ValueError: if a data row must be stored but the header did not
                contain the required columns.
        """
        col_die = col_freq = col_vmin = None
        freq_title = {"core": "Cur Core Freq", "uncore": "Cur Uncore Freq"}.get(self.mode)
        with open(self.result_path, newline="") as f:
            for row_no, row in enumerate(csv.reader(f, skipinitialspace=True)):
                if row_no == 0:
                    for col, title in enumerate(row):
                        if title == "Target_Dieid":
                            col_die = col
                        if title == freq_title:
                            col_freq = col
                        if title == "VMIN":
                            col_vmin = col
                    continue
                if len(row) == 0:
                    continue
                for tag, label, hpm_mode in (("CORE", "Core", "core"),
                                             ("UNCORE", "UnCore", "uncore")):
                    if tag not in row:
                        continue
                    if None in (col_die, col_freq, col_vmin):
                        raise ValueError(
                            "{}: header lacks Target_Dieid / frequency / VMIN column".format(
                                self.result_path))
                    # The frequency entry is created even when the die id is
                    # neither "0" nor "1".
                    per_freq = self.vmin_dict.setdefault(row[col_freq], {})
                    die = row[col_die]
                    if die in ("0", "1"):
                        per_freq["DIE{}_{}".format(die, label)] = (
                            float(row[col_vmin]),
                            self.get_worst_hpm(int(die), hpm_mode),
                            self.chip_id,
                        )

    @staticmethod
    def _parse_stats(line):
        """Return the ``(max, avg, min)`` float lists embedded in a statistics line."""
        def numbers(opening, closing):
            return [float(v) for v in line.split(opening)[1].split(closing)[0].split(",")]

        return (
            numbers("'max': [", "], 'min': "),
            numbers("'avg': [", "], 'stdev': "),
            numbers("'min': [", "], 'avg': "),
        )

    def parser_soc(self):
        """Fill ``chip_id`` and ``hpm_dict`` from the SoC manager log.

        Statistics lines are only collected between a ``CHIP HPM Efuse`` line
        and the following ``CHIP HPM Summary`` line, inside the CORE / UNCORE
        section, and only for the section's voltage marker.
        """
        hpm_mode = ""
        with open(self.soc_path) as soc_file:
            for line in soc_file:
                if "CHIP ID " in line:
                    line = line.replace(" ", "").replace("\n", "")
                    self.chip_id = line.split(":")[1]

                if "CHIP HPM Efuse" in line:
                    hpm_mode = "Open"
                # NOTE(review): the original indentation was lost; the section
                # checks are assumed to be nested under `if hpm_mode` - confirm.
                # The original also called line.replace(" ", "") here and
                # discarded the result; the no-op is dropped.  Actually
                # stripping spaces would break the "'volt': 900" style matches.
                if hpm_mode:
                    if "CORE" in line:
                        hpm_mode = "CORE"
                    if "UNCORE" in line:  # also matched "CORE" just above
                        hpm_mode = "UNCORE"
                    if "CHIP HPM Summary" in line:
                        hpm_mode = ""

                section = self._SOC_SECTIONS.get(hpm_mode)
                if section is None:
                    continue
                if not ("min" in line and "max" in line and "stdev" in line):
                    continue
                label, volt_marker = section
                if volt_marker not in line:
                    continue
                for die_marker, die in self._DIE_MARKERS:
                    if die_marker in line:
                        max_data, avg_data, min_data = self._parse_stats(line)
                        prefix = "DIE{}_{}".format(die, label)
                        self.hpm_dict[prefix + "_max"].append(max_data)
                        self.hpm_dict[prefix + "_avg"].append(avg_data)
                        self.hpm_dict[prefix + "_min"].append(min_data)

    def _records(self, die, mode, stat):
        """Return the ``hpm_dict`` list for ``die`` / ``mode`` / ``stat`` (min or avg)."""
        if mode == "core":
            return self.hpm_dict["DIE{}_Core_{}".format(die, stat)]
        if mode == "uncore":
            return self.hpm_dict["DIE{}_UnCore_{}".format(die, stat)]
        raise ValueError("mode must be 'core' or 'uncore', got {!r}".format(mode))

    @staticmethod
    def _worst(values):
        """Return the smallest of ``values``, capped at the 1E10 start value.

        NOTE(review): an empty ``values`` yields 1E10, which then enters the
        regression as a data point - confirm this is intended.
        """
        return min([1E10, *values])

    @staticmethod
    def _scaled_sum(records):
        """Return the element-wise sum of ``records`` with each value divided by 4.

        NOTE(review): the divisor is hard-coded, so this is a true average only
        when there are exactly four records - confirm.
        """
        totals = [0 for _ in records[0]]
        for record in records:
            for pos in range(len(record)):
                totals[pos] += record[pos] / 4
        return totals

    def get_worst_hpm(self, die, mode):
        """Reduce the HPM records of ``die`` according to ``self.hpm_select``.

        Returns the predictor value (a 3-tuple for ``"multivariate_137"``), or
        ``None`` when ``hpm_select`` is not a recognised selector.
        """
        single_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        if (self.hpm_select in single_indices
                or self.hpm_select in [str(i) for i in single_indices]):
            return self.get_worst_hpm_single(die, mode, self.hpm_select)
        reducers = {
            "sumall": self.get_worst_hpm_all,
            "sum137": self.get_worst_hpm_137,
            "multivariate_137": self.get_worst_hpm_duoyuan137,
            "avg": self.get_worst_hpm_avg_all,
            "avg137": self.get_worst_hpm_avg_137,
        }
        reducer = reducers.get(self.hpm_select)
        return reducer(die, mode) if reducer is not None else None

    def get_worst_hpm_single(self, die, mode="core", hpm_select=0):
        """Return the smallest value of sensor ``hpm_select`` over the "min" records."""
        sensor = int(hpm_select)
        return self._worst(rec[sensor] for rec in self._records(die, mode, "min"))

    def get_worst_hpm_all(self, die, mode="core"):
        """Return the smallest per-record sum over the "min" records."""
        return self._worst(sum(rec) for rec in self._records(die, mode, "min"))

    def get_worst_hpm_137(self, die, mode="core"):
        """Return the smallest per-record sum of sensors 1, 3 and 7 ("min" records)."""
        return self._worst(rec[1] + rec[3] + rec[7] for rec in self._records(die, mode, "min"))

    def get_worst_hpm_duoyuan137(self, die, mode="core"):
        """Return the independent minima of sensors 1, 3 and 7 as a 3-tuple."""
        records = self._records(die, mode, "min")
        return tuple(self._worst(rec[sensor] for rec in records) for sensor in (1, 3, 7))

    def get_worst_hpm_avg_all(self, die, mode="core"):
        """Return the sum over all sensors of the scaled "avg" records."""
        return sum(self._scaled_sum(self._records(die, mode, "avg")))

    def get_worst_hpm_avg_137(self, die, mode="core"):
        """Return the sum of sensors 1, 3 and 7 of the scaled "avg" records."""
        totals = self._scaled_sum(self._records(die, mode, "avg"))
        return totals[1] + totals[3] + totals[7]

    def chip_info_sum(self):
        """Placeholder hook called at the end of ``__init__``; does nothing."""
        pass


def save_to_pkl(opts, data_dict):
    """Pickle ``data_dict`` to ``<output_dir>/data.ckp`` and log the path."""
    with open(os.path.join(opts.output_dir, "data.ckp"), 'wb') as f:
        pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)
    print_file("===" * 2, "Saving ckp at {} successfully".format(opts.output_ckp))


def data_pre():
    """Parse every chip folder under ``opts.input_log_path``.

    Each sub-folder is expected to hold a file whose name contains "result"
    and one whose name contains "Soc_Manager".  Folders missing either file
    are logged and skipped; previously the paths of the preceding folder were
    silently reused.

    Returns:
        list of chip_info_parser, one per usable folder.
    """
    chip_info_list = []
    for entry in os.listdir(opts.input_log_path):
        chip_dir = os.path.join(opts.input_log_path, entry)
        if not os.path.isdir(chip_dir):
            continue
        result_path = soc_path = None
        for file_name in os.listdir(chip_dir):
            if "result" in file_name:
                result_path = os.path.join(chip_dir, file_name)
            if "Soc_Manager" in file_name:
                soc_path = os.path.join(chip_dir, file_name)
        if result_path is None or soc_path is None:
            print_file("Skipping {}: result or Soc_Manager file not found".format(chip_dir))
            continue
        chip_info_list.append(chip_info_parser(result_path, soc_path, opts))
    return chip_info_list


def data_pross_linear(X, Y, labels=None, highlight="10_544"):
    """Fit ``Y`` against ``X`` with linear regression, log and plot the fit.

    Args:
        X: predictor per sample; scalars, or 3-tuples in multivariate mode.
        Y: Vmin per sample.
        labels: chip id per sample, used to annotate points.  Defaults to the
            script-level ``chip_id`` list when the module is run as a script.
        highlight: substring of the chip ids to annotate.
    """
    if isinstance(X[0], tuple):
        X = np.array(X).reshape(-1, 3)
    else:
        X = np.array(X).reshape(-1, 1)
    Y = np.array(Y).reshape(-1, 1)
    if labels is None:
        labels = globals().get("chip_id", ())

    from sklearn.linear_model import LinearRegression
    lin = LinearRegression()
    lin.fit(X, Y)
    Y_fit = lin.predict(X)
    Evaluate(Y, Y_fit, "线性")
    print_file("K:{}, B:{}".format(lin.coef_, lin.intercept_))

    first_feature = X[:, 0].reshape(-1, 1)
    plt.scatter(first_feature, Y, color='blue')
    plt.scatter(first_feature, Y_fit, color='red')
    # Annotate at the plotted coordinates (first feature, Vmin).  The original
    # used the raw globals hpm[i] / vmin[i], which raises on the tuples of
    # multivariate mode.
    for label, x_val, y_val in zip(labels, X[:, 0], Y[:, 0]):
        if highlight in label:
            plt.annotate(label, xy=(x_val, y_val), xytext=(x_val + 1, y_val + 1))
    plt.title('Linear Regression: {}'.format(opts.key_mark))
    plt.xlabel('HPM Select')
    plt.ylabel('Vmin')
    plt.savefig(os.path.join(opts.output_dir, "{}_fitting.png".format(opts.key_mark)))
    plt.cla()
    plt.close("all")


def data_pross_poly(X, Y):
    """Fit ``Y`` against ``X`` with a degree-2 polynomial, log and plot the fit."""
    X = np.array(X)
    Y = np.array(Y)
    coeff = np.polyfit(X, Y, 2)
    Y_fit = coeff[0] * X * X + coeff[1] * X + coeff[2]
    Evaluate(Y, Y_fit, "非线性")
    print_file("coeff:{}".format(coeff))
    plt.scatter(X, Y, color='blue')
    plt.scatter(X, Y_fit, color='red')
    plt.title('Poly Regression: {}'.format(opts.key_mark))
    plt.xlabel('HPM Select')
    plt.ylabel('Vmin')
    plt.savefig(os.path.join(opts.output_dir, "{}_fitting.png".format(opts.key_mark)))
    plt.cla()
    plt.close("all")


def plot_scatter(X, Y):
    """Save a scatter plot of ``Y`` against ``X`` (first feature if multivariate)."""
    if isinstance(X[0], tuple):
        X = np.array(X).reshape(-1, 3)[:, 0]
    else:
        X = np.array(X).reshape(-1, 1)
    plt.scatter(X, Y, color='blue')
    plt.title('Scatter: {}'.format(opts.key_mark))
    plt.xlabel('HPM Select')
    plt.ylabel('Vmin')
    plt.savefig(os.path.join(opts.output_dir, "{}_scatter.png".format(opts.key_mark)))
    plt.cla()
    plt.close("all")


if __name__ == "__main__":
    opts_init()
    die_keys = {
        "core": ("DIE0_Core", "DIE1_Core"),
        "uncore": ("DIE0_UnCore", "DIE1_UnCore"),
    }[opts.mode]
    for hpm_select in opts.hpm_select_list:
        print("**********************hpm选择方式",hpm_select)
        opts.hpm_select = hpm_select
        # The parsed chips depend on the selector but not on the frequency, so
        # parse once per selector instead of once per (selector, frequency).
        chip_info_list = data_pre()
        for freq in opts.freq_list:
            opts.freq = freq
            chip_id, hpm, vmin = [], [], []
            for chip_info in chip_info_list:
                per_freq = chip_info.vmin_dict.get(opts.freq)
                if per_freq is None:
                    continue  # this chip was not measured at this frequency
                for die_key in die_keys:
                    if die_key in per_freq:
                        sample_vmin, sample_hpm, sample_id = per_freq[die_key]
                        vmin.append(sample_vmin)
                        hpm.append(sample_hpm)
                        chip_id.append(sample_id)
            if not hpm:
                print_file("No samples at frequency {}; skipping".format(opts.freq))
                continue

            opts.key_mark = "{}_{}_{}_{}_{}".format(
                opts.mode, opts.freq, opts.linear_mode, opts.fit_mode, opts.hpm_select)
            if opts.linear_mode == "linear":
                data_pross_linear(hpm, vmin, labels=chip_id)
            elif opts.linear_mode == "non-linear":
                data_pross_poly(hpm, vmin)
            plot_scatter(hpm, vmin)
            print_file("频点:{}, HPM选择方式:{}_{}, Core/Uncore:{:<10}, 样本量共计 {}".format(opts.freq, opts.fit_mode, opts.hpm_select, opts.mode,len(hpm)))
            print_file("=====" * 10)
"vmin_20240812", help = "input_path") opts = parser.parse_args() def opts_init(): opts.hpm_select = "" opts.input_log_path = "../{}/{}/".format(opts.input_path,opts.mode) opts.output_dir = "./{}_{}_{}".format(opts.mode,opts.linear_mode,opts.fit_mode) opts.output_log_path = os.path.join(opts.output_dir,"log") if not os.path.exists(opts.output_dir): os.makedirs(opts.output_dir) opts.output_ckp= os.path.join(opts.output_dir,"data.ckp") if opts.mode == "core": opts.freq_list = ["1200","1550","1800","2000","2200"] if opts.mode == "uncore": opts.freq_list = ["1200","1550","2000"] if opts.fit_mode == "singlevariate": # opts.hpm_select_list = ["sum137"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] opts.hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] # opts.hpm_select_list = ["avg"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] # opts.hpm_select_list = ["avg137"] # hpm_select_list = [0,1,2,3,4,5,6,7,8,"sumall","sum137"] elif opts.fit_mode == "multivariate": opts.hpm_select_list = ["multivariate_137"] def print_file(*args, mode = "info", config = opts): filename = opts.output_log_path def same_print(*args, filename = filename, end = "\n" ): # def same_print(*args, filename = filename, end = os.linesep ): with open(filename,"a+",encoding="utf-8") as f: for index , single_args in enumerate(args): if index < len(args)-1: print(single_args, end = " ") print(single_args, end = " ", file=f) else: print(single_args, end = end) print(single_args, end = end, file=f) if mode == "info": same_print(*args, filename = filename) elif mode == "info_noend": same_print(*args,end=" ", filename = filename) elif mode == "sep_start": # sep_start same_print("===="*5, filename = filename, end = " ") same_print(*args, filename = filename, end = " ") same_print("===="*5, filename = filename) elif mode == "sep_end": # Delimiter Separated end same_print("===="*5, filename = filename, end = " ") count = 0 for single_args in args: count += len(str(single_args)) 
same_print("="*count, filename = filename, end = " ") same_print("===="*5, filename = filename) else: raise ValueError("Mode Wrong") def plot_error(data, info=""): # 设置matplotlib正常显示中文和负号 # matplotlib.rcParams['font.sans-serif']=['SimHei'] # 用黑体显示中文 # matplotlib.rcParams['axes.unicode_minus']=False # 正常显示负号 # 随机生成(10000,)服从正态分布的数据 # data = np.random.randn(10000) """ 绘制直方图 data:必选参数,绘图数据 bins:直方图的长条形数目,可选项,默认为10 normed:是否将得到的直方图向量归一化,可选项,默认为0,代表不归一化,显示频数。normed=1,表示归一化,显示频率。 facecolor:长条形的颜色 edgecolor:长条形边框的颜色 alpha:透明度 """ plt.hist(data, bins=40, density=0, facecolor="blue", edgecolor="black", alpha=0.7) # 显示横轴标签 plt.xlabel("Error Distribution") # # 显示纵轴标签 plt.ylabel("Count") # # 显示图标题 # plt.title("频数/频率分布直方图") plt.title('Error Distribution: {}'.format(opts.key_mark )) plt.savefig(os.path.join(opts.output_dir,"{}_error_dist.png".format(opts.key_mark))) plt.cla() plt.close("all") def get_average(records): """ 平均值 """ return sum(records) / len(records) def get_variance(records): """ 方差 反映一个数据集的离散程度 """ average = get_average(records) return sum([(x - average) ** 2 for x in records]) / len(records) def Evaluate(Y, Y_fit, info ): sse = np.sum((Y_fit - Y) ** 2) mse = sse / Y.shape[0] rmse = np.sqrt(mse) normal_rmse = rmse / (np.max(Y) - np.min(Y)) var = np.sum((Y - np.mean(Y)) ** 2) r2 = 1 - sse / var # adjr2 = 1 - (1 - r2) * (n - 1)/(n - p - 1) error = Y_fit - Y plot_error(error) error = abs(Y_fit - Y) error_var = get_variance(error) error_dev = math.sqrt(error_var) print_file("{}:{:<15} {}:{:<15} {}:{:<15} {}:{:<15}".format("RMSE",round(rmse,4),"Normal RMSE",round(normal_rmse,4),"ERR_Dev",round(error_dev,2), "R2",round(r2,2))) # print_file("{},{},{},{}".format(round(rmse,2),round(normal_rmse,2),round(error_dev,2),round(r2,2))) class chip_info_parser: def __init__(self, result_path, soc_path , opts = opts): self.result_path = result_path self.soc_path = soc_path self.mode = opts.mode self.hpm_select = opts.hpm_select self.vmin_dict = {} self.chip_id = "" self.hpm_dict = 
{ "DIE0_Core_max":[], "DIE0_Core_avg":[], "DIE0_Core_min":[], "DIE1_Core_max":[], "DIE1_Core_avg":[], "DIE1_Core_min":[], "DIE0_UnCore_min":[], "DIE0_UnCore_avg":[], "DIE0_UnCore_max":[], "DIE1_UnCore_min":[], "DIE1_UnCore_avg":[], "DIE1_UnCore_max":[], } self.parser_soc() self.parser_vim() self.chip_info_sum() def parser_vim(self): # with open(self.result_path, encoding='utf-8-sig') as f: # print(self.result_path) with open(self.result_path) as f: for index, row in enumerate( csv.reader(f, skipinitialspace=True)): # print(row) # input() if index == 0 : for index,i in enumerate(row): if i == "Target_Dieid": index_die_id = index if self.mode == "core": if i == "Cur Core Freq": index_die_freq = index if self.mode == "uncore": if i == "Cur Uncore Freq": index_die_freq = index if i == "VMIN": index_die_vmin = index continue if len(row) == 0 : continue # print(index_die_id) # print(index_die_freq) # print(index_die_vmin) # input() if "CORE" in row: if row[index_die_freq] not in self.vmin_dict: #判断频点在不在 self.vmin_dict[row[index_die_freq]] = {} if row[index_die_id] == "0": self.vmin_dict[row[index_die_freq]]["DIE0_Core"] = (float(row[index_die_vmin]), self.get_worst_hpm(0,"core") , self.chip_id ) if row[index_die_id] == "1": self.vmin_dict[row[index_die_freq]]["DIE1_Core"] = (float(row[index_die_vmin]), self.get_worst_hpm(1,"core"), self.chip_id) if "UNCORE" in row: if row[index_die_freq] not in self.vmin_dict: self.vmin_dict[row[index_die_freq]] = {} if row[index_die_id] == "0": self.vmin_dict[row[index_die_freq]]["DIE0_UnCore"] = (float(row[index_die_vmin]), self.get_worst_hpm(0,"uncore"), self.chip_id) if row[index_die_id] == "1": self.vmin_dict[row[index_die_freq]]["DIE1_UnCore"] = (float(row[index_die_vmin]), self.get_worst_hpm(1,"uncore"), self.chip_id) # print(self.vmin_dict) # input() def parser_soc(self): hpm_mode = "" openFile = open(self.soc_path) for line in openFile.readlines(): if "CHIP ID " in line: line = line.replace(" ", "") line = line.replace("\n", "") 
self.chip_id = line.split(":")[1] # 解析一下HPM if "CHIP HPM Efuse" in line: hpm_mode = "Open" if hpm_mode: line.replace(" ", "") if "CORE" in line: hpm_mode = "CORE" if hpm_mode: line.replace(" ", "") if "UNCORE" in line : hpm_mode = "UNCORE" if hpm_mode: line.replace(" ", "") if "CHIP HPM Summary" in line : hpm_mode = "" if hpm_mode == "CORE" and ("min" in line) and ("max" in line) and ("stdev" in line): if "TA" in line and "\'volt\': 900" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE0_Core_max"].append(max_data) self.hpm_dict["DIE0_Core_avg"].append(avg_data) self.hpm_dict["DIE0_Core_min"].append(min_data) if "TB" in line and "\'volt\': 900" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE1_Core_max"].append(max_data) self.hpm_dict["DIE1_Core_avg"].append(avg_data) self.hpm_dict["DIE1_Core_min"].append(min_data) if hpm_mode == "UNCORE" and ("min" in line) and ("max" in line) and ("stdev" in line): if "TA" in line and "\'volt\': 700" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE0_UnCore_max"].append(max_data) self.hpm_dict["DIE0_UnCore_avg"].append(avg_data) self.hpm_dict["DIE0_UnCore_min"].append(min_data) if "TB" in line 
and "\'volt\': 700" in line: max_data = [float(data) for data in line.split("\'max\': [")[1].split("], \'min\': ")[0].split(",")] avg_data = [float(data) for data in line.split("\'avg\': [")[1].split("], \'stdev\': ")[0].split(",")] min_data = [float(data) for data in line.split("\'min\': [")[1].split("], \'avg\': ")[0].split(",")] self.hpm_dict["DIE1_UnCore_max"].append(max_data) self.hpm_dict["DIE1_UnCore_avg"].append(avg_data) self.hpm_dict["DIE1_UnCore_min"].append(min_data) openFile.close() def get_worst_hpm(self, die, mode): hpm_select_list1 = [0,1,2,3,4,5,6,7,8,9] hpm_select_list2 = [str(i) for i in hpm_select_list1] if self.hpm_select in hpm_select_list1 or self.hpm_select in hpm_select_list2: return self.get_worst_hpm_single(die, mode, self.hpm_select) if self.hpm_select == "sumall" : return self.get_worst_hpm_all(die, mode) if self.hpm_select == "sum137" : return self.get_worst_hpm_137(die, mode) if self.hpm_select == "multivariate_137" : return self.get_worst_hpm_duoyuan137(die, mode) if self.hpm_select == "avg" : return self.get_worst_hpm_avg_all(die, mode) if self.hpm_select == "avg137" : return self.get_worst_hpm_avg_137(die, mode) def get_worst_hpm_single(self, die, mode = "core", hpm_select = 0): hpm_select = int(hpm_select) # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > hpm[hpm_select]: worst_hpm = hpm[hpm_select] return worst_hpm def get_worst_hpm_all(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > sum(hpm): worst_hpm = sum(hpm) return worst_hpm def get_worst_hpm_137(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": 
hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm = 1E10 for hpm in hpm_list: if worst_hpm > hpm[1]+hpm[3]+hpm[7]: worst_hpm = hpm[1]+hpm[3]+hpm[7] return worst_hpm def get_worst_hpm_duoyuan137(self, die, mode = "core"): # 当前思路 获得hpm最小值的 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_min".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_min".format(die)] else: raise ValueError worst_hpm1 = 1E10 for hpm in hpm_list: if worst_hpm1 > hpm[1]: worst_hpm1 = hpm[1] worst_hpm3 = 1E10 for hpm in hpm_list: if worst_hpm3 > hpm[3]: worst_hpm3 = hpm[3] worst_hpm7 = 1E10 for hpm in hpm_list: if worst_hpm7 > hpm[7]: worst_hpm7 = hpm[7] return worst_hpm1,worst_hpm3,worst_hpm7 def get_worst_hpm_avg_all(self, die, mode = "core"): # 当前思路 获得hpm平均值 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_avg".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_avg".format(die)] else: raise ValueError hpm_avg = [] for i in hpm_list[0]: hpm_avg.append(0) for hpm in hpm_list: for i in range(len(hpm)): hpm_avg[i] += hpm[i]/4 return sum(hpm_avg) def get_worst_hpm_avg_137(self, die, mode = "core"): # 当前思路 获得hpm平均值 if mode == "core": hpm_list = self.hpm_dict["DIE{}_Core_avg".format(die)] elif mode == "uncore": hpm_list = self.hpm_dict["DIE{}_UnCore_avg".format(die)] else: raise ValueError hpm_avg = [] for i in hpm_list[0]: hpm_avg.append(0) for hpm in hpm_list: for i in range(len(hpm)): hpm_avg[i] += hpm[i]/4 return hpm_avg[1]+hpm_avg[3]+hpm_avg[7] def chip_info_sum(self): pass def save_to_pkl(opts, data_dict): with open(os.path.join(opts.output_dir,"data.ckp"), 'wb') as f: pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL) print_file("==="*2,"Saving ckp at {} successfully".format(opts.output_ckp)) def data_pre(): # 根据输入日志文件夹获取日志文件夹 chip_info_list =[] # if os.path.exists( opts.output_ckp): # with open(opts.output_ckp, 
'rb') as f: # chip_info_list = pickle.load(f) # print_file("==="*2,"Loading ckp at {} successfully".format(opts.output_ckp)) # return chip_info_list single_chip_log_list = [] for single_chip_log_temp in os.listdir(opts.input_log_path): single_chip_log = os.path.join(opts.input_log_path, single_chip_log_temp) single_chip_log_list.append(single_chip_log) for single_chip_log in single_chip_log_list: # print_file("Processing ", single_chip_log) # 获取result路径 for file_path in os.listdir(single_chip_log): if "result" in file_path: result_path = os.path.join(single_chip_log, file_path) if "Soc_Manager" in file_path: soc_path = os.path.join(single_chip_log, file_path) chip_info = chip_info_parser(result_path, soc_path ,opts) chip_info_list.append(chip_info) # save_to_pkl(opts, chip_info_list) return chip_info_list def data_pross_linear(X,Y): if isinstance(X[0],tuple): X = np.array(X).reshape(-1, 3) else: X = np.array(X).reshape(-1, 1) Y = np.array(Y).reshape(-1, 1) from sklearn.linear_model import LinearRegression lin = LinearRegression() lin.fit(X, Y) Evaluate(Y, lin.predict(X), "线性") print_file("K:{}, B:{}".format(lin.coef_,lin.intercept_) ) plt.scatter(X[:,0].reshape(-1, 1), Y , color = 'blue') plt.scatter(X[:,0].reshape(-1, 1), lin.predict(X), color = 'red') for i in range(len(chip_id)): if "10_544" in chip_id[i]: # plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) # if "10_1242" in chip_id[i]: plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) plt.title('Linear Regression: {}'.format(opts.key_mark )) plt.xlabel('HPM Select') plt.ylabel('Vmin') plt.savefig(os.path.join(opts.output_dir,"{}_fitting.png".format(opts.key_mark))) plt.cla() plt.close("all") def data_pross_poly(X, Y): from numpy import polyfit, poly1d X = np.array(X) Y = np.array(Y) coeff = polyfit(X, Y, 2) Evaluate(Y, coeff[0] * X * X + coeff[1] * X + coeff[2], "非线性") print_file("coeff:{}".format(coeff) ) plt.scatter(X, Y, color = 'blue') 
plt.scatter(X, coeff[0] * X * X + coeff[1] * X + coeff[2], color = 'red') plt.title('Poly Regression: {}'.format(opts.key_mark )) plt.xlabel('HPM Select') plt.ylabel('Vmin') plt.savefig(os.path.join(opts.output_dir,"{}_fitting.png".format(opts.key_mark))) plt.cla() plt.close("all") def plot_scatter(X, Y): if isinstance(X[0],tuple): X = np.array(X).reshape(-1, 3) X = X[:,0] else: X = np.array(X).reshape(-1, 1) plt.scatter(X, Y, color = 'blue') # for i in range(len(chip_id)): # if "10_1613" in chip_id[i]: # plt.annotate(chip_id[i], xy = ( hpm[i], vmin[i]), xytext = ( hpm[i]+1, vmin[i]+1)) plt.title('Scatter: {}'.format(opts.key_mark )) plt.xlabel('HPM Select') plt.ylabel('Vmin') plt.savefig(os.path.join(opts.output_dir,"{}_scatter.png".format(opts.key_mark))) plt.cla() plt.close("all") if __name__ == "__main__": opts_init() for hpm_select in opts.hpm_select_list: print("**********************hpm选择方式",hpm_select) for freq in opts.freq_list: opts.hpm_select = hpm_select opts.freq = freq chip_info_list = data_pre() chip_id,hpm,vmin = [],[],[] for chip_info in chip_info_list: # print(chip_info.vmin_dict[opts.freq]) try: if opts.mode == "core": if "DIE0_Core" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE0_Core"][2]) if "DIE1_Core" in chip_info.vmin_dict[opts.freq]: vmin.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE1_Core"][2]) elif opts.mode == "uncore": if "DIE0_UnCore" in chip_info.vmin_dict[opts.freq] : vmin.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE0_UnCore"][2]) if "DIE1_UnCore" in chip_info.vmin_dict[opts.freq]: 
vmin.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][0]) hpm.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][1]) chip_id.append(chip_info.vmin_dict[opts.freq]["DIE1_UnCore"][2]) except: pass opts.key_mark = "{}_{}_{}_{}_{}".format(opts.mode, opts.freq, opts.linear_mode, opts.fit_mode, opts.hpm_select) # print(hpm) # print(vmin) if opts.linear_mode == "linear": # 线性拟合 data_pross_linear(hpm,vmin) elif opts.linear_mode == "non-linear": # 非线性拟合 data_pross_poly(hpm,vmin) plot_scatter(hpm,vmin) print_file("频点:{}, HPM选择方式:{}_{}, Core/Uncore:{:<10}, 样本量共计 {}".format(opts.freq, opts.fit_mode, opts.hpm_select, opts.mode,len(hpm))) print_file("====="*10) 应该怎么使用
09-02
标题:基于Python的自主学习系统后端设计与实现 AI更换标题 第1章 引言:介绍自主学习系统的研究背景、意义、现状以及本文的研究方法和创新点。1.1 研究背景与意义:阐述自主学习系统在教育技术领域的重要性和应用价值。1.2 国内外研究现状:分析国内外在自主学习系统后端技术方面的研究进展。1.3 研究方法与创新点:概述本文采用Python技术栈的设计方法和系统创新点。第2章 相关理论与技术:总结自主学习系统后端开发的相关理论和技术基础。2.1 自主学习系统理论:阐述自主学习系统的定义、特征和理论基础。2.2 Python后端技术栈:介绍Django、Flask等Python后端框架及其适用场景。2.3 数据库技术:讨论关系型和非关系型数据库在系统中的应用方案。第3章 系统设计与实现:详细介绍自主学习系统后端的设计方案和实现过程。3.1 系统架构设计:提出基于微服务的系统架构设计方案。3.2 核心模块设计:详细说明用户管理、学习资源管理、进度跟踪等核心模块设计。3.3 关键技术实现:阐述个性化推荐算法、学习行为分析等关键技术的实现。第4章 系统测试与评估:对系统进行功能测试和性能评估。4.1 测试环境与方法:介绍测试环境配置和采用的测试方法。4.2 功能测试结果:展示各功能模块的测试结果和问题修复情况。4.3 性能评估分析:分析系统在高并发等场景下的性能表现。第5章 结论与展望:总结研究成果并提出未来改进方向。5.1 研究结论:概括系统设计的主要成果和技术创新。5.2 未来展望:指出系统局限性并提出后续优化方向。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值