python机器学习基础笔记3之加载数据(cook book)

博客主要介绍了使用Python进行机器学习时加载数据集的相关内容,涵盖了CSV文件(包括网络URL和本地文件)、EXCEL、JSON文件以及SQL数据库访问等不同格式数据集的加载方式。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Loading datasets

# Load scikit-learn's datasets
from sklearn import datasets

# Load digits dataset(手写数字数据集)
digits = datasets.load_digits()

# Create features matrix
features = digits.data

# Create target vector
target = digits.target
# View first observation
features[0]


部分数据集:

load_boston
Contains 503 observations on Boston housing prices. It is a good dataset for
exploring regression algorithms.
load_iris
Contains 150 observations on the measurements of Iris flowers. It is a good data‐
set for exploring classification algorithms.
load_digits
Contains 1,797 observations from images of handwritten digits. It is a good data‐
set for teaching image classification.

CSV file

网络上url :

# Load library
import pandas as pd

# Create URL
url = 'https://tinyurl.com/simulated_data'

# Load dataset
dataframe = pd.read_csv(url)

# View first two rows
dataframe.head(2)

本地 file:

dataframe = pd.read_csv(r'path')

EXCEL

# Load library
import pandas as pd

# Create URL
url = 'https://tinyurl.com/simulated_excel'

# Load data
dataframe = pd.read_excel(url, sheetname=0, header=1)

# View the first two rows
dataframe.head(2)

# ps: sheetname can accept both strings containing the name of the sheet and
integers pointing to sheet positions (zero-indexed). If we need to load multiple sheets,
include them as a list. For example, sheetname=[0,1,2, "Monthly Sales"] will
return a dictionary of pandas DataFrames containing the first, second, and third
sheets and the sheet named Monthly Sales.

JSON file

# Load library
import pandas as pd

# Create URL
url = 'https://tinyurl.com/simulated_json'

# Load data
dataframe = pd.read_json(url, orient='columns')

# View the first two rows
dataframe.head(2)

注意: orient parameter, which indicates to pandas how the JSON file
is structured. However, it might take some experimenting to figure out which argu‐
ment (split, records, index, columns, and values) is the right one. Another helpful
tool pandas offers is json_normalize, which can help convert semistructured JSON
data into a pandas DataFrame.

SQL 数据库访问

# Load libraries
import pandas as pd
from sqlalchemy import create_engine

# Create a connection to the database
database_connection = create_engine('sqlite:///sample.db')

# Load data
dataframe = pd.read_sql_query('SELECT * FROM data', database_connection)

# View first two rows
dataframe.head(2)
import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy.optimize import minimize from mpl_toolkits.mplot3d import Axes3D import time # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False # 材料参数 (附件1) materials = { 'I': {'rho': 300, 'cp': 1377, 'k': 0.082, 'thickness': 0.6e-3}, 'II': {'rho': 862, 'cp': 2100, 'k': 0.37, 'thickness': 6e-3}, 'III': {'rho': 74.2, 'cp': 1726, 'k': 0.045, 'thickness': 3.6e-3}, 'IV': {'rho': 1.18, 'cp': 1005, 'k': 0.028, 'thickness': 5e-3} } # 计算总厚度 total_thickness = sum(mat['thickness'] for mat in materials.values()) # 环境参数 u_e = 75 # 外界温度 (°C) u_s = 37 # 皮肤温度 (°C) # 初始猜测值 k_e_guess = 120.0 # 左侧对流换热系数 (W/(m²·°C)) k_s_guess = 8.36 # 右侧对流换热系数 (W/(m²·°C)) # 时间参数 total_time = 5400 # 90分钟转换为秒 dt = 5 # 使用稍大的时间步长加速计算 (s) nt = int(total_time / dt) # 时间步数 # 空间离散 (每层网格数) - 使用较粗网格加速计算 m = [4, 40, 24, 40] # 减少网格数以加速反演计算 M = sum(m) # 总节点数 M_indices = [0, m[0], sum(m[:2]), sum(m[:3]), M] # 各层分界点索引 # 计算空间步长 dx = [] for i, layer in enumerate(['I', 'II', 'III', 'IV']): dx.append(materials[layer]['thickness'] / m[i]) # 计算热扩散系数 alpha = [] for layer in ['I', 'II', 'III', 'IV']: mat = materials[layer] alpha.append(mat['k'] / (mat['rho'] * mat['cp'])) # === 核心求解函数 === def solve_heat_transfer(k_e, k_s, return_all=False): """求解热传导方程,返回皮肤表面温度时间序列""" # 计算λ_j lam = [] for i in range(4): lam.append(alpha[i] * dt / (dx[i]**2)) # 计算v_j v = [] for i, layer in enumerate(['I', 'II', 'III', 'IV']): v.append(materials[layer]['k'] / dx[i]) # 边界参数 mu_e = k_e * dx[0] / materials['I']['k'] mu_s = k_s * dx[3] / materials['IV']['k'] # 初始化温度场 (初始温度37°C) u_old = np.full(M + 1, 37.0) # 存储皮肤表面温度 skin_temps = np.zeros(nt + 1) skin_temps[0] = u_old[-1] # 初始皮肤温度 # 时间推进 for t_step in range(1, nt + 1): # 初始化三对角矩阵系数 a = np.zeros(M + 1) # 下对角线 b = np.zeros(M + 1) # 主对角线 c = np.zeros(M + 1) # 上对角线 d = np.zeros(M + 1) # 右端项 # 左边界 (i=0, 外层) b[0] = 1 + mu_e c[0] = -1 d[0] = mu_e * u_e # 右边界 (i=M, 内层) a[M] = -1 b[M] = 1 + mu_s d[M] = mu_s * u_s # 内部节点 for layer_idx in range(4): start_idx = M_indices[layer_idx] end_idx = M_indices[layer_idx + 1] for i in range(start_idx, end_idx + 1): if i == 0 or i == M: continue # 边界点已处理 # 交界点处理 if i in M_indices[1:4]: # 确定交界层 if i == M_indices[1]: # I-II交界面 v_j = v[0] v_j1 = v[1] elif i == M_indices[2]: # II-III交界面 v_j = v[1] v_j1 = v[2] elif i == M_indices[3]: # III-IV交界面 v_j = v[2] v_j1 = v[3] a[i] = -v_j b[i] = v_j + v_j1 c[i] = -v_j1 d[i] = 0 else: # 内部节点 a[i] = -lam[layer_idx] b[i] = 1 + 2 * lam[layer_idx] c[i] = -lam[layer_idx] d[i] = u_old[i] # 追赶法求解三对角方程组 # 1. 前向消元 cp = np.zeros(M + 1) # 存储c' dp = np.zeros(M + 1) # 存储d' # 首行 cp[0] = c[0] / b[0] dp[0] = d[0] / b[0] # 中间行 for i in range(1, M): denom = b[i] - a[i] * cp[i-1] cp[i] = c[i] / denom dp[i] = (d[i] - a[i] * dp[i-1]) / denom # 末行 denom = b[M] - a[M] * cp[M-1] dp[M] = (d[M] - a[M] * dp[M-1]) / denom # 2. 回代求解 u_new = np.zeros(M + 1) u_new[M] = dp[M] for i in range(M-1, -1, -1): u_new[i] = dp[i] - cp[i] * u_new[i+1] # 更新温度场并存储皮肤温度 u_old = u_new.copy() skin_temps[t_step] = u_new[-1] if return_all: return skin_temps, u_old return skin_temps # === 反演优化函数 === def objective_function(params, measured_data, time_points): """目标函数:计算模拟值与测量值的均方根误差""" k_e, k_s = params try: # 运行模型获得模拟温度 simulated_data = solve_heat_transfer(k_e, k_s) # 确保时间点对齐 sim_points = np.linspace(0, total_time, len(simulated_data)) interp_sim = np.interp(time_points, sim_points, simulated_data) # 计算均方根误差 rmse = np.sqrt(np.mean((interp_sim - measured_data)**2)) return rmse except Exception as e: print(f"计算失败: {e}") return 1e10 # 返回大值表示失败 # === 主程序 === if __name__ == "__main__": # 从Excel文件读取数据 file_path = '附件二.xlsx' # 修改为你的Excel文件路径 df = pd.read_excel(file_path, usecols=[0, 1], names=['A', 'B']) # 读取前两列 time_points = df['A'].values measured_temps = df['B'].values print(f"加载实测数据: {len(measured_temps)}个数据点") # 步骤2: 定义参数边界 bounds = [(110, 120), (8, 12)] # (k_e_min, k_e_max), (k_s_min, k_s_max) # 步骤3: 运行优化算法 print("开始反演计算...") start_time = time.time() # 使用全局优化算法寻找初始值 from scipy.optimize import differential_evolution result_global = differential_evolution( objective_function, bounds, args=(measured_temps, time_points), strategy='best1bin', popsize=15, tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=42, maxiter=100 ) # 使用局部优化算法进一步优化 result = minimize( objective_function, x0=result_global.x, args=(measured_temps, time_points), method='L-BFGS-B', bounds=bounds, options={'maxiter': 50, 'ftol': 1e-4} ) # 提取优化结果 k_e_opt, k_s_opt = result.x min_rmse = result.fun elapsed_time = time.time() - start_time print(f"优化完成! 耗时: {elapsed_time:.2f}秒") print(f"优化结果: k_e = {k_e_opt:.2f} W/(m²·°C), k_s = {k_s_opt:.2f} W/(m²·°C)") print(f"最小均方根误差: {min_rmse:.4f}°C") # 步骤4: 验证优化结果 print("使用优化参数运行完整模型...") # 使用优化参数运行完整模型 full_skin_temps = solve_heat_transfer(k_e_opt, k_s_opt) full_time_points = np.linspace(0, total_time, len(full_skin_temps)) # 插值到实测时间点 interp_opt = np.interp(time_points, full_time_points, full_skin_temps) # 步骤5: 可视化结果 plt.figure(figsize=(12, 8)) # 绘制实测温度 plt.plot(time_points/60, measured_temps, 'bo', markersize=6, label='实测温度', alpha=0.7) # 绘制优化结果 plt.plot(time_points/60, interp_opt, 'r-', linewidth=2.5, label=f'优化模型 (k_e={k_e_opt:.1f}, k_s={k_s_opt:.1f})') # 绘制初始猜测结果 initial_skin_temps = solve_heat_transfer(k_e_guess, k_s_guess) interp_initial = np.interp(time_points, full_time_points, initial_skin_temps) plt.plot(time_points/60, interp_initial, 'g--', linewidth=2, label=f'初始猜测 (k_e={k_e_guess}, k_s={k_s_guess})') # 添加安全阈值线 plt.axhline(y=47, color='r', linestyle='--', alpha=0.7) plt.text(5, 47.5, '安全阈值 (47°C)', color='r', fontsize=12) # 设置图表 plt.title('皮肤表面温度实测与模拟对比', fontsize=16) plt.xlabel('时间 (分钟)', fontsize=12) plt.ylabel('温度 (°C)', fontsize=12) plt.grid(True, linestyle='--', alpha=0.7) plt.legend(loc='best', fontsize=12) plt.ylim(35, max(measured_temps.max(), interp_opt.max()) + 2) # 添加误差统计 plt.figtext(0.15, 0.75, f"优化参数误差统计:\nRMSE = {min_rmse:.4f}°C\nMAE = {np.mean(np.abs(interp_opt - measured_temps)):.4f}°C", bbox=dict(facecolor='white', alpha=0.8)) plt.tight_layout() plt.savefig('inverse_optimization_result.png', dpi=300) plt.show() # 步骤6: 生成反演报告 report = f"""=== 反演计算结果报告 === 优化算法: 差分进化 + L-BFGS-B 计算耗时: {elapsed_time:.2f}秒 优化参数: 左侧对流换热系数 k_e = {k_e_opt:.2f} W/(m²·°C) 右侧对流换热系数 k_s = {k_s_opt:.2f} W/(m²·°C) 误差统计: 均方根误差 (RMSE) = {min_rmse:.4f}°C 平均绝对误差 (MAE) = {np.mean(np.abs(interp_opt - measured_temps)):.4f}°C 最大绝对误差 = {np.max(np.abs(interp_opt - measured_temps)):.4f}°C 模型验证: 皮肤最终温度: {full_skin_temps[-1]:.2f}°C 超过安全阈值时间: {np.where(interp_opt > 47)[0][0]*dt/60:.1f}分钟 """ print(report) # 保存报告 with open('inversion_report.txt', 'w') as f: f.write(report) print("反演计算完成,结果已保存!") 基于这个代码基础上,对误差分析进行可视化
最新发布
07-15
<think>我们已经在主程序中计算了优化后的温度曲线和实测温度曲线。现在需要对误差进行可视化分析。 计划绘制以下图表: 1. 误差时间序列图:展示每个时间点上实测温度与模拟温度之间的绝对误差 2. 误差分布直方图:展示误差的分布情况 3. 误差累积分布图:展示误差的累积分布情况 4. 误差与时间的关系:分析误差随时间的变化趋势 我们将基于之前优化得到的模拟温度(interp_opt)和实测温度(measured_temps)来计算误差。 注意:由于实测数据的时间点与模拟结果的时间点已经通过插值对齐,所以可以直接计算每个时间点的误差。 步骤: 1. 计算绝对误差:abs_error = |模拟值 - 实测值| 2. 计算相对误差:relative_error = abs_error / 实测值 (可选,但温度接近37-48,相对误差可能不如绝对误差直观) 3. 绘制绝对误差随时间变化图 4. 绘制绝对误差的直方图 5. 绘制绝对误差的累积分布图 6. 分析误差的统计特征(均值、标准差、最大值等) 我们将在一个新的图形中展示这些分析。</think>我将基于提供的代码,添加误差分析可视化功能。以下是完整的代码实现,包括误差时间序列图、误差分布直方图和误差统计图: ```python import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy.optimize import minimize import time from matplotlib.gridspec import GridSpec # 设置中文字体 plt.rcParams[&#39;font.sans-serif&#39;] = [&#39;SimHei&#39;] plt.rcParams[&#39;axes.unicode_minus&#39;] = False # 材料参数 (附件1) materials = { &#39;I&#39;: {&#39;rho&#39;: 300, &#39;cp&#39;: 1377, &#39;k&#39;: 0.082, &#39;thickness&#39;: 0.6e-3}, &#39;II&#39;: {&#39;rho&#39;: 862, &#39;cp&#39;: 2100, &#39;k&#39;: 0.37, &#39;thickness&#39;: 6e-3}, &#39;III&#39;: {&#39;rho&#39;: 74.2, &#39;cp&#39;: 1726, &#39;k&#39;: 0.045, &#39;thickness&#39;: 3.6e-3}, &#39;IV&#39;: {&#39;rho&#39;: 1.18, &#39;cp&#39;: 1005, &#39;k&#39;: 0.028, &#39;thickness&#39;: 5e-3} } # 计算总厚度 total_thickness = sum(mat[&#39;thickness&#39;] for mat in materials.values()) # 环境参数 u_e = 75 # 外界温度 (°C) u_s = 37 # 皮肤温度 (°C) # 初始猜测值 k_e_guess = 120.0 # 左侧对流换热系数 (W/(m²·°C)) k_s_guess = 8.36 # 右侧对流换热系数 (W/(m²·°C)) # 时间参数 total_time = 5400 # 90分钟转换为秒 dt = 5 # 使用稍大的时间步长加速计算 (s) nt = int(total_time / dt) # 时间步数 # 空间离散 (每层网格数) - 使用较粗网格加速计算 m = [4, 40, 24, 40] # 减少网格数以加速反演计算 M = sum(m) # 总节点数 M_indices = [0, m[0], sum(m[:2]), sum(m[:3]), M] # 各层分界点索引 # 计算空间步长 dx = [] for i, layer in enumerate([&#39;I&#39;, &#39;II&#39;, &#39;III&#39;, &#39;IV&#39;]): dx.append(materials[layer][&#39;thickness&#39;] / m[i]) # 计算热扩散系数 alpha = [] for layer in [&#39;I&#39;, &#39;II&#39;, &#39;III&#39;, &#39;IV&#39;]: mat = materials[layer] alpha.append(mat[&#39;k&#39;] / (mat[&#39;rho&#39;] * mat[&#39;cp&#39;])) # === 核心求解函数 === def solve_heat_transfer(k_e, k_s, return_all=False): """求解热传导方程,返回皮肤表面温度时间序列""" # 计算λ_j lam = [] for i in range(4): lam.append(alpha[i] * dt / (dx[i]**2)) # 计算v_j v = [] for i, layer in enumerate([&#39;I&#39;, &#39;II&#39;, &#39;III&#39;, &#39;IV&#39;]): v.append(materials[layer][&#39;k&#39;] / dx[i]) # 边界参数 mu_e = k_e * dx[0] / materials[&#39;I&#39;][&#39;k&#39;] mu_s = k_s * dx[3] / materials[&#39;IV&#39;][&#39;k&#39;] # 初始化温度场 (初始温度37°C) u_old = np.full(M + 1, 37.0) # 存储皮肤表面温度 skin_temps = np.zeros(nt + 1) skin_temps[0] = u_old[-1] # 初始皮肤温度 # 时间推进 for t_step in range(1, nt + 1): # 初始化三对角矩阵系数 a = np.zeros(M + 1) # 下对角线 b = np.zeros(M + 1) # 主对角线 c = np.zeros(M + 1) # 上对角线 d = np.zeros(M + 1) # 右端项 # 左边界 (i=0, 外层) b[0] = 1 + mu_e c[0] = -1 d[0] = mu_e * u_e # 右边界 (i=M, 内层) a[M] = -1 b[M] = 1 + mu_s d[M] = mu_s * u_s # 内部节点 for layer_idx in range(4): start_idx = M_indices[layer_idx] end_idx = M_indices[layer_idx + 1] for i in range(start_idx, end_idx + 1): if i == 0 or i == M: continue # 边界点已处理 # 交界点处理 if i in M_indices[1:4]: # 确定交界层 if i == M_indices[1]: # I-II交界面 v_j = v[0] v_j1 = v[1] elif i == M_indices[2]: # II-III交界面 v_j = v[1] v_j1 = v[2] elif i == M_indices[3]: # III-IV交界面 v_j = v[2] v_j1 = v[3] a[i] = -v_j b[i] = v_j + v_j1 c[i] = -v_j1 d[i] = 0 else: # 内部节点 a[i] = -lam[layer_idx] b[i] = 1 + 2 * lam[layer_idx] c[i] = -lam[layer_idx] d[i] = u_old[i] # 追赶法求解三对角方程组 # 1. 前向消元 cp = np.zeros(M + 1) # 存储c&#39; dp = np.zeros(M + 1) # 存储d&#39; # 首行 cp[0] = c[0] / b[0] dp[0] = d[0] / b[0] # 中间行 for i in range(1, M): denom = b[i] - a[i] * cp[i-1] cp[i] = c[i] / denom dp[i] = (d[i] - a[i] * dp[i-1]) / denom # 末行 denom = b[M] - a[M] * cp[M-1] dp[M] = (d[M] - a[M] * dp[M-1]) / denom # 2. 回代求解 u_new = np.zeros(M + 1) u_new[M] = dp[M] for i in range(M-1, -1, -1): u_new[i] = dp[i] - cp[i] * u_new[i+1] # 更新温度场并存储皮肤温度 u_old = u_new.copy() skin_temps[t_step] = u_new[-1] if return_all: return skin_temps, u_old return skin_temps # === 反演优化函数 === def objective_function(params, measured_data, time_points): """目标函数:计算模拟值与测量值的均方根误差""" k_e, k_s = params try: # 运行模型获得模拟温度 simulated_data = solve_heat_transfer(k_e, k_s) # 确保时间点对齐 sim_points = np.linspace(0, total_time, len(simulated_data)) interp_sim = np.interp(time_points, sim_points, simulated_data) # 计算均方根误差 rmse = np.sqrt(np.mean((interp_sim - measured_data)**2)) return rmse except Exception as e: print(f"计算失败: {e}") return 1e10 # 返回大值表示失败 # === 误差分析可视化函数 === def visualize_errors(measured_temps, simulated_temps, time_points, k_e_opt, k_s_opt, min_rmse): """创建误差分析可视化图表""" # 计算误差 errors = simulated_temps - measured_temps abs_errors = np.abs(errors) # 创建2x2的子图布局 plt.figure(figsize=(16, 12)) gs = GridSpec(3, 2, height_ratios=[1, 1, 1]) # 图表1: 温度与误差时间序列 ax1 = plt.subplot(gs[0, :]) ax1.plot(time_points/60, measured_temps, &#39;b-&#39;, label=&#39;实测温度&#39;, linewidth=2) ax1.plot(time_points/60, simulated_temps, &#39;r--&#39;, label=&#39;模拟温度&#39;, linewidth=2) ax1.set_xlabel(&#39;时间 (分钟)&#39;, fontsize=12) ax1.set_ylabel(&#39;温度 (°C)&#39;, fontsize=12) ax1.set_title(&#39;温度时间序列对比&#39;, fontsize=14) ax1.grid(True, linestyle=&#39;--&#39;, alpha=0.7) ax1.legend(loc=&#39;best&#39;) # 添加误差曲线 ax2 = ax1.twinx() ax2.plot(time_points/60, errors, &#39;g-&#39;, label=&#39;误差&#39;, linewidth=1.5, alpha=0.7) ax2.fill_between(time_points/60, 0, errors, where=errors>0, facecolor=&#39;green&#39;, alpha=0.2, interpolate=True) ax2.fill_between(time_points/60, 0, errors, where=errors<0, facecolor=&#39;red&#39;, alpha=0.2, interpolate=True) ax2.axhline(y=0, color=&#39;k&#39;, linestyle=&#39;-&#39;, alpha=0.5) ax2.set_ylabel(&#39;误差 (°C)&#39;, fontsize=12) ax2.legend(loc=&#39;upper right&#39;) # 图表2: 误差分布直方图 ax3 = plt.subplot(gs[1, 0]) n, bins, patches = ax3.hist(errors, bins=20, color=&#39;skyblue&#39;, edgecolor=&#39;black&#39;, alpha=0.7, density=True) # 添加正态分布曲线 from scipy.stats import norm mu, std = norm.fit(errors) xmin, xmax = ax3.get_xlim() x = np.linspace(xmin, xmax, 100) p = norm.pdf(x, mu, std) ax3.plot(x, p, &#39;r-&#39;, linewidth=2) ax3.set_xlabel(&#39;误差 (°C)&#39;, fontsize=12) ax3.set_ylabel(&#39;概率密度&#39;, fontsize=12) ax3.set_title(&#39;误差分布直方图&#39;, fontsize=14) ax3.grid(True, linestyle=&#39;--&#39;, alpha=0.7) # 添加统计信息 textstr = &#39;\n&#39;.join(( f&#39;均值 = {np.mean(errors):.4f}°C&#39;, f&#39;标准差 = {np.std(errors):.4f}°C&#39;, f&#39;偏度 = {pd.Series(errors).skew():.4f}&#39;, f&#39;峰度 = {pd.Series(errors).kurtosis():.4f}&#39;)) props = dict(boxstyle=&#39;round&#39;, facecolor=&#39;wheat&#39;, alpha=0.5) ax3.text(0.95, 0.95, textstr, transform=ax3.transAxes, fontsize=10, verticalalignment=&#39;top&#39;, horizontalalignment=&#39;right&#39;, bbox=props) # 图表3: 绝对误差累积分布 ax4 = plt.subplot(gs[1, 1]) sorted_abs_errors = np.sort(abs_errors) cdf = np.arange(1, len(sorted_abs_abs_errors) + 1) / len(sorted_abs_abs_errors) ax4.plot(sorted_abs_errors, cdf, &#39;b-&#39;, linewidth=2) ax4.set_xlabel(&#39;绝对误差 (°C)&#39;, fontsize=12) ax4.set_ylabel(&#39;累积概率&#39;, fontsize=12) ax4.set_title(&#39;绝对误差累积分布&#39;, fontsize=14) ax4.grid(True, linestyle=&#39;--&#39;, alpha=0.7) # 添加关键点标记 for threshold in [0.1, 0.5, 0.9]: idx = np.argmax(cdf >= threshold) error_val = sorted_abs_errors[idx] ax4.plot([error_val, error_val], [0, threshold], &#39;r--&#39;) ax4.plot([0, error_val], [threshold, threshold], &#39;r--&#39;) ax4.text(error_val+0.02, threshold-0.05, f&#39;{error_val:.3f}°C&#39;, fontsize=10, color=&#39;red&#39;) # 图表4: 误差统计指标 ax5 = plt.subplot(gs[2, :]) metrics = { &#39;RMSE&#39;: min_rmse, &#39;MAE&#39;: np.mean(abs_errors), &#39;MaxAE&#39;: np.max(abs_errors), &#39;R²&#39;: 1 - np.var(errors) / np.var(measured_temps) } colors = [&#39;#1f77b4&#39;, &#39;#ff7f0e&#39;, &#39;#2ca02c&#39;, &#39;#d62728&#39;] ax5.bar(metrics.keys(), metrics.values(), color=colors) # 添加数值标签 for i, (k, v) in enumerate(metrics.items()): ax5.text(i, v + 0.01, f&#39;{v:.4f}&#39;, ha=&#39;center&#39;, fontsize=10) ax5.set_ylabel(&#39;误差值&#39;, fontsize=12) ax5.set_title(&#39;模型误差统计指标&#39;, fontsize=14) ax5.grid(True, axis=&#39;y&#39;, linestyle=&#39;--&#39;, alpha=0.7) # 添加模型参数信息 param_text = f"优化参数: k_e = {k_e_opt:.2f} W/(m²·°C), k_s = {k_s_opt:.2f} W/(m²·°C)" plt.figtext(0.5, 0.01, param_text, ha=&#39;center&#39;, fontsize=12, bbox=dict(facecolor=&#39;lightyellow&#39;, alpha=0.5)) plt.tight_layout(rect=[0, 0.05, 1, 0.95]) plt.savefig(&#39;error_analysis.png&#39;, dpi=300) plt.show() # === 主程序 === if __name__ == "__main__": # 从Excel文件读取数据 file_path = &#39;附件二.xlsx&#39; # 修改为你的Excel文件路径 df = pd.read_excel(file_path, usecols=[0, 1], names=[&#39;A&#39;, &#39;B&#39;]) # 读取前两列 time_points = df[&#39;A&#39;].values measured_temps = df[&#39;B&#39;].values print(f"加载实测数据: {len(measured_temps)}个数据点") # 步骤2: 定义参数边界 bounds = [(110, 120), (8, 12)] # (k_e_min, k_e_max), (k_s_min, k_s_max) # 步骤3: 运行优化算法 print("开始反演计算...") start_time = time.time() # 使用全局优化算法寻找初始值 from scipy.optimize import differential_evolution result_global = differential_evolution( objective_function, bounds, args=(measured_temps, time_points), strategy=&#39;best1bin&#39;, popsize=15, tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=42, maxiter=100 ) # 使用局部优化算法进一步优化 result = minimize( objective_function, x0=result_global.x, args=(measured_temps, time_points), method=&#39;L-BFGS-B&#39;, bounds=bounds, options={&#39;maxiter&#39;: 50, &#39;ftol&#39;: 1e-4} ) # 提取优化结果 k_e_opt, k_s_opt = result.x min_rmse = result.fun elapsed_time = time.time() - start_time print(f"优化完成! 耗时: {elapsed_time:.2f}秒") print(f"优化结果: k_e = {k_e_opt:.2f} W/(m²·°C), k_s = {k_s_opt:.2f} W/(m²·°C)") print(f"最小均方根误差: {min_rmse:.4f}°C") # 步骤4: 验证优化结果 print("使用优化参数运行完整模型...") # 使用优化参数运行完整模型 full_skin_temps = solve_heat_transfer(k_e_opt, k_s_opt) full_time_points = np.linspace(0, total_time, len(full_skin_temps)) # 插值到实测时间点 interp_opt = np.interp(time_points, full_time_points, full_skin_temps) # 步骤5: 可视化结果 plt.figure(figsize=(12, 8)) # 绘制实测温度 plt.plot(time_points/60, measured_temps, &#39;bo&#39;, markersize=6, label=&#39;实测温度&#39;, alpha=0.7) # 绘制优化结果 plt.plot(time_points/60, interp_opt, &#39;r-&#39;, linewidth=2.5, label=f&#39;优化模型 (k_e={k_e_opt:.1f}, k_s={k_s_opt:.1f})&#39;) # 绘制初始猜测结果 initial_skin_temps = solve_heat_transfer(k_e_guess, k_s_guess) interp_initial = np.interp(time_points, full_time_points, initial_skin_temps) plt.plot(time_points/60, interp_initial, &#39;g--&#39;, linewidth=2, label=f&#39;初始猜测 (k_e={k_e_guess}, k_s={k_s_guess})&#39;) # 添加安全阈值线 plt.axhline(y=47, color=&#39;r&#39;, linestyle=&#39;--&#39;, alpha=0.7) plt.text(5, 47.5, &#39;安全阈值 (47°C)&#39;, color=&#39;r&#39;, fontsize=12) # 设置图表 plt.title(&#39;皮肤表面温度实测与模拟对比&#39;, fontsize=16) plt.xlabel(&#39;时间 (分钟)&#39;, fontsize=12) plt.ylabel(&#39;温度 (°C)&#39;, fontsize=12) plt.grid(True, linestyle=&#39;--&#39;, alpha=0.7) plt.legend(loc=&#39;best&#39;, fontsize=12) plt.ylim(35, max(measured_temps.max(), interp_opt.max()) + 2) # 添加误差统计 plt.figtext(0.15, 0.75, f"优化参数误差统计:\nRMSE = {min_rmse:.4f}°C\nMAE = {np.mean(np.abs(interp_opt - measured_temps)):.4f}°C", bbox=dict(facecolor=&#39;white&#39;, alpha=0.8)) plt.tight_layout() plt.savefig(&#39;inverse_optimization_result.png&#39;, dpi=300) plt.show() # 步骤6: 误差分析可视化 print("进行误差分析可视化...") visualize_errors(measured_temps, interp_opt, time_points, k_e_opt, k_s_opt, min_rmse) # 步骤7: 生成反演报告 report = f"""=== 反演计算结果报告 === 优化算法: 差分进化 + L-BFGS-B 计算耗时: {elapsed_time:.2f}秒 优化参数: 左侧对流换热系数 k_e = {k_e_opt:.2f} W/(m²·°C) 右侧对流换热系数 k_s = {k_s_opt:.2f} W/(m²·°C) 误差统计: 均方根误差 (RMSE) = {min_rmse:.4f}°C 平均绝对误差 (MAE) = {np.mean(np.abs(interp_opt - measured_temps)):.4f}°C 最大绝对误差 = {np.max(np.abs(interp_opt - measured_temps)):.4f}°C 决定系数 () = {1 - np.var(interp_opt - measured_temps)/np.var(measured_temps):.4f} 模型验证: 皮肤最终温度: {full_skin_temps[-1]:.2f}°C 超过安全阈值时间: {np.where(interp_opt > 47)[0][0]*dt/60:.1f}分钟 """ print(report) # 保存报告 with open(&#39;inversion_report.txt&#39;, &#39;w&#39;) as f: f.write(report) print("反演计算完成,结果已保存!") ``` ### 误差分析可视化功能说明 我添加了`visualize_errors`函数,用于创建全面的误差分析图表,包含以下四个子图: 1. **温度与误差时间序列图**: - 蓝色实线:实测温度 - 红色虚线:模拟温度 - 绿色区域:正误差(模拟值 > 实测值) - 红色区域:负误差(模拟值 < 实测值) - 双Y轴设计,同时显示温度和误差 2. **误差分布直方图**: - 显示误差的概率密度分布 - 红色曲线:拟合的正态分布 - 统计信息框:包含均值、标准差、偏度和峰度 3. **绝对误差累积分布图**: - 显示绝对误差的累积分布函数(CDF) - 标记关键百分位数(10%、50%、90%) - 直观展示误差分布特征 4. **误差统计指标图**: - 柱状图展示关键误差指标 - RMSE(均方根误差) - MAE(平均绝对误差) - MaxAE(最大绝对误差) - R²(决定系数) ### 使用说明 1. 运行程序后,首先进行参数反演优化 2. 生成温度对比图(包含初始猜测和优化结果) 3. 自动生成误差分析可视化图表并保存为`error_analysis.png` 4. 在报告中包含更全面的误差统计信息 此可视化分析有助于深入理解模型在不同时间段的预测精度、误差分布特征以及模型整体性能,为模型优化提供直观依据。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

万物琴弦光锥之外

给个0.1,恭喜老板发财

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值