'''
问题1
首先对附件1的数据进行预处理,使用fillna结合for循环将“催化剂组合编号”与“催化剂组合”信息与温度等一一对应,确保每个数据点都能对应到正确的催化剂组合
将乙醇转化率、C4烯烃选择性转化为数值类型,便于后续统计分析和可视化处理
使用最小二乘法求回归方程,使用LinearRegression模型对每组数据进行最小二乘拟合,分析温度与乙醇转化率、C4烯烃选择性等之间的线性关系。计算斜率、截距和R²值,评估拟合效果。
将数据以折线图、散点图、回归线的形式直观显示
问题2
①Co负载量 ②装料比 ③乙醇浓度 ④温度 ⑤乙醇转化率 ⑥C4烯烃选择性
使用sklearn的StandardScaler对特征进行标准化处理(消除量纲影响),再用statsmodels构建多元线性回归模型,拟合乙醇转化率和C4烯烃选择性两个目标变量的回归模型
转化率=β0+β1(Co负载量)+β2(装料比)+β3(乙醇浓度)+β4(温度)+ϵ
选择性=β0+β1(Co负载量)+β2(装料比)+β3(乙醇浓度)+β4(温度)+ϵ (这两个式子的β数值不同)
通过OLS模型(普通最小二乘)的t值和p值判断参数显著性(p<0.05为显著)
箱线图:用于数据分布可视化,直观反映数据中位数、四分位数,适合对比不同组别的数据分布。箱体表示数据中间的50%
问题3
1、首先计算C4烯烃收率,据此找出C4烯烃收率最高的实验以及对应催化剂组合和温度。
2、然后分析不同温度下C4烯烃收率,得出温度低于350摄氏度时的最佳组合。
3、(分组统计)接着利用groupby对催化剂组合与温度进行分组,统计平均C4烯烃收率,找出每个催化剂组合的最佳温度(柱状图),找出前五个最佳催化剂组合(作为最优方案的参考,截图和表格都有)
4、然后解析催化剂成分,分别分析Co负载量、装料比、乙醇浓度对C4烯烃收率的影响(三张条形图以及对应数据)。最后找出温度低于350°C时的最佳组合
5、使用LinearRegression模型对温度与C4烯烃收率之间的关系进行拟合,计算斜率、截距和决定系数,评估变量间的线性相关性(方程与R方,拟合图)。
再进行非线性(多项式回归分析,r2,mse)
问题4
首先分析当前已经存在的实验,读取C4烯烃收率最高的实验,得出对应温度、Co装载量、装料比、乙醇浓度
实验设计原则:需要控制单变量,确保实验结果的可解释性;合理调整幅度;由前面几题的分析可得对反应影响最大的三个因素(温度、Co负载量、装料比)
新增实验设计:1、首先复现当前的最佳条件,确保实验结果的稳定性
2、调整温度
3、调整Co负载量
4、调整装料比
'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
import os
import re
# Plot configuration: select the SimHei font for sans-serif text (the figures
# below carry Chinese labels) and disable the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Create every output directory the functions below write into.
for _output_dir in (
    './results',
    './results/problem2',
    './results/problem3',
    './processed_data',
    './results/problem4',
):
    os.makedirs(_output_dir, exist_ok=True)
def preprocess_attachment1(file_path):
    """Load attachment 1 and attach a catalyst label to every row.

    Blank cells are first replaced by empty strings. The catalyst id and
    description of the most recent row with a non-empty id are then copied
    onto each following row. Every column from the third onward is converted
    to numeric, with unparseable cells becoming NaN.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        The cleaned DataFrame.
    """
    table = pd.read_excel(file_path)
    table.fillna('', inplace=True)

    last_id, last_desc = '', ''
    filled_ids, filled_descs = [], []
    for row_id, row_desc in zip(table['催化剂组合编号'], table['催化剂组合']):
        # A non-empty id marks the first row of a new catalyst group.
        if row_id != '':
            last_id, last_desc = row_id, row_desc
        filled_ids.append(last_id)
        filled_descs.append(last_desc)
    table['催化剂组合编号'] = filled_ids
    table['催化剂组合'] = filled_descs

    # Everything after the two label columns is treated as measurement data.
    for column in table.columns[2:]:
        table[column] = pd.to_numeric(table[column], errors='coerce')
    return table
def preprocess_attachment2(file_path):
    """Load attachment 2 (time series of one experiment) and add the C4 yield.

    The eight columns are renamed, missing cells are forward-filled, every
    column is coerced to numeric, rows that still contain NaN are dropped and
    the C4 olefin yield (conversion * selectivity / 100) is appended.

    Args:
        file_path: Path of the Excel workbook to read. The sheet must have
            exactly eight columns, otherwise the rename raises ValueError.

    Returns:
        The cleaned DataFrame including the 'C4烯烃收率(%)' column.
    """
    df = pd.read_excel(file_path)
    df.columns = ['时间(min)', '乙醇转化率(%)', '乙烯选择性(%)', 'C4烯烃选择性(%)', '乙醛选择性(%)',
                  '碳数为4-12脂肪醇(%)', '甲基苯甲醛和甲基苯甲醇(%)', '其他(%)']
    df['时间(min)'] = pd.to_numeric(df['时间(min)'], errors='coerce')
    # DataFrame.ffill replaces fillna(method='ffill'), which newer pandas deprecates.
    df.ffill(inplace=True)
    for col in df.columns[1:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df.dropna(inplace=True)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100
    return df
# Simple one-variable least-squares helper
def linear_regression_fit(x, y):
    """Fit y = slope * x + intercept by ordinary least squares.

    Args:
        x: pandas Series holding the single predictor (``.values`` is used).
        y: Target values aligned with ``x``.

    Returns:
        Tuple ``(slope, intercept, r2)``; ``(None, None, None)`` when fewer
        than two points are supplied.
    """
    if len(x) < 2:
        return None, None, None
    # sklearn expects a 2-D design matrix with one column per feature.
    design = x.values.reshape(-1, 1)
    fitted = LinearRegression()
    fitted.fit(design, y)
    predictions = fitted.predict(design)
    return fitted.coef_[0], fitted.intercept_, r2_score(y, predictions)
def plot_grouped_line_with_regression(data, x_col, y_col, group_col, title, xlabel, ylabel, save_name=None):
    """Plot each group's raw curve together with its linear fit.

    For every distinct value of ``group_col`` the rows are fitted with
    ``linear_regression_fit`` and drawn as a dashed raw-data line plus a solid
    fitted line in the same colour. Groups with fewer than two usable points
    are skipped with a printed notice.

    Args:
        data: DataFrame containing ``x_col``, ``y_col`` and ``group_col``.
        x_col: Name of the predictor column.
        y_col: Name of the response column.
        group_col: Name of the grouping column.
        title: Figure title.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        save_name: When given, the figure is written to
            ``./results/{save_name}.png`` and the per-point fit table to
            ``./results/{save_name}_回归结果.csv``.
    """
    plt.figure(figsize=(14, 8))
    colors = plt.cm.tab20.colors
    results = []
    for i, group in enumerate(data[group_col].unique()):
        # Rows with a missing x or y would make the regression raise, so they
        # are removed before fitting.
        subset = data[data[group_col] == group].dropna(subset=[x_col, y_col])
        x = subset[x_col]
        y = subset[y_col]
        if len(x) < 2:
            print(f"组 {group} 数据不足,跳过拟合")
            continue
        slope, intercept, r2 = linear_regression_fit(x, y)
        if slope is None:
            continue
        y_fit = slope * x + intercept
        color = colors[i % len(colors)]
        plt.plot(x, y, marker='o', linestyle='--', color=color, label=f'{group} (原始数据)')
        plt.plot(x, y_fit, color=color, linestyle='-', linewidth=2, label=f'{group} 拟合')
        equation = f'y = {slope:.4f}x + {intercept:.4f}'
        # One output row per data point, repeating the group's fit parameters.
        results.extend(
            {
                '组别': group,
                'x': xi,
                'y': yi,
                'y_拟合': slope * xi + intercept,
                '斜率': slope,
                '截距': intercept,
                'R2': r2,
                '方程': equation,
            }
            for xi, yi in zip(x, y)
        )
    result_df = pd.DataFrame(results)
    plt.title(title, fontsize=14)
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.grid(True)
    plt.legend(fontsize=10, bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    if save_name:
        plt.savefig(f"./results/{save_name}.png", dpi=300, bbox_inches='tight')
        result_df.to_csv(f"./results/{save_name}_回归结果.csv", index=False, encoding='utf_8_sig')
    plt.show()
def plot_ethanol_vs_c4_with_regression(data, ethanol_col, c4_col, group_col, title, xlabel, ylabel, save_name=None):
    """Plot C4 selectivity against ethanol conversion per group, with linear fits.

    This is the grouped line-plus-regression plot with ``ethanol_col`` on the
    x-axis and ``c4_col`` on the y-axis, so it delegates to
    ``plot_grouped_line_with_regression`` instead of repeating its body.

    Args:
        data: DataFrame containing the three named columns.
        ethanol_col: Column used as the predictor (x-axis).
        c4_col: Column used as the response (y-axis).
        group_col: Name of the grouping column.
        title: Figure title.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        save_name: Optional base name for the PNG/CSV written under ./results.
    """
    plot_grouped_line_with_regression(
        data=data,
        x_col=ethanol_col,
        y_col=c4_col,
        group_col=group_col,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        save_name=save_name,
    )
def analyze_attachment2_data(data):
    """Draw and save the five attachment-2 figures.

    In order: ethanol conversion vs. time, C4 selectivity vs. time, all
    product selectivities vs. time, conversion vs. C4 selectivity scatter,
    and C4 yield vs. time. The three single-metric time plots also carry a
    linear fit computed with ``linear_regression_fit``.

    Args:
        data: DataFrame with '时间(min)', '乙醇转化率(%)', 'C4烯烃选择性(%)' and
            'C4烯烃收率(%)' columns, plus the other selectivity columns.
    """
    time_col = '时间(min)'

    def _time_trend(metric, raw_color, fit_color, heading, y_label, target):
        # One figure: raw series, optional linear fit, then save and show.
        plt.figure(figsize=(10, 6))
        sns.lineplot(x=time_col, y=metric, data=data, marker='o', color=raw_color, label='原始数据')
        slope, intercept, r2 = linear_regression_fit(data[time_col], data[metric])
        if slope is not None:
            times = data[time_col]
            plt.plot(times, slope * times + intercept, color=fit_color, linestyle='--',
                     label=f'线性拟合\ny = {slope:.4f}x + {intercept:.4f}\nR² = {r2:.4f}')
        plt.title(heading, fontsize=14)
        plt.xlabel('时间(min)', fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(target, dpi=300, bbox_inches='tight')
        plt.show()

    # Ethanol conversion over time
    _time_trend('乙醇转化率(%)', 'g', 'orange', '乙醇转化率随时间变化(350°C)',
                '乙醇转化率 (%)', './results/attachment2_乙醇转化率_线性拟合.png')

    # C4 olefin selectivity over time
    _time_trend('C4烯烃选择性(%)', 'b', 'red', 'C4烯烃选择性随时间变化(350°C)',
                'C4烯烃选择性 (%)', './results/attachment2_C4选择性_线性拟合.png')

    # Selectivity of every product over time
    plt.figure(figsize=(12, 6))
    # Skips the first two columns and the last column of the frame.
    for product in data.columns[2:-1]:
        sns.lineplot(x=time_col, y=product, data=data, label=product)
    plt.title('各产物选择性随时间变化', fontsize=14)
    plt.xlabel('时间(min)')
    plt.ylabel('选择性 (%)')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('./results/attachment2_各产物选择性.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Ethanol conversion against C4 olefin selectivity
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='乙醇转化率(%)', y='C4烯烃选择性(%)', data=data)
    plt.title('乙醇转化率与C4烯烃选择性关系', fontsize=14)
    plt.xlabel('乙醇转化率 (%)')
    plt.ylabel('C4烯烃选择性 (%)')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('./results/attachment2_转化率与选择性关系.png', dpi=300, bbox_inches='tight')
    plt.show()

    # C4 olefin yield over time
    _time_trend('C4烯烃收率(%)', 'purple', 'orange', 'C4烯烃收率随时间变化(350°C)',
                'C4烯烃收率 (%)', './results/attachment2_C4收率_线性拟合.png')
def parse_catalyst_components(df):
    """Extract numeric catalyst parameters from the description column.

    The description in '催化剂组合' is split on '-' and scanned for the Co
    loading (number before 'wt%'), the Co/SiO2 mass, the HAP mass and the
    ethanol concentration. The loading ratio is Co/SiO2 mass divided by HAP
    mass when both are known and the HAP mass is non-zero. Values that cannot
    be parsed are left as NaN; a non-string description yields NaN everywhere.

    The columns are written onto ``df`` itself (callers rely on this) and the
    same object is returned.

    Args:
        df: DataFrame with '催化剂组合编号' and '催化剂组合' columns.

    Returns:
        ``df`` with the columns 'Co负载量(wt%)', 'Co/SiO2质量(mg)',
        'HAP质量(mg)', '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)' and '装料方式'
        added or overwritten.
    """
    def _to_float(text):
        # Only conversion failures are tolerated; anything else should surface.
        try:
            return float(text)
        except (TypeError, ValueError):
            return np.nan

    def _parse(desc):
        loading = co_mass = hap_mass = ethanol = np.nan
        if not isinstance(desc, str):
            return loading, co_mass, hap_mass, ethanol

        # Co loading: the last whitespace-separated token before 'wt%'.
        if 'wt%' in desc:
            tokens = desc.split('wt%')[0].split()
            if tokens:
                loading = _to_float(tokens[-1])

        # Masses and ethanol concentration, one '-'-separated part at a time.
        for part in desc.split('-'):
            if 'mg' in part and 'Co/SiO2' in part:
                value = _to_float(part.split('mg')[0].strip())
                if not np.isnan(value):
                    co_mass = value
            elif 'mg' in part and 'HAP' in part:
                value = _to_float(part.split('mg')[0].strip())
                if not np.isnan(value):
                    hap_mass = value
            elif '乙醇浓度' in part:
                value = _to_float(part.split('乙醇浓度')[1].split('ml/min')[0].strip())
                if not np.isnan(value):
                    ethanol = value
        return loading, co_mass, hap_mass, ethanol

    parsed = [_parse(desc) for desc in df['催化剂组合']]
    loadings = [row[0] for row in parsed]
    co_masses = [row[1] for row in parsed]
    hap_masses = [row[2] for row in parsed]
    ethanols = [row[3] for row in parsed]
    ratios = [
        co / hap if not np.isnan(co) and not np.isnan(hap) and hap != 0 else np.nan
        for co, hap in zip(co_masses, hap_masses)
    ]

    def _column(values):
        # Whole-column assignment stays correct even with duplicate index labels.
        return pd.Series(values, index=df.index, dtype=float)

    df['Co负载量(wt%)'] = _column(loadings)
    df['Co/SiO2质量(mg)'] = _column(co_masses)
    df['HAP质量(mg)'] = _column(hap_masses)
    df['装料比(Co/SiO2:HAP)'] = _column(ratios)
    df['乙醇浓度(ml/min)'] = _column(ethanols)
    # Ids containing 'A' are labelled 'I', all others 'II'.
    df['装料方式'] = ['I' if 'A' in str(idx) else 'II' for idx in df['催化剂组合编号']]
    return df
def analyze_catalyst_effects(df):
    """Problem 2: regress conversion and C4 selectivity on catalyst parameters.

    Steps performed, each writing its output under ./results/problem2 (the
    parameter table goes to ./processed_data):
      1. parse the catalyst description into numeric columns;
      2. standardise the four features with StandardScaler;
      3. fit two OLS models (ethanol conversion, C4 selectivity);
      4. save summaries, residuals, VIF values and Durbin-Watson statistics;
      5. save per-Co-loading descriptive statistics of the conversion;
      6. save and plot conversion by temperature and Co loading.

    Args:
        df: Attachment-1 DataFrame as produced by ``preprocess_attachment1``.

    Returns:
        Dict with keys 'regression_coefficients', 'boxplot_stats', 'vif_data'
        and 'temp_co_conversion', each holding a DataFrame.
    """
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    df = parse_catalyst_components(df)
    df.to_csv('./processed_data/attachment1_with_catalyst_params.csv', index=False, encoding='utf_8_sig')
    os.makedirs('./results/problem2', exist_ok=True)
    # Keep only rows where every feature and both targets are present.
    regression_df = df.dropna(subset=['Co负载量(wt%)', '装料比(Co/SiO2:HAP)',
                                      '乙醇浓度(ml/min)', '温度',
                                      '乙醇转化率(%)', 'C4烯烃选择性(%)'])
    # Standardise the features and record the scaling parameters.
    features = regression_df[['Co负载量(wt%)', '装料比(Co/SiO2:HAP)',
                              '乙醇浓度(ml/min)', '温度']]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)
    scaler_df = pd.DataFrame({
        '特征': features.columns,
        '均值': scaler.mean_,
        '标准差': scaler.scale_
    })
    scaler_df.to_csv('./results/problem2/standardization_params.csv', index=False, encoding='utf_8_sig')
    # Both models share the same design matrix (constant + scaled features).
    X_conv = sm.add_constant(scaled_features)
    y_conv = regression_df['乙醇转化率(%)']
    model_conv = sm.OLS(y_conv, X_conv).fit()
    y_select = regression_df['C4烯烃选择性(%)']
    model_select = sm.OLS(y_select, X_conv).fit()
    # The report contains Chinese text, so the encoding is fixed explicitly
    # instead of depending on the platform default.
    with open('./results/problem2/regression_results.txt', 'w', encoding='utf-8') as f:
        f.write("乙醇转化率回归分析结果:\n")
        f.write(str(model_conv.summary()))
        f.write("\n\n" + "=" * 80 + "\n\n")
        f.write("C4烯烃选择性回归分析结果:\n")
        f.write(str(model_select.summary()))
    residuals_conv = model_conv.resid
    residuals_select = model_select.resid
    pd.DataFrame({'乙醇转化率残差': residuals_conv, 'C4烯烃选择性残差': residuals_select}).to_csv(
        './results/problem2/regression_residuals.csv', index=False, encoding='utf_8_sig'
    )
    # VIF as a multicollinearity check on the scaled features.
    vif_data = pd.DataFrame()
    vif_data["特征"] = features.columns
    vif_data["VIF"] = [variance_inflation_factor(scaled_features, i) for i in range(scaled_features.shape[1])]
    vif_data.to_csv('./results/problem2/vif_values.csv', index=False, encoding='utf_8_sig')
    # Durbin-Watson statistic of the residuals of each model.
    with open('./results/problem2/durbin_watson.txt', 'w', encoding='utf-8') as f:
        f.write(f"乙醇转化率模型 Durbin-Watson 检验: {sm.stats.durbin_watson(residuals_conv):.4f}\n")
        f.write(f"C4烯烃选择性模型 Durbin-Watson 检验: {sm.stats.durbin_watson(residuals_select):.4f}")
    # Convert to plain arrays so element order is positional whether params
    # comes back as an ndarray or as a label-indexed Series.
    coef_conv = np.asarray(model_conv.params)
    coef_select = np.asarray(model_select.params)
    regression_coefficients = pd.DataFrame({
        '特征': ['截距'] + list(features.columns),
        '乙醇转化率系数': coef_conv,
        'C4烯烃选择性系数': coef_select
    })
    regression_coefficients.to_csv('./results/problem2/regression_coefficients.csv', index=False, encoding='utf_8_sig')
    # Descriptive statistics of the conversion for each Co loading level.
    stat_keys = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
    boxplot_stats_list = []
    for val in sorted(df['Co负载量(wt%)'].dropna().unique()):
        described = df[df['Co负载量(wt%)'] == val]['乙醇转化率(%)'].describe()
        stats_dict = {'Co负载量(wt%)': val}
        stats_dict.update({key: described[key] for key in stat_keys})
        boxplot_stats_list.append(stats_dict)
    boxplot_stats = pd.DataFrame(boxplot_stats_list)
    boxplot_stats.to_csv('./results/problem2/boxplot_stats.csv', index=False, encoding='utf_8_sig')
    boxplot_stats.to_pickle('./results/problem2/boxplot_stats.pkl')
    # Conversion grouped by temperature and Co loading.
    temp_co_conversion = df.groupby(['温度', 'Co负载量(wt%)'])['乙醇转化率(%)'].agg(['mean', 'std', 'count']).reset_index()
    temp_co_conversion.columns = ['温度', 'Co负载量(wt%)', '平均乙醇转化率(%)', '标准差', '样本量']
    temp_co_conversion.to_csv('./results/problem2/interaction_temp_co_loading.csv', index=False, encoding='utf_8_sig')
    plt.figure(figsize=(10, 6))
    # NOTE(review): newer seaborn releases prefer errorbar='sd' over ci='sd';
    # left unchanged because the installed seaborn version is not visible here.
    sns.lineplot(x='温度', y='乙醇转化率(%)', hue='Co负载量(wt%)', ci='sd', data=df)
    plt.title('温度与Co负载量对乙醇转化率的交互影响(含标准差)')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('./results/problem2/temp_vs_conversion_by_co_loading_with_error.png', dpi=300, bbox_inches='tight')
    plt.show()
    return {
        'regression_coefficients': regression_coefficients,
        'boxplot_stats': boxplot_stats,
        'vif_data': vif_data,
        'temp_co_conversion': temp_co_conversion
    }
# Problem 3
def analyze_c4_olefin_yield(df):
    """Compute the C4 olefin yield and analyse it against temperature.

    Adds 'C4烯烃收率(%)' to ``df`` in place, prints the best experiment overall
    and the best one below 350 °C, plots yield vs. temperature for all
    catalysts, fits one linear regression per catalyst (one figure each) and
    saves the results under ./results/problem3 and ./processed_data.

    Args:
        df: Attachment-1 DataFrame with '乙醇转化率(%)', 'C4烯烃选择性(%)',
            '温度', '催化剂组合编号' and '催化剂组合' columns.

    Returns:
        Tuple ``(max_yield_row, max_yield_low_temp_row)``; the second element
        is ``None`` when no experiment ran below 350 °C.
    """
    # Yield = conversion * selectivity / 100
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100
    # Experiment with the highest yield overall
    max_yield_row = df.loc[df['C4烯烃收率(%)'].idxmax()]
    print(f"\n最高C4烯烃收率为{max_yield_row['C4烯烃收率(%)']:.2f}%")
    print(f"对应的催化剂组合为:{max_yield_row['催化剂组合']}")
    print(f"对应的温度为:{max_yield_row['温度']}°C")
    # Yield against temperature, one line per catalyst
    plt.figure(figsize=(12, 6))
    sns.lineplot(x='温度', y='C4烯烃收率(%)', hue='催化剂组合编号', data=df, marker='o')
    plt.title('不同催化剂组合下C4烯烃收率随温度变化趋势', fontsize=14)
    plt.xlabel('温度 (°C)', fontsize=12)
    plt.ylabel('C4烯烃收率 (%)', fontsize=12)
    plt.grid(True)
    plt.legend(title='催化剂组合', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.savefig('./results/problem3/c4_olefin_yield_vs_temperature.png', dpi=300, bbox_inches='tight')
    plt.show()
    # Per-catalyst linear regression of yield on temperature
    results = []
    for catalyst in df['催化剂组合编号'].unique():
        catalyst_df = df[df['催化剂组合编号'] == catalyst]
        catalyst_df = catalyst_df.dropna(subset=['温度', 'C4烯烃收率(%)'])
        if len(catalyst_df) < 2:
            continue
        X = catalyst_df['温度'].values.reshape(-1, 1)
        y = catalyst_df['C4烯烃收率(%)'].values
        model = LinearRegression()
        model.fit(X, y)
        slope = model.coef_[0]
        intercept = model.intercept_
        y_pred = model.predict(X)
        r2 = r2_score(y, y_pred)
        equation = f'y = {slope:.4f}x + {intercept:.4f}'
        results.append({
            '催化剂组合': catalyst,
            '斜率': slope,
            '截距': intercept,
            'R2': r2,
            '方程': equation
        })
        plt.figure(figsize=(10, 6))
        plt.scatter(X, y, color='blue', label='原始数据')
        plt.plot(X, y_pred, color='red', label=f'线性拟合\nR²={r2:.4f}\n{equation}')
        plt.title(f'{catalyst} 催化剂下温度与C4烯烃收率的关系', fontsize=14)
        plt.xlabel('温度 (°C)', fontsize=12)
        plt.ylabel('C4烯烃收率 (%)', fontsize=12)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(f'./results/problem3/regression_{catalyst}.png', dpi=300, bbox_inches='tight')
        plt.show()
    regression_results_df = pd.DataFrame(results)
    regression_results_df.to_csv('./results/problem3/temperature_vs_c4_regression_results.csv', index=False, encoding='utf_8_sig')
    # Best experiment strictly below 350 °C; stays None when there is none.
    max_yield_low_temp_row = None
    low_temp_df = df[df['温度'] < 350]
    if not low_temp_df.empty:
        max_yield_low_temp_row = low_temp_df.loc[low_temp_df['C4烯烃收率(%)'].idxmax()]
        print(f"\n温度低于350°C时最高C4烯烃收率为{max_yield_low_temp_row['C4烯烃收率(%)']:.2f}%")
        print(f"对应的催化剂组合为:{max_yield_low_temp_row['催化剂组合']}")
        print(f"对应的温度为:{max_yield_low_temp_row['温度']}°C")
    else:
        print("\n没有温度低于350°C的实验数据")
    df.to_csv('./processed_data/attachment1_with_yield.csv', index=False, encoding='utf_8_sig')
    return max_yield_row, max_yield_low_temp_row
def plot_c4_olefin_yield_comparison(df):
    """Compare the best C4 olefin yield reached by each catalyst.

    Builds the per-catalyst best (temperature, mean yield) table, draws a
    dual-axis chart (yield bars plus best-temperature line) and a ranked bar
    chart, saves the ranked chart, and prints the top five catalysts.

    Args:
        df: DataFrame containing '催化剂组合编号', '温度' and 'C4烯烃收率(%)'.

    Returns:
        DataFrame with one row per catalyst (its best temperature and mean
        yield), sorted by yield in descending order.
    """
    yield_col = 'C4烯烃收率(%)'
    id_col = '催化剂组合编号'

    # Mean yield per (catalyst, temperature), then the best temperature per catalyst.
    mean_by_temp = df.groupby([id_col, '温度'])[yield_col].mean().reset_index()
    best_rows = mean_by_temp.groupby(id_col)[yield_col].idxmax()
    ranking = mean_by_temp.loc[best_rows].sort_values(by=yield_col, ascending=False)

    # Best single experiment per catalyst, taken from the raw rows.
    raw_best = df.loc[df.groupby(id_col)[yield_col].idxmax()][[id_col, '温度', yield_col]]

    # Dual-axis chart: bars for the yield, line for the temperature.
    # NOTE(review): this first figure is shown but never saved — confirm intent.
    fig, yield_axis = plt.subplots(figsize=(14, 8))
    temp_axis = yield_axis.twinx()
    sns.barplot(x=id_col, y=yield_col, data=raw_best,
                palette='viridis', ax=yield_axis)
    sns.lineplot(x=id_col, y='温度', data=raw_best,
                 color='red', marker='o', ax=temp_axis, label='最佳温度')
    plt.title('不同催化剂组合的最佳C4烯烃收率及对应温度', fontsize=16)
    plt.xlabel('催化剂组合编号', fontsize=14)
    yield_axis.set_ylabel('最佳C4烯烃收率 (%)', fontsize=14)
    temp_axis.set_ylabel('温度 (°C)', fontsize=14)

    # Ranked bar chart, saved to disk.
    plt.figure(figsize=(14, 8))
    sns.barplot(x=id_col, y=yield_col, data=ranking, palette='viridis')
    plt.title('不同催化剂组合的最佳C4烯烃收率对比', fontsize=16)
    plt.xlabel('催化剂组合编号', fontsize=14)
    plt.ylabel('最佳C4烯烃收率 (%)', fontsize=14)
    plt.xticks(rotation=45)
    plt.grid(True, axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('./results/problem3/best_c4_olefin_yield_by_catalyst.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Report the five best catalysts.
    print("\n前5个最佳催化剂组合:")
    for rank, (_, entry) in enumerate(ranking.head(5).iterrows(), start=1):
        print(f"{rank}. 催化剂组合:{entry['催化剂组合编号']}")
        print(f" 最佳温度:{entry['温度']}°C")
        print(f" C4烯烃收率:{entry['C4烯烃收率(%)']:.2f}%")
        print()
    return ranking
def analyze_catalyst_components_impact(df):
    """Plot the mean C4 olefin yield against each catalyst parameter.

    Parses the catalyst description, recomputes the yield, and for Co
    loading, loading ratio and ethanol concentration in turn draws and saves
    a bar chart of the mean yield per distinct parameter value.

    Args:
        df: Attachment-1 DataFrame; it is extended in place by the parsing
            step and by the yield column.

    Returns:
        Tuple ``(co_loading_impact, feed_ratio_impact, ethanol_conc_impact)``
        of mean-yield DataFrames.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100

    # (grouping column, figure title, x-axis label, output file)
    panels = (
        ('Co负载量(wt%)', 'Co负载量对C4烯烃收率的影响', 'Co负载量 (wt%)',
         './results/problem3/co_loading_impact.png'),
        ('装料比(Co/SiO2:HAP)', '装料比对C4烯烃收率的影响', '装料比 (Co/SiO2:HAP)',
         './results/problem3/feed_ratio_impact.png'),
        ('乙醇浓度(ml/min)', '乙醇浓度对C4烯烃收率的影响', '乙醇浓度 (ml/min)',
         './results/problem3/ethanol_conc_impact.png'),
    )
    summaries = []
    for factor, heading, axis_label, target in panels:
        mean_yield = df.groupby(factor)['C4烯烃收率(%)'].mean().reset_index()
        plt.figure(figsize=(10, 6))
        sns.barplot(x=factor, y='C4烯烃收率(%)', data=mean_yield, palette='viridis')
        plt.title(heading, fontsize=14)
        plt.xlabel(axis_label, fontsize=12)
        plt.ylabel('平均C4烯烃收率 (%)', fontsize=12)
        plt.grid(True, axis='y')
        plt.tight_layout()
        plt.savefig(target, dpi=300, bbox_inches='tight')
        plt.show()
        summaries.append(mean_yield)
    return tuple(summaries)
def additional_output_problem3(df, max_yield_row, max_yield_low_temp_row, best_temp_per_catalyst):
    """Write the Problem 3 result tables under ./results/problem3.

    Args:
        df: DataFrame that already contains 'C4烯烃收率(%)' and the parsed
            catalyst columns 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)' and
            '乙醇浓度(ml/min)'.
        max_yield_row: Series describing the best experiment overall.
        max_yield_low_temp_row: Series describing the best experiment below
            350 °C, or ``None``; with ``None`` a text note is written instead
            of the CSV.
        best_temp_per_catalyst: Ranked per-catalyst table; its first five
            rows are saved as the top-5 list.
    """
    out_dir = './results/problem3'
    pd.DataFrame([max_yield_row]).to_csv(
        f'{out_dir}/max_c4_olefin_yield_experiment.csv', index=False, encoding='utf_8_sig')
    if max_yield_low_temp_row is not None:
        pd.DataFrame([max_yield_low_temp_row]).to_csv(
            f'{out_dir}/max_c4_olefin_yield_low_temp.csv', index=False, encoding='utf_8_sig')
    else:
        # The note is Chinese text, so do not rely on the platform default encoding.
        with open(f'{out_dir}/max_c4_olefin_yield_low_temp.txt', 'w', encoding='utf-8') as f:
            f.write("没有温度低于350°C的实验数据")
    best_temp_per_catalyst.to_csv(f'{out_dir}/best_temp_per_catalyst.csv', index=False, encoding='utf_8_sig')
    best_temp_per_catalyst.head(5).to_csv(f'{out_dir}/top_5_catalysts.csv', index=False, encoding='utf_8_sig')
    yield_stats = df.groupby('催化剂组合编号')['C4烯烃收率(%)'].agg(['mean', 'std', 'max', 'min', 'count'])
    yield_stats.to_csv(f'{out_dir}/c4_olefin_yield_statistics.csv', index=True, encoding='utf_8_sig')
    # Mean yield per distinct value of each catalyst parameter.
    for factor, file_name in (
        ('Co负载量(wt%)', 'co_loading_impact.csv'),
        ('装料比(Co/SiO2:HAP)', 'feed_ratio_impact.csv'),
        ('乙醇浓度(ml/min)', 'ethanol_conc_impact.csv'),
    ):
        impact = df.groupby(factor)['C4烯烃收率(%)'].mean().reset_index()
        impact.to_csv(f'{out_dir}/{file_name}', index=False, encoding='utf_8_sig')
def polynomial_regression(df, degree=2):
    """Fit a polynomial of yield against temperature for every catalyst.

    For each catalyst with at least ``degree + 1`` complete rows, temperature
    is expanded with PolynomialFeatures and fitted with LinearRegression. One
    figure per catalyst and a summary CSV are written to ./results/problem3.

    Args:
        df: DataFrame with '催化剂组合编号', '温度' and 'C4烯烃收率(%)'.
        degree: Polynomial degree (default 2).

    Returns:
        DataFrame with one row per fitted catalyst: degree, coefficients,
        intercept, R2, MSE and the equation string.
    """
    # Imported once per call rather than once per catalyst.
    from sklearn.preprocessing import PolynomialFeatures

    results = []
    for catalyst in df['催化剂组合编号'].unique():
        catalyst_df = df[df['催化剂组合编号'] == catalyst]
        catalyst_df = catalyst_df.dropna(subset=['温度', 'C4烯烃收率(%)'])
        if len(catalyst_df) < degree + 1:
            continue
        # Plain arrays for both fitting and prediction keep the input type
        # consistent between fit_transform and the later transform call.
        X = catalyst_df[['温度']].to_numpy(dtype=float)
        y = catalyst_df['C4烯烃收率(%)'].to_numpy(dtype=float)
        poly = PolynomialFeatures(degree=degree, include_bias=False)
        X_poly = poly.fit_transform(X)
        model = LinearRegression()
        model.fit(X_poly, y)
        y_pred = model.predict(X_poly)
        r2 = r2_score(y, y_pred)
        mse = mean_squared_error(y, y_pred)
        equation = f'y = {model.intercept_:.4f}'
        for i, c in enumerate(model.coef_):
            equation += f' + {c:.4f}*x^{i+1}'
        results.append({
            '催化剂组合': catalyst,
            '多项式次数': degree,
            '系数': model.coef_,
            '截距': model.intercept_,
            'R2': r2,
            'MSE': mse,
            '方程': equation
        })
        plt.figure(figsize=(10, 6))
        plt.scatter(X[:, 0], y, color='blue', label='原始数据')
        # Dense temperature grid for a smooth fitted curve.
        X_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
        y_range = model.predict(poly.transform(X_range))
        plt.plot(X_range[:, 0], y_range, color='red', label=f'多项式拟合\nR²={r2:.4f}\n{equation}')
        plt.title(f'{catalyst} 催化剂下温度与C4烯烃收率的非线性关系', fontsize=14)
        plt.xlabel('温度 (°C)', fontsize=12)
        plt.ylabel('C4烯烃收率 (%)', fontsize=12)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(f'./results/problem3/polynomial_regression_{catalyst}.png', dpi=300, bbox_inches='tight')
        plt.show()
    results_df = pd.DataFrame(results)
    results_df.to_csv(f'./results/problem3/polynomial_regression_results_degree_{degree}.csv', index=False, encoding='utf_8_sig')
    return results_df
# Problem 4
def design_additional_experiments(df, num_experiments=5):
    """Design follow-up experiments around the best observed condition.

    The experiment with the highest C4 olefin yield is taken as the baseline.
    The plan holds up to five experiments: a repeat of the baseline,
    temperature -10 and +10, Co loading x1.2 and loading ratio x1.2, each
    changing a single variable. The plan is saved as a CSV and the rationale
    as a text file under ./results/problem4.

    Args:
        df: Attachment-1 DataFrame; extended in place with the parsed
            catalyst columns and the yield column.
        num_experiments: Maximum number of planned experiments to emit, taken
            from the start of the plan. The default of 5 emits the full plan;
            ``None`` also emits the full plan.

    Returns:
        DataFrame with columns '实验编号', '温度', 'Co负载量(wt%)',
        '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)' and '设计理由'.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100
    # Baseline: the experiment with the highest yield.
    max_yield_row = df.loc[df['C4烯烃收率(%)'].idxmax()]
    best_temp = max_yield_row['温度']
    best_co_loading = max_yield_row['Co负载量(wt%)']
    best_ratio = max_yield_row['装料比(Co/SiO2:HAP)']
    best_ethanol = max_yield_row['乙醇浓度(ml/min)']
    print("\n当前最高C4烯烃收率出现在:")
    print(f"温度:{best_temp}°C")
    print(f"Co负载量:{best_co_loading}wt%")
    print(f"装料比:{best_ratio:.2f}")
    print(f"乙醇浓度:{best_ethanol}ml/min")

    baseline = {
        '温度': best_temp,
        'Co负载量(wt%)': best_co_loading,
        '装料比(Co/SiO2:HAP)': best_ratio,
        '乙醇浓度(ml/min)': best_ethanol,
    }
    # (id, overrides on the baseline, reason stored in the CSV, rationale lines)
    plan = [
        ('New1', {}, '基准实验,验证最佳条件可重复性',
         ["1. 实验New1:基准实验\n",
          " 重复当前最佳条件,验证实验结果的可重复性\n",
          " 确认最佳条件的稳定性\n\n"]),
        ('New2', {'温度': best_temp - 10}, '测试最佳温度-10°C的效果',
         ["2. 实验New2:温度降低10°C\n",
          " 测试最佳温度范围,了解温度对C4烯烃收率的敏感度\n",
          " 验证是否可能存在更优的温度条件\n\n"]),
        ('New3', {'温度': best_temp + 10}, '测试最佳温度+10°C的效果',
         ["3. 实验New3:温度升高10°C\n",
          " 测试最佳温度范围,了解高温对反应的影响\n",
          " 验证温度上限和热稳定性\n\n"]),
        ('New4', {'Co负载量(wt%)': best_co_loading * 1.2}, '测试Co负载量增加20%的效果',
         ["4. 实验New4:Co负载量增加20%\n",
          " 测试Co负载量对反应的影响\n",
          " 验证是否存在更优的Co负载量\n\n"]),
        ('New5', {'装料比(Co/SiO2:HAP)': best_ratio * 1.2}, '测试装料比增加20%的效果',
         ["5. 实验New5:装料比增加20%\n",
          " 测试装料比对反应的影响\n",
          " 验证是否存在更优的装料比\n\n"]),
    ]
    # Honour the requested cap; the rationale file follows the same cut.
    if num_experiments is not None:
        plan = plan[:max(num_experiments, 0)]

    experiments = [
        {'实验编号': exp_id, **baseline, **overrides, '设计理由': reason}
        for exp_id, overrides, reason, _ in plan
    ]
    # Explicit columns keep the frame well-formed even when the plan is empty.
    experiments_df = pd.DataFrame(
        experiments,
        columns=['实验编号', '温度', 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)',
                 '乙醇浓度(ml/min)', '设计理由'],
    )
    experiments_df.to_csv('./results/problem4/additional_experiments_design.csv', index=False, encoding='utf_8_sig')
    with open('./results/problem4/experiment_design_reasons.txt', 'w', encoding='utf-8') as f:
        f.write("新增实验设计说明:\n\n")
        for _, _, _, rationale in plan:
            f.writelines(rationale)
        f.write("设计原则:\n")
        f.write("1. 以当前最佳条件为基础进行小范围调整\n")
        f.write("2. 每次只改变一个变量,便于分析变量影响\n")
        f.write("3. 聚焦于关键因素:温度、Co负载量、装料比\n")
        f.write("4. 保持乙醇浓度不变,避免过多变量变化\n")
        f.write("5. 实验设计幅度参考了现有实验范围和分布")
    return experiments_df
def plot_experiment_comparison_heatmap(df):
    """Draw and save a correlation heat map of the experiment parameters.

    The catalyst description is parsed, the C4 olefin yield recomputed, and
    the pairwise correlation of temperature, Co loading, loading ratio,
    ethanol concentration and yield is plotted and written to
    ./results/problem4 (PNG plus CSV).

    Args:
        df: Attachment-1 DataFrame; extended in place by the parsing step.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100

    selected = ['温度', 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)', 'C4烯烃收率(%)']
    correlations = df[selected].corr()

    plt.figure(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('实验参数对比热图', fontsize=16)
    plt.tight_layout()
    plt.savefig('./results/problem4/experiment_comparison_heatmap.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Keep the numbers behind the figure as well.
    correlations.to_csv('./results/problem4/experiment_correlation_matrix.csv', index=True, encoding='utf_8_sig')
def plot_experiment_condition_distribution(df):
    """Draw and save histograms of the four experiment conditions.

    One 2x2 figure with a KDE-overlaid histogram each for temperature, Co
    loading, loading ratio and ethanol concentration, saved under
    ./results/problem4.

    Args:
        df: Attachment-1 DataFrame; extended in place by the parsing step.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    # (column, bar colour, panel title), in row-major panel order
    panels = (
        ('温度', 'skyblue', '温度分布'),
        ('Co负载量(wt%)', 'salmon', 'Co负载量分布'),
        ('装料比(Co/SiO2:HAP)', 'lightgreen', '装料比分布'),
        ('乙醇浓度(ml/min)', 'gold', '乙醇浓度分布'),
    )
    for panel, (column, shade, heading) in zip(axes.flat, panels):
        sns.histplot(df[column], ax=panel, kde=True, color=shade)
        panel.set_title(heading)

    plt.tight_layout()
    plt.savefig('./results/problem4/experiment_condition_distribution.png', dpi=300, bbox_inches='tight')
    plt.show()
def plot_parameter_trend_lines(df, experiments_df):
    """Plot yield trends against each parameter and mark the planned values.

    Four panels show the C4 olefin yield against temperature, Co loading,
    loading ratio and ethanol concentration. A red dashed line marks the best
    historical temperature, the planned Co loading, the planned loading ratio
    and the baseline ethanol concentration.

    Args:
        df: Attachment-1 DataFrame; extended in place by the parsing step.
        experiments_df: Plan returned by ``design_additional_experiments``,
            with a '设计理由' column identifying each experiment.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100
    max_yield_row = df.loc[df['C4烯烃收率(%)'].idxmax()]
    best_temp = max_yield_row['温度']

    def _planned_value(reason_prefix, column, fall_back_to_first=False):
        # Match on the start of the reason text: the stored baseline reason is
        # longer than the bare '基准实验' label, so an exact comparison never
        # matches it.
        reasons = experiments_df['设计理由'].astype(str)
        matches = experiments_df[reasons.str.startswith(reason_prefix)]
        if matches.empty:
            if fall_back_to_first and not experiments_df.empty:
                return experiments_df[column].iloc[0]
            return None
        return matches[column].iloc[0]

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # Temperature
    sns.lineplot(data=df, x='温度', y='C4烯烃收率(%)', hue='Co负载量(wt%)',
                 ax=axes[0, 0], palette='viridis')
    axes[0, 0].set_title('温度对C4烯烃收率的影响')
    axes[0, 0].axvline(x=best_temp, color='red', linestyle='--', label=f'最佳温度: {best_temp}°C')
    axes[0, 0].legend()

    # Co loading
    sns.lineplot(data=df, x='Co负载量(wt%)', y='C4烯烃收率(%)', hue='温度',
                 ax=axes[0, 1], palette='viridis')
    axes[0, 1].set_title('Co负载量对C4烯烃收率的影响')
    best_co = _planned_value('测试Co负载量增加20%的效果', 'Co负载量(wt%)')
    if best_co is not None:
        axes[0, 1].axvline(x=best_co, color='red', linestyle='--', label=f'新增Co负载量: {best_co:.2f}wt%')
    axes[0, 1].legend()

    # Loading ratio
    sns.lineplot(data=df, x='装料比(Co/SiO2:HAP)', y='C4烯烃收率(%)', hue='温度',
                 ax=axes[1, 0], palette='viridis')
    axes[1, 0].set_title('装料比对C4烯烃收率的影响')
    best_ratio = _planned_value('测试装料比增加20%的效果', '装料比(Co/SiO2:HAP)')
    if best_ratio is not None:
        axes[1, 0].axvline(x=best_ratio, color='red', linestyle='--', label=f'新增装料比: {best_ratio:.2f}')
    axes[1, 0].legend()

    # Ethanol concentration; falls back to the first planned experiment.
    sns.lineplot(data=df, x='乙醇浓度(ml/min)', y='C4烯烃收率(%)', hue='温度',
                 ax=axes[1, 1], palette='viridis')
    axes[1, 1].set_title('乙醇浓度对C4烯烃收率的影响')
    best_ethanol = _planned_value('基准实验', '乙醇浓度(ml/min)', fall_back_to_first=True)
    if best_ethanol is not None:
        axes[1, 1].axvline(x=best_ethanol, color='red', linestyle='--',
                           label=f'基准乙醇浓度: {best_ethanol:.2f}ml/min')
    axes[1, 1].legend()

    plt.tight_layout()
    plt.savefig('./results/problem4/parameter_trend_lines.png', dpi=300, bbox_inches='tight')
    plt.show()
def plot_optimal_experiment_comparison(df, experiments_df):
    """Compare the best historical experiment with the first planned one.

    The four conditions (temperature, Co loading, loading ratio, ethanol
    concentration) of both experiments are divided by the historical maximum
    of each condition and drawn on a radar chart. The chart and the
    unnormalised comparison table are saved under ./results/problem4.

    Args:
        df: Attachment-1 DataFrame; extended in place by the parsing step.
        experiments_df: Plan returned by ``design_additional_experiments``;
            its first row is used as the new baseline experiment.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100
    max_yield_row = df.loc[df['C4烯烃收率(%)'].idxmax()]

    # Two rows: historical best and planned baseline.
    comparison_df = pd.concat([max_yield_row, experiments_df.iloc[0]], axis=1).T
    comparison_df.index = ['最佳历史实验', '新增基准实验']

    categories = ['温度', 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)']
    # Scale each condition by its historical maximum. The concatenated frame
    # is object-typed, so convert to float before dividing.
    max_values = df[categories].max()
    normalized = (comparison_df[categories].astype(float) / max_values).to_numpy()

    # Close the polygon: repeat the first point at the end of both the angles
    # and the data so the two have equal length.
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    closed_data = np.concatenate((normalized, normalized[:, [0]]), axis=1)

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    # Best historical experiment
    ax.plot(closed_angles, closed_data[0], color='blue', linewidth=2, linestyle='solid', label='最佳历史实验')
    ax.fill(closed_angles, closed_data[0], color='blue', alpha=0.25)
    # Planned baseline experiment
    ax.plot(closed_angles, closed_data[1], color='red', linewidth=2, linestyle='solid', label='新增基准实验')
    ax.fill(closed_angles, closed_data[1], color='red', alpha=0.25)
    # One spoke label per category (the closing duplicate gets none).
    ax.set_thetagrids(np.degrees(angles), categories)
    ax.set_rlabel_position(30)
    plt.ylim(0, 1.2)
    plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
    plt.title('最佳历史实验与新增基准实验对比')
    plt.tight_layout()
    plt.savefig('./results/problem4/experiment_comparison_radar_chart.png', dpi=300, bbox_inches='tight')
    plt.show()

    comparison_df.to_csv('./results/problem4/experiment_comparison_data.csv', index=True, encoding='utf_8_sig')
def plot_experiment_design_space(df, experiments_df):
    """Visualise where the planned experiments sit among the historical ones.

    Three 2-D scatter panels (temperature vs. Co loading, temperature vs.
    loading ratio, Co loading vs. loading ratio) size historical points by
    their C4 olefin yield and overlay the planned experiments in red. The
    fourth panel is a 3-D scatter of the same three parameters. The figure is
    saved under ./results/problem4.

    Args:
        df: Attachment-1 DataFrame; extended in place by the parsing step.
        experiments_df: Plan returned by ``design_additional_experiments``.
            It carries no yield column, so planned points use a fixed size.
    """
    df = parse_catalyst_components(df)
    df['C4烯烃收率(%)'] = df['乙醇转化率(%)'] * df['C4烯烃选择性(%)'] / 100

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    # (axes, x column, y column, panel title)
    panels = (
        (axes[0, 0], '温度', 'Co负载量(wt%)', '温度 vs Co负载量'),
        (axes[0, 1], '温度', '装料比(Co/SiO2:HAP)', '温度 vs 装料比'),
        (axes[1, 0], 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)', 'Co负载量 vs 装料比'),
    )
    for panel, x_col, y_col, heading in panels:
        sns.scatterplot(data=df, x=x_col, y=y_col, size='C4烯烃收率(%)',
                        sizes=(20, 200), color='blue', alpha=0.6, label='历史实验', ax=panel)
        # Planned experiments have no measured yield yet: fixed marker size.
        sns.scatterplot(data=experiments_df, x=x_col, y=y_col,
                        color='red', s=100, label='新增实验', ax=panel)
        panel.set_title(heading)

    # Replace the unused 2-D axes with a 3-D one instead of stacking them.
    axes[1, 1].remove()
    ax = fig.add_subplot(2, 2, 4, projection='3d')
    ax.scatter(df['温度'], df['Co负载量(wt%)'], df['装料比(Co/SiO2:HAP)'],
               c=df['C4烯烃收率(%)'], cmap='viridis', alpha=0.6, label='历史实验')
    ax.scatter(experiments_df['温度'], experiments_df['Co负载量(wt%)'], experiments_df['装料比(Co/SiO2:HAP)'],
               c='red', s=100, alpha=1, label='新增实验')
    ax.set_xlabel('温度 (°C)')
    ax.set_ylabel('Co负载量 (wt%)')
    ax.set_zlabel('装料比')
    ax.set_title('实验设计空间分布')
    ax.legend()
    plt.tight_layout()
    plt.savefig('./results/problem4/experiment_design_space.png', dpi=300, bbox_inches='tight')
    plt.show()
def analyze_catalyst_parameters(df, experiments_df):
    """Draw a pair grid of the experiment parameters with planned values marked.

    Scatter plots fill the off-diagonal panels and histograms the diagonal.
    On each diagonal panel a red dashed line is drawn at the value every
    planned experiment uses for that parameter. The figure is saved under
    ./results/problem4.

    Args:
        df: Attachment-1 DataFrame; extended in place by the parsing step.
        experiments_df: Plan returned by ``design_additional_experiments``.
    """
    df = parse_catalyst_components(df)
    params = ['温度', 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)']
    g = sns.PairGrid(df[params])
    g.map_upper(sns.scatterplot, color='blue', alpha=0.6)
    g.map_lower(sns.scatterplot, color='blue', alpha=0.6)
    g.map_diag(sns.histplot, color='blue', alpha=0.6)
    # Mark the planned experiments. They are identified by their parameter
    # values: the plan has no catalyst id column to match against.
    # The diagonal axes are paired with `params` in order (square grid).
    for _, planned in experiments_df.iterrows():
        for var, diag_ax in zip(params, g.diag_axes):
            if var in experiments_df.columns and pd.notna(planned[var]):
                diag_ax.axvline(x=planned[var], color='red', linestyle='--', alpha=0.5)
    plt.savefig('./results/problem4/catalyst_parameter_relations.png', dpi=300, bbox_inches='tight')
    plt.show()
def plot_experiment_parameter_changes(experiments_df):
    """Plot how each planned experiment deviates from the baseline experiment.

    The baseline is the row whose '设计理由' equals '基准实验'; when no such row
    exists a warning is printed and the first row is used. The differences in
    temperature, Co loading, loading ratio and ethanol concentration are
    drawn as grouped bars and saved (PNG and CSV) under ./results/problem4.

    Args:
        experiments_df: Plan returned by ``design_additional_experiments``.

    Raises:
        ValueError: If the baseline values cannot be extracted.
    """
    baseline = experiments_df[experiments_df['设计理由'] == '基准实验']
    if baseline.empty:
        print("警告:未找到设计理由为'基准实验'的实验,使用第一行作为基准")
        # Double brackets keep a one-row DataFrame rather than a Series.
        baseline = experiments_df.iloc[[0]]

    source_cols = ['温度', 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)']
    try:
        reference = [baseline[col].iloc[0] for col in source_cols]
    except IndexError:
        raise ValueError("基准实验数据为空,无法提取参数,请检查输入数据")

    # One row of differences per planned experiment.
    deltas = [
        [planned[col] - ref for col, ref in zip(source_cols, reference)]
        for _, planned in experiments_df.iterrows()
    ]
    change_df = pd.DataFrame(deltas, columns=['温度', 'Co负载量', '装料比', '乙醇浓度'])
    change_df['实验编号'] = experiments_df['实验编号']

    change_df.set_index('实验编号').plot(kind='bar', figsize=(12, 6))
    plt.title('新增实验参数变化趋势')
    plt.ylabel('参数变化值(相对于基准实验)')
    plt.xticks(rotation=0)
    plt.axhline(y=0, color='black', linestyle='--', linewidth=1)
    plt.tight_layout()
    plt.savefig('./results/problem4/experiment_parameter_changes.png', dpi=300, bbox_inches='tight')
    plt.show()

    change_df.to_csv('./results/problem4/experiment_parameter_changes.csv', index=False, encoding='utf_8_sig')
def main(attachment1_path='C:/Users/Yeah/Desktop/数模/第五题/B/附件1.xlsx',
         attachment2_path='C:/Users/Yeah/Desktop/数模/第五题/B/附件2.xlsx'):
    """Run the full analysis pipeline for problems 1 to 4.

    Args:
        attachment1_path: Excel file with the catalyst/temperature experiments.
        attachment2_path: Excel file with the time series of one experiment.
            Both default to the original author's local paths; pass other
            locations to run the script elsewhere.
    """
    df1 = preprocess_attachment1(attachment1_path)
    df2 = preprocess_attachment2(attachment2_path)
    df1.to_csv('./processed_data/attachment1_processed.csv', index=False, encoding='utf_8_sig')
    df2.to_csv('./processed_data/attachment2_processed.csv', index=False, encoding='utf_8_sig')

    # Problem 1: trends against temperature and over time
    plot_grouped_line_with_regression(
        data=df1,
        x_col='温度',
        y_col='乙醇转化率(%)',
        group_col='催化剂组合编号',
        title='不同催化剂组合下乙醇转化率随温度变化趋势(含线性拟合)',
        xlabel='温度 (°C)',
        ylabel='乙醇转化率 (%)',
        save_name='ethanol_conversion_regression'
    )
    plot_grouped_line_with_regression(
        data=df1,
        x_col='温度',
        y_col='C4烯烃选择性(%)',
        group_col='催化剂组合编号',
        title='不同催化剂组合下C4烯烃选择性随温度变化趋势(含线性拟合)',
        xlabel='温度 (°C)',
        ylabel='C4烯烃选择性 (%)',
        save_name='c4_selectivity_regression'
    )
    plot_ethanol_vs_c4_with_regression(
        data=df1,
        ethanol_col='乙醇转化率(%)',
        c4_col='C4烯烃选择性(%)',
        group_col='催化剂组合编号',
        title='乙醇转化率与C4烯烃选择性关系图(按催化剂组合)',
        xlabel='乙醇转化率 (%)',
        ylabel='C4烯烃选择性 (%)',
        save_name='ethanol_vs_c4_regression'
    )
    analyze_attachment2_data(df2)

    # Problem 2
    analyze_catalyst_effects(df1)

    # Problem 3. The order matters: later steps read columns that earlier
    # steps add to df1 in place.
    max_yield_row, max_yield_low_temp_row = analyze_c4_olefin_yield(df1)
    best_temp_per_catalyst = plot_c4_olefin_yield_comparison(df1)
    analyze_catalyst_components_impact(df1)
    additional_output_problem3(df1, max_yield_row, max_yield_low_temp_row, best_temp_per_catalyst)
    polynomial_regression(df1, degree=2)

    # Problem 4
    additional_experiments_df = design_additional_experiments(df1)
    print("\n新增实验设计方案:")
    print(additional_experiments_df[['实验编号', '温度', 'Co负载量(wt%)', '装料比(Co/SiO2:HAP)', '乙醇浓度(ml/min)', '设计理由']])
    # Extra visualisations
    plot_experiment_condition_distribution(df1)
    plot_experiment_parameter_changes(additional_experiments_df)
    plot_parameter_trend_lines(df1, additional_experiments_df)
    plot_experiment_design_space(df1, additional_experiments_df)
    plot_optimal_experiment_comparison(df1, additional_experiments_df)


if __name__ == "__main__":
    main()