How do I rename a data file

This article describes two ways to move or rename data files in Oracle: the ALTER DATABASE command and the ALTER TABLESPACE command. The detailed steps cover shutting down the instance, moving the file at the file-system level, and updating the name at the database level.

I saw a post on a forum saying that a RENAME DATAFILE command had failed. My first guess was a syntax problem, and since I rarely use this command I googled it. The syntax turned out to be fine; the problem was most likely the order of operations, because at the time of the rename the data file must be offline, or the database must be in the MOUNT state.

Thinking it over, I remembered I had already written up a rename article before, except that post was titled "moving data files"; the only difference from today's topic is that a move also changes the path as part of the rename. Also, the material below is in English, so treat it as English practice.

How to move data files in Oracle

http://blog.youkuaiyun.com/tianlesoftware/archive/2009/11/30/4899172.aspx

The following content is quoted from the web:

Data files can be moved or renamed using one of two methods: alter database or alter tablespace.

The main difference between them is that alter tablespace only applies to data files that do not contain the SYSTEM tablespace, active rollback segments, or temporary segments, but the procedure can be performed while the instance is still running. The alter database method works for any data file, but the instance must be shut down.
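A quick way to tell which method applies to a given file is to check which tablespace it belongs to. The following is not part of the quoted procedure, just a minimal lookup sketch assuming DBA privileges and the standard DBA_DATA_FILES dictionary view:

SVRMGR> select file_name, tablespace_name from dba_data_files;

If the file belongs to the SYSTEM tablespace, or to a tablespace with active rollback segments or temporary segments, only the alter database method can be used.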

1. The alter database method:

1. Shut down the instance.

2. Rename and/or move the data file using operating system commands.

3. Mount the database and use alter database to rename the file within the database. A fully qualified file name is required in the syntax of your operating system. For example, to rename a file called 'data01.dbf' to 'data04.dbf' and move it to a new location at the same time (at this point in the example the instance has been shut down):

4. Start the instance.

SVRMGR> connect sys/oracle as sysdba;

SVRMGR> startup mount U1;

SVRMGR> alter database rename file '/u01/oracle/U1/data01.dbf' TO '/u02/oracle/U1/data04.dbf';

SVRMGR> alter database open;

Notice the single quotes around the fully qualified file names, and remember: the files must exist at the source and destination paths. The instance is now open using the new location and name for the data file.
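As an optional sanity check (not part of the quoted steps), the new path can be confirmed from the standard v$datafile dynamic performance view:

SVRMGR> select name, status from v$datafile;

The file should now be listed as '/u02/oracle/U1/data04.dbf'.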

2. The alter tablespace method:

This method has the advantage that it doesn't require shutting down the instance, but it only works with non-SYSTEM tablespaces. Further, it can't be used for tablespaces that contain active rollback segments or temporary segments.

1. Take the tablespace offline.

2. Rename and/or move the data file using operating system commands.

3. Use the alter tablespace command to rename the file in the database.

4. Bring the tablespace back online.

SVRMGR> connect sys/oracle as sysdba

SVRMGR> alter tablespace app_data offline;

SVRMGR> alter tablespace app_data rename datafile '/u01/oracle/U1/data01.dbf' TO '/u02/oracle/U1/data04.dbf';

SVRMGR> alter tablespace app_data online;

The tablespace will be back online using the new name and/or location of the data file.
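Here too, an optional check outside the quoted steps is to confirm the new path in the dictionary; the tablespace name app_data is taken from the example above:

SVRMGR> select file_name from dba_data_files where tablespace_name = 'APP_DATA';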

Both of these methodologies can also be used within Oracle Enterprise Manager.
