tf_split_str: a function for splitting comma-delimited strings

This post walks through the design and implementation of a SQL function that parses a comma-separated string of ids and maps each id to its corresponding type name. Concrete table definitions and insert statements set up a test case, and a working function example shows how to look up and return the matching type names from a predefined type table.
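To make the goal concrete: given the lookup table of type names built below, the function turns an id list such as '1,2,3' into the matching name list 电脑网络,游戏动漫,软件下载. The sections that follow create the test data, define the function, and run a query that exercises it.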


--1. Set up test data
--1.1 create table TypeAdv
CREATE TABLE TypeAdv
(
    id int,
    TypeName varchar(200)
)
--1.2 insert data
insert into TypeAdv(id,TypeName) values(1,'电脑网络')
insert into TypeAdv(id,TypeName) values(2,'游戏动漫')
insert into TypeAdv(id,TypeName) values(3,'软件下载')
insert into TypeAdv(id,TypeName) values(4,'商务商铺')
insert into TypeAdv(id,TypeName) values(5,'建站服务')
insert into TypeAdv(id,TypeName) values(6,'门户综合')
insert into TypeAdv(id,TypeName) values(7,'影视音乐')
insert into TypeAdv(id,TypeName) values(8,'休闲娱乐')
insert into TypeAdv(id,TypeName) values(9,'生活资讯')
insert into TypeAdv(id,TypeName) values(10,'文学小说')
    
--1.3 create table GroupAdv
CREATE TABLE GroupAdv
(
    id int,
    GroupName varchar(200),
    IntentionSet varchar(200)   -- comma-separated list of TypeAdv ids
)
--1.4 insert data
insert into GroupAdv(id,GroupName,IntentionSet) values(5,'广告分组一','1,2,3,4,5,6,7,10')
insert into GroupAdv(id,GroupName,IntentionSet) values(6,'广告分组二','1,2,3,4,5,6')
--2. Create the function that splits on ","
CREATE FUNCTION tf_split_str
(
    @string nvarchar(4000)
)
RETURNS varchar(4000)
AS
BEGIN
    DECLARE @object_id nvarchar(400)
    DECLARE @i         int
    DECLARE @len       int
    DECLARE @return    varchar(4000)
    SET @return = ''

    -- Nothing to split: return the empty string
    IF @string IS NULL OR LTRIM(@string) = ''
        RETURN @return

    -- Consume the input one comma at a time
    WHILE CHARINDEX(',', @string) > 0
    BEGIN
        SET @len = LEN(@string)
        SET @i = CHARINDEX(',', @string)
        -- The id in front of the next comma
        SET @object_id = LEFT(@string, @i - 1)
        -- Look up its type name and append it (no-op if the id is not found)
        SELECT @return = @return + ',' + TypeName FROM TypeAdv WHERE id = @object_id
        -- Drop the consumed id and its comma from the input
        SET @string = RIGHT(@string, @len - @i)
    END

    -- Whatever remains after the last comma is the final id
    SET @object_id = @string
    SELECT @return = @return + ',' + TypeName FROM TypeAdv WHERE id = @object_id

    -- Strip the leading comma added by the first append
    SET @return = STUFF(@return, 1, 1, '')
    RETURN @return
END
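
With the function in place, a quick standalone call checks the basic behavior (the expected value assumes the TypeAdv rows from section 1.2 were inserted):

select dbo.tf_split_str('1,2,3') as TypeNames
-- expected: 电脑网络,游戏动漫,软件下载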
--3. Test
select b.id, b.GroupName, dbo.tf_split_str(b.IntentionSet) as TypeNames from GroupAdv b
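
Given the two GroupAdv rows inserted above, the test query should return:

-- 5  广告分组一  电脑网络,游戏动漫,软件下载,商务商铺,建站服务,门户综合,影视音乐,文学小说
-- 6  广告分组二  电脑网络,游戏动漫,软件下载,商务商铺,建站服务,门户综合

On SQL Server 2017 and later, the same lookup can be written set-based with the built-in STRING_SPLIT and STRING_AGG, avoiding the WHILE loop entirely. A minimal sketch (note that STRING_SPLIT does not guarantee output order, so this version orders the names by id rather than by their position in the list):

select g.id, g.GroupName,
       string_agg(t.TypeName, ',') within group (order by t.id) as TypeNames
from GroupAdv g
cross apply string_split(g.IntentionSet, ',') s
join TypeAdv t on t.id = cast(s.value as int)
group by g.id, g.GroupName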