sklearn.utils.shuffle 用法小技巧

本文介绍了一种使用Python中的sklearn库实现多个数组统一打乱的方法,通过示例展示了如何对不同数组进行同步随机排序,确保每个数组中元素对应位置的一致性,这对于机器学习数据预处理尤其有用。

是可以打乱多组数据的,不局限与x,y。

import numpy as np
import random
from sklearn.utils import shuffle

a = np.array([1,2,3,4,5])
b = np.array([0.1,0.2,0.3,0.4,0.5])
c = np.array([-1,-2,-3,-4,-5])

a,b,c = shuffle(a,b,c)
print(a)
print(b)
print(c)

# output:
# [1 4 2 5 3]
# [0.1 0.4 0.2 0.5 0.3]
# [-1 -4 -2 -5 -3]

import numpy as np import matplotlib.pyplot as plt import pandas as pd import torch import torch.nn as nn from sklearn.preprocessing import StandardScaler from torch.utils.data import Dataset, DataLoader # 加载数据集 data = pd.read_csv('pfyh.csv') df = pd.DataFrame(data) dataset = df.iloc[:, 2:].to_numpy() df.head() # 可视化数据 # 简单数据可视化 plt.plot(df.iloc[:, 2]) plt.title("Data Visualization") plt.show() # 提取特征和标签 X = np.array(dataset[:, :-1]) y = np.array(dataset[:, -1]) # 数据标准化和归一化 scaler = StandardScaler() X = scaler.fit_transform(X) y = y / 1000 # 划分训练集和测试集(90%训练,10%测试) split_index = int(len(X) * 0.9) X_train, X_test = X[:split_index], X[split_index:] y_train, y_test = y[:split_index], y[split_index:] # 自定义PyTorch数据集类 class TimeSeriesDataset(Dataset): def __init__(self, x, y, sequence_length): self.x = x self.y = y self.sequence_length = sequence_length def __len__(self): return len(self.x) - self.sequence_length def __getitem__(self, idx): return ( torch.tensor(self.x[idx:idx + self.sequence_length], dtype=torch.float), torch.tensor(self.y[idx + self.sequence_length], dtype=torch.float) ) # 创建数据集和数据加载器 sequence_length = 14 train_dataset = TimeSeriesDataset(X_train, y_train, sequence_length) train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) test_dataset = TimeSeriesDataset(X_test, y_test, sequence_length) test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False) # 定义LSTM模型 class LSTMModel(nn.Module): def __init__(self, input_size, hidden_size, num_layers, output_size): super(LSTMModel, self).__init__() self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size, output_size) self.init_weights() def forward(self, x): out, _ = self.lstm(x) out = self.fc(out[:, -1, :]) return out def init_weights(self): torch.manual_seed(42)
03-31
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值