How long np.random.seed stays in effect, and why a fixed seed gives a fixed order

This post looks at how to fix the seed of NumPy's random number generator in Python so that experiments are reproducible. Setting the same random seed yields the same random sequence, which is especially important when training and validating machine learning models.

1  Generate the data

import numpy as np
arr = np.arange(1,17).reshape(4,4)
arr
array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

2  Set a fixed seed once: the result shows it only applies to the next call

print(arr)
np.random.seed(1234)     # seed the global generator once
np.random.shuffle(arr)   # this shuffle is governed by the seed
print(arr)
np.random.shuffle(arr)   # the seed has been consumed: the state has moved on
print(arr)
np.random.shuffle(arr)   # each further shuffle keeps advancing the state
print(arr)
[[ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [ 1  2  3  4]]
[[ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [ 1  2  3  4]]
[[ 9 10 11 12]
 [13 14 15 16]
 [ 1  2  3  4]
 [ 5  6  7  8]]
[[ 9 10 11 12]
 [ 1  2  3  4]
 [ 5  6  7  8]
 [13 14 15 16]]
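
What section 2 shows follows from how the global generator works: np.random.seed initializes the generator's internal state, and every random call (shuffle included) consumes part of that state and advances it, so only the first call after seeding is pinned down by the seed. A minimal sketch of the same idea with plain random draws (only the repeatability of the values matters here):

import numpy as np

np.random.seed(1234)
a = np.random.rand(3)    # governed by the seed
b = np.random.rand(3)    # the state has advanced, so these values differ from a

np.random.seed(1234)                        # rewind to the same starting state
assert np.allclose(np.random.rand(3), a)    # the first draw is reproduced exactly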

3  To keep getting the same random order, set the same seed before every use

Note that the three printouts below are identical: with the seed reset before each call, every shuffle draws the same random numbers and applies the same permutation, and in this particular run that permutation happens to leave the existing row order unchanged. The point is that re-seeding makes each shuffle start from the same generator state.

np.random.seed(1234)     # re-seed before every shuffle ...
np.random.shuffle(arr)
print(arr)
np.random.seed(1234)
np.random.shuffle(arr)   # ... so each shuffle applies the same permutation
print(arr)
np.random.seed(1234)
np.random.shuffle(arr)
print(arr)

[[ 9 10 11 12]
 [ 1  2  3  4]
 [ 5  6  7  8]
 [13 14 15 16]]
[[ 9 10 11 12]
 [ 1  2  3  4]
 [ 5  6  7  8]
 [13 14 15 16]]
[[ 9 10 11 12]
 [ 1  2  3  4]
 [ 5  6  7  8]
 [13 14 15 16]]
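
Re-seeding the global generator before every call works, but it also resets the state for any other code that draws random numbers. A sketch of an alternative with the same reproducibility, assuming NumPy 1.17+ and the newer Generator API (its permutation is reproducible but not identical to the legacy np.random.seed one):

import numpy as np

arr_a = np.arange(1, 17).reshape(4, 4)
arr_b = np.arange(1, 17).reshape(4, 4)

np.random.default_rng(1234).shuffle(arr_a)   # a fresh generator seeded with 1234
np.random.default_rng(1234).shuffle(arr_b)   # another fresh generator, same seed
assert np.array_equal(arr_a, arr_b)          # both arrays end up in the same row order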

4  The same random seed produces the same order, as shown below:
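
The code below uses arr1 and arr2 without showing how they were built. A setup consistent with the values seen in the output is to split a 5x5 array holding 1..25 into its first four columns and its last column (an assumed reconstruction, since the post does not show it; the exact row order printed below also depends on any shuffles done earlier in the session):

import numpy as np

# Assumed setup: row i of arr1 pairs with row i of arr2.
base = np.arange(1, 26).reshape(5, 5)
arr1 = base[:, :4].copy()   # 5x4 block, columns 1-4 of each row
arr2 = base[:, 4:].copy()   # 5x1 column, kept 2-D so axis=1 concatenation works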

SEED = 555
np.random.seed(SEED)
np.random.shuffle(arr1)                          # shuffle the 5x4 block
print(arr1)
np.random.seed(SEED)                             # same seed again ...
np.random.shuffle(arr2)                          # ... so arr2 gets the same row permutation
print(arr2)
arr_new = np.concatenate((arr1,arr2),axis=1)     # rows still line up after concatenation
print(arr_new)
[[21 22 23 24]
 [11 12 13 14]
 [ 1  2  3  4]
 [ 6  7  8  9]
 [16 17 18 19]]
[[25]
 [15]
 [ 5]
 [10]
 [20]]
[[21 22 23 24 25]
 [11 12 13 14 15]
 [ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [16 17 18 19 20]]


SEED = 0
np.random.seed(SEED)
np.random.shuffle(arr1)
print(arr1)
np.random.seed(SEED)
np.random.shuffle(arr2)
print(arr2)
arr_new = np.concatenate((arr1,arr2),axis=1)
print(arr_new)
[[21 22 23 24]
 [ 1  2  3  4]
 [16 17 18 19]
 [11 12 13 14]
 [ 6  7  8  9]]
[[25]
 [ 5]
 [20]
 [15]
 [10]]
[[21 22 23 24 25]
 [ 1  2  3  4  5]
 [16 17 18 19 20]
 [11 12 13 14 15]
 [ 6  7  8  9 10]]


SEED = 5555555
np.random.seed(SEED)
np.random.shuffle(arr1)
print(arr1)
np.random.seed(SEED)
np.random.shuffle(arr2)
print(arr2)
arr_new = np.concatenate((arr1,arr2),axis=1)
print(arr_new)
[[ 1  2  3  4]
 [21 22 23 24]
 [ 6  7  8  9]
 [11 12 13 14]
 [16 17 18 19]]
[[ 5]
 [25]
 [10]
 [15]
 [20]]
[[ 1  2  3  4  5]
 [21 22 23 24 25]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]
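
This row-aligned shuffling is the usual reason for fixing the seed in machine learning: features and labels can be shuffled as separate arrays and still stay paired. A minimal sketch with made-up placeholder arrays X and y:

import numpy as np

X = np.arange(20).reshape(10, 2)   # hypothetical feature matrix, 10 samples
y = np.arange(10)                  # hypothetical label vector

SEED = 42
np.random.seed(SEED)
np.random.shuffle(X)
np.random.seed(SEED)
np.random.shuffle(y)               # same seed, same permutation: X[i] still matches y[i]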

 
