# Code
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
# Load the Spambase CSV (the file has no header row) and view it as a
# plain NumPy array.
spambase = pd.read_csv(
    r"C:\Users\hina\Desktop\spambase\spambase.data",
    header=None,
)
spambasedata = np.array(spambase)

# Shuffle the rows in place once up front; KFold below is created with
# shuffle=False, so the randomisation of the folds comes from this call.
np.random.shuffle(spambasedata)

# Split row positions into 5 folds.
indexs = np.arange(spambasedata.shape[0])
kf = KFold(n_splits=5, shuffle=False)

# One (train_rows, test_rows) pair per fold, in fold order.
data = [
    (spambasedata[train_index], spambasedata[test_index])
    for train_index, test_index in kf.split(indexs)
]
n=0
for train, test in data:
n+=1
print(f"第{
n}折交叉验证")
tarin_lable0=train[train[:,-1]==0]
p0=tarin_lable0.shape[0]/train.shape[0]
tarin_lable1=train[train[:,-1]==1]
p1=tarin_lable1.shape[0]/train.shape[0]
print("训练集中01分别占的比例(概率)为",p0,p1)
mean0=np.mean(tarin_lable0[:,:-1],axis=0)
std0=np.std(tarin_lable0[:,:-1],axis=0