import csv
import sklearn
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import PLSRegression
import numpy as np
import math
import matplotlib.pyplot as plt
# Script: load spectra and tenderness labels from CSV, plot the spectra,
# then randomly split the samples into training and test sets.
data_path = '//Users//modelin//Desktop//data_SNV.csv'  # data (spectra)
label_path = '//Users//modelin//Desktop//tenderness.csv'  # labels (tenderness)

# Use context managers so the file handles are closed once parsing is done
# (the handles were previously opened inline and never closed).
with open(data_path, 'rb') as data_file:
    data = np.loadtxt(data_file, dtype=np.float64, delimiter=',', skiprows=0)
with open(label_path, 'rb') as label_file:
    label = np.loadtxt(label_file, dtype=np.float64, delimiter=',', skiprows=0)

# Plot the spectra.
plt.figure(1)
# Evenly spaced x positions, one per column of `data`.
# NOTE(review): these are placeholder positions, not measured
# wavelengths/wavenumbers -- confirm the real axis values and unit.
x_col = np.linspace(0, len(data[0, :]), len(data[0, :]))
# plt.plot draws one curve per column of a 2-D y, so transpose to get one
# curve per row of `data`.
y_col = np.transpose(data)
plt.plot(x_col, y_col)
plt.xlabel("Wavenumber(nm)")
plt.ylabel("Absorbance")
plt.title("The spectrum of the pigmeat dataset",fontweight= "semibold",fontsize='x-large')
plt.savefig('//Users//modelin//Desktop//0527-10-预处理//预处理后.png')
plt.show()

# Randomly split the dataset.
x_data = np.array(data)
n_samples = x_data.shape[0]

# train_test_split needs exactly one label entry per row of x_data.
# The previous `label[0]` kept only the first entry of the label array; for a
# 1-D label vector that is a single scalar, which makes train_test_split fail
# with a singleton-array / inconsistent-sample-count error.
labels = np.array(label)
if labels.ndim == 2 and labels.shape[1] == n_samples:
    # Labels stored row-wise: the first row already has one value per sample,
    # so keep the original selection.
    y_data = labels[0]
else:
    # Labels stored as a single column/row of values: use all of them.
    y_data = labels.reshape(-1)

if y_data.shape[0] != n_samples:
    raise ValueError(
        "Number of labels ({}) does not match number of samples ({}); "
        "check that {} has one value per row of {}".format(
            y_data.shape[0], n_samples, label_path, data_path
        )
    )

test_ratio = 0.25
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, test_size=test_ratio)
# Question: the last line of the script (the train_test_split call) raises an error.
# What does this error roughly mean?