1. 导包:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
2. 导入数据:
# Load the data: read the WDBC file from the UCI repository. header=None
# tells pandas that the file has no header row.
df = file = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    header=None,
)

# Encode the class labels held in column 1 as integers.
le = LabelEncoder()
y = le.fit_transform(df.loc[:, 1].values)

# Take every row of the columns labelled 2 through the last one and
# convert the selection to a NumPy array.
X = df.loc[:, 2:].values
`df` 是一个 DataFrame 对象,`loc` 是用于按标签选择数据的方法。`[:, 2:]` 表示选择所有行(冒号表示所有行)以及从第 2 列(列标签为 2)到最后一列的所有列。