"""Demonstrate the effect of feature standardization on an SVM classifier.

Generates a synthetic two-feature classification data set, plots it,
standardizes the features, trains an ``SVC`` on a 70/30 train/test split
and prints the accuracy on the held-out test set.
"""
from __future__ import print_function

import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.datasets import make_classification
from sklearn.svm import SVC

# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed from scikit-learn.
# Fall back to the legacy path only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# n_samples=300 -> 300 samples; n_features=2 -> 2 features;
# n_informative=2 -> both features are informative (no redundant ones).
# random_state=22 makes the randomly generated data identical on every run.
# scale=100 multiplies the feature values by 100.
X, y = make_classification(
    n_samples=300,
    n_features=2,
    n_redundant=0,
    n_informative=2,
    random_state=22,
    n_clusters_per_class=1,
    scale=100,
)

# Scatter plot of the raw samples, coloured by class label.
plt.scatter(X[:, 1], X[:, 0], c=y)
plt.show()

# Standardize the features before training.
# Alternative: preprocessing.minmax_scale(X, feature_range=(0, 1)) would
# instead rescale every feature into the range 0..1.
X = preprocessing.scale(X)

# Split into training and test sets; the training set gets 70% of the data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

clf = SVC()
clf.fit(X_train, y_train)

# Score the classifier: predictions for X_test are compared against y_test.
print(clf.score(X_test, y_test))
normalization数据标准化
最新推荐文章于 2025-03-29 10:57:46 发布