# -*- coding: utf-8 -*-
import csv
import numpy
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import metrics
import xlwt
from sklearn.preprocessing import Imputer
if __name__ == '__main__':
    # Script entry point: load a numeric CSV, min-max scale it, cluster the
    # rows with KMeans and write one cluster label per row to an .xls file.

    # Read the data. Every cell is converted to float64, so the conversion
    # raises ValueError if the file contains a header row or any
    # non-numeric cell.
    with open('E:/1.csv', 'r') as CreditSocringData:
        read1 = csv.reader(CreditSocringData)
        read1 = numpy.array(list(read1), dtype='float64')
    # No explicit close() is needed: leaving the with-block closes the file.

    # Data preprocessing: take every row and every column as the features.
    X = read1[0:read1.shape[0], 0:read1.shape[1]]

    # Min-max normalisation of the feature columns.
    min_max_scaler = preprocessing.MinMaxScaler()
    X_min_max = min_max_scaler.fit_transform(X)

    # Scatter plot of the first two scaled columns.
    plt.scatter(X_min_max[:, 0], X_min_max[:, 1], marker='o')
    plt.show()

    # Cluster analysis: 5 clusters, fixed seed so the run is repeatable.
    y_pred = KMeans(n_clusters=5, random_state=9).fit_predict(X_min_max)
    plt.scatter(X_min_max[:, 0], X_min_max[:, 1], c=y_pred)
    # Display the cluster-coloured plot as well; without this call the
    # figure is built but never shown when run as a plain script.
    plt.show()
    print(y_pred)

    # Save the cluster label of each row (row i of the sheet, column 0).
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet('sheet1', cell_overwrite_ok=True)
    for i, label in enumerate(y_pred):
        # Convert the NumPy integer scalar to a built-in int: NumPy integers
        # are not `int` instances on Python 3.
        # NOTE(review): xlwt appears to reject non-built-in numeric types
        # with "Unexpected data type" -- confirm against the xlwt version
        # in use.
        sheet.write(i, 0, int(label))
    book.save('E:/Kmeanslabel.xls')