机器学习分析鸢尾花数据

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def nomalizeData(X):
    """Standardize each column of ``X`` to zero mean and unit variance.

    Args:
        X: Object exposing ``mean(axis=0)`` and ``std(axis=0)`` (for example
            a NumPy array). Statistics are taken along axis 0.

    Returns:
        ``(X - mean) / std`` computed column-wise. Columns whose standard
        deviation is zero are only centred, so they come back as zeros
        rather than NaN/inf.
    """
    col_mean = X.mean(axis=0)
    col_std = X.std(axis=0)
    # A constant column has std == 0; dividing by it would give 0/0 = NaN.
    # Substituting 1 leaves the (all-zero) centred values unchanged.
    safe_std = np.where(col_std == 0, 1.0, col_std)
    return (X - col_mean) / safe_std
def zeroMean(dataMat):
    """Centre every column of ``dataMat`` on zero.

    Args:
        dataMat: Array-like data; the mean is taken along axis 0.

    Returns:
        A ``(centred, column_means)`` tuple: the input with the per-column
        mean subtracted, and the means that were removed.
    """
    column_means = np.mean(dataMat, axis=0)
    return dataMat - column_means, column_means

def percentage2n(eigVals,percentage):
    """Return how many of the largest eigenvalues reach a share of the total.

    The eigenvalues are sorted in descending order and accumulated; the
    result is the smallest count whose running sum is at least
    ``percentage`` times the sum of all eigenvalues.

    Args:
        eigVals: One-dimensional collection of eigenvalues.
        percentage: Fraction of the total to cover (e.g. ``0.99``).

    Returns:
        The number of leading eigenvalues needed, as an ``int``. Returns 0
        for empty input, and the total number of eigenvalues when the
        threshold is never reached (for example ``percentage > 1``), instead
        of implicitly returning ``None``.
    """
    descending = np.sort(eigVals)[::-1]
    if descending.size == 0:
        return 0

    running = np.cumsum(descending)
    # The last running total is the sum of all eigenvalues.
    threshold = running[-1] * percentage
    reached = np.flatnonzero(running >= threshold)
    if reached.size == 0:
        # Threshold unattainable: keep every component rather than return None.
        return int(descending.size)
    return int(reached[0]) + 1
def pca(dataMat,percentage=0.99):
    """Project ``dataMat`` onto its leading principal components.

    Args:
        dataMat: Data with one sample per row and one feature per column
            (the covariance is computed with ``rowvar=0``).
        percentage: Share of the total eigenvalue sum that the kept
            components must cover; forwarded to ``percentage2n``.

    Returns:
        A ``(lowDDataMat, reconMat)`` tuple of ``np.matrix`` objects: the
        centred data projected onto the selected eigenvectors, and the
        reconstruction of the data from that projection with the column
        means added back.
    """
    newData,meanVal=zeroMean(dataMat)
    # np.asmatrix is used instead of np.mat, which NumPy 2.0 removed.
    covMat=np.asmatrix(np.cov(newData,rowvar=0))
    # A covariance matrix is symmetric, so eigh is the appropriate solver:
    # it returns real eigenvalues in ascending order, whereas eig may return
    # complex values and gives no ordering guarantee.
    eigVals,eigVects=np.linalg.eigh(covMat)
    n=percentage2n(eigVals,percentage)
    # Reverse the columns to get descending eigenvalue order, then keep n.
    n_eigVect=eigVects[:,::-1][:,:n]
    lowDDataMat=np.asmatrix(newData)*n_eigVect
    reconMat=(lowDDataMat*n_eigVect.T)+meanVal
    return lowDDataMat,reconMat

def _scatter_by_class(points, y, label_names, colors, xlabel, ylabel):
    """Draw one scatter figure of the first two columns of ``points``, coloured per class."""
    plt.figure(figsize=(8,6))
    for label,c in zip(label_names,colors):
        mask = y==label
        plt.scatter(points[mask,0],points[mask,1],c=c,label=label,alpha=0.6,s=20)
    # Legend and axis labels only need to be set once, after all classes are drawn.
    leg = plt.legend(loc='best')
    leg.get_frame().set_alpha(0.6)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()


def main(csv_path='C:/Users/Administrator/Desktop/Irisdata.csv'):
    """Load the Iris CSV, plot the features, and visualise a PCA projection.

    Steps: per-feature histograms split by class, a scatter plot of the
    first two standardized features, and a scatter plot of the first two
    principal components of the standardized data.

    Args:
        csv_path: Location of the CSV file. The file's columns are renamed
            to four feature names plus ``classes``; the default keeps the
            path the script originally hard-coded.
    """
    feature_names = ['sepal_len','sepal_wid','petal_len','petal_wid']
    colors = ['red','green','black']

    data = pd.read_csv(csv_path)
    data.columns = feature_names + ['classes']

    X=data[feature_names].values
    y=data['classes'].values
    label_names=data['classes'].unique()

    # One histogram panel per feature, overlaying the classes.
    for feature in range(len(feature_names)):
        plt.subplot(2,2,feature+1)
        for label in label_names:
            plt.hist(X[y==label,feature],bins=10,alpha=0.5,label=label)
        plt.legend(loc='best')
    plt.show()

    std_feature=nomalizeData(X)
    # pca() centres its input itself, so no separate zeroMean pass is needed.
    lowDDataMat,_=pca(std_feature)
    print(lowDDataMat)

    _scatter_by_class(std_feature,y,label_names,colors,
                      feature_names[0],feature_names[1])
    # The projected axes are principal components, not the original
    # features, so they get their own labels. A plain ndarray view gives
    # 1-D columns for plotting.
    _scatter_by_class(np.asarray(lowDDataMat),y,label_names,colors,
                      'principal component 1','principal component 2')

# Run the demo only when this file is executed as a script, not when imported.
if __name__=='__main__':
    main()

实现结果:

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值