#!/usr/bin/python
# -*- coding: utf-8 -*-
# Naive Bayes classifier using maximum-likelihood estimates on a toy data set.


def createDataSET():
    """Return the toy training set together with its feature names.

    Every row has the form ``[feature_0, feature_1, class_label]``; the
    class label is the last element and is either ``1`` or ``-1``.

    :return: tuple ``(dataSet, labels)``
    """
    dataSet = [
        [1, "S", -1],
        [1, "M", -1],
        [1, "M", 1],
        [1, "S", 1],
        [1, "S", -1],
        [2, "S", -1],
        [2, "M", -1],
        [2, "M", 1],
        [2, "L", 1],
        [2, "L", 1],
        [3, "L", 1],
        [3, "M", 1],
        [3, "M", 1],
        [3, "L", 1],
        [3, "L", -1],
    ]
    labels = ["no surfacing", "flippers"]
    return dataSet, labels


def splitDataSet(dataSet, i, values):
    """Keep the rows whose column ``i`` equals ``values``, dropping that column.

    :param dataSet: list of rows (each row is a sequence)
    :param i: index of the column to filter on
    :param values: value that column ``i`` must equal for a row to be kept
    :return: list of new rows, each without column ``i``
    """
    return [
        list(row[:i]) + list(row[i + 1:])
        for row in dataSet
        if row[i] == values
    ]


def getConPro(j, feaValu, y, dataSet):
    """Estimate P(feature j == feaValu | class == y) by relative frequency.

    :param j: index of the feature column (zero-based)
    :param feaValu: value of feature ``j`` to condition on
    :param y: class label; compared against the last element of each row
    :param dataSet: list of rows whose last element is the class label
    :return: (rows with feature j == feaValu and class y) / (rows with class y)
    :raises ZeroDivisionError: if no row of ``dataSet`` has class ``y``
    """
    # Rows matching the feature value, with that feature column removed;
    # the class label is still the last element of each remaining row.
    MLE_data = splitDataSet(dataSet, j, feaValu)
    print("MLE_data---" + str(MLE_data))
    matching = sum(1 for row in MLE_data if row[-1] == y)
    inClass = sum(1 for row in dataSet if row[-1] == y)
    return float(matching) / inClass


def getPrior(dataSet, y):
    """Estimate the prior probability of class ``y`` by relative frequency.

    :param dataSet: non-empty list of rows whose last element is the class label
    :param y: class label
    :return: (rows with class y) / (total number of rows)
    """
    labelIndex = len(dataSet[0]) - 1
    classRows = splitDataSet(dataSet, labelIndex, y)
    return float(len(classRows)) / len(dataSet)


def getBaseClass(fenValu):
    """Classify a feature vector against the toy data set and print the result.

    For every class the prior is multiplied by the conditional probability
    of each feature value; the class with the largest product wins.

    :param fenValu: feature values, indexed in the same order as the
        feature columns of the training rows
    :return: the winning class label, which is also printed. Stays ``0``
        when no class reaches a score greater than zero.
    """
    dataSet, _ = createDataSET()
    featureCount = len(dataSet[0]) - 1  # last column is the class label
    bestScore = 0
    besty = 0
    for cla in set([row[-1] for row in dataSet]):
        proportion = 1
        for i in range(featureCount):
            proportion *= getConPro(i, fenValu[i], cla, dataSet)
        prior = getPrior(dataSet, cla)
        print(str(prior) + "prior....." + str(proportion))
        score = prior * proportion
        if bestScore < score:
            bestScore = score
            besty = cla
    print(besty)
    return besty


getBaseClass([2, "S"])