经典的分类模型:鸢尾花(Iris)的分类。
数据集样例:
sepal length | sepal width | petal length | petal width | class |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
4.9 | 3 | 1.4 | 0.2 | Iris-versicolor |
4.7 | 3.2 | 1.3 | 0.2 | Iris-virginica |
主要是使用softmax来做多类别分类,
损失函数使用的是交叉熵: -y*log(predictY)
代码:
__author__ = 'jmh081701'
#coding:utf-8
import tensorflow as tf
import numpy as np
import pandas as pd
import os
# Model parameters, shared by combine()/inference()/loss() below.
# W: (4, 3) weight matrix mapping the 4 iris features to 3 class logits,
#    initialized from a standard normal distribution.
# b: scalar bias, broadcast across all 3 class logits.
W =tf.Variable(np.random.randn(4,3),dtype=tf.float32,name='W')
b =tf.Variable(0,dtype=tf.float32,name='bias')
def softmax(x):
    """Row-wise softmax: exp(x_i) / sum_j exp(x_j) for each row of x.

    Args:
        x: 2-D tensor-like of logits, shape (batch, num_classes).
    Returns:
        float32 tensor of the same shape whose rows each sum to 1.
        (TensorFlow's / on tensors divides element-wise.)
    """
    x = tf.to_float(x)
    # Subtract each row's max before exponentiating: mathematically a
    # no-op for softmax, but it prevents exp() overflow on large logits.
    x = x - tf.reduce_max(x, 1, keep_dims=True)
    e = tf.exp(x)
    # keep_dims=True keeps the sum as a (batch, 1) column so broadcasting
    # divides every row by its own sum. This works for any number of
    # classes; the original tf.transpose([z, z, z]) hard-coded 3.
    return e / tf.reduce_sum(e, 1, keep_dims=True)
def load_csv(filename):
    """Load the iris training data at *filename* into a DataFrame."""
    frame = pd.read_csv(filename)
    return frame
def inputs(df, tables):
    """Convert the iris DataFrame into feature and one-hot label tensors.

    Args:
        df: DataFrame with columns 'sepal length', 'sepal width',
            'petal length', 'petal width' and 'class'.
        tables: unused; kept so existing callers are unaffected.
    Returns:
        (X, Y): float32 tensors of shape (n, 4) and (n, 3).
    """
    one_hot = {
        'Iris-setosa': [1, 0, 0],
        'Iris-versicolor': [0, 1, 0],
        'Iris-virginica': [0, 0, 1],
    }
    X = []
    Y = []
    for i in range(len(df)):
        # .iloc replaces the deprecated .ix accessor.
        row = df.iloc[i]
        # Bug fix: the original appended 'petal length' twice and never
        # used 'petal width' as the fourth feature.
        X.append([row['sepal length'], row['sepal width'],
                  row['petal length'], row['petal width']])
        # Any unrecognized label falls through to virginica, matching
        # the original else branch.
        Y.append(one_hot.get(row['class'], [0, 0, 1]))
    return tf.to_float(X), tf.to_float(Y)
def combine(X):
    """Affine transform of the features: the class logits X @ W + b."""
    logits = tf.matmul(X, W)
    return tf.add(logits, b, name='combine_add_as_x')
def loss(X, Y):
    """Total cross-entropy loss: sum of -y * log(softmax(X @ W + b)).

    Args:
        X: float32 feature tensor, shape (n, 4).
        Y: one-hot float32 label tensor, shape (n, 3).
    Returns:
        Scalar tensor: the cross-entropy summed over the whole batch.
    """
    predict_y = inference(X)
    # Clip away exact zeros before log(): once the softmax saturates,
    # log(0) = -inf poisons the loss and gradients with NaN. The upper
    # bound 1.0 is a no-op for valid probabilities.
    predict_y = tf.clip_by_value(predict_y, 1e-10, 1.0)
    return tf.reduce_sum(-Y * tf.log(predict_y))
def train(total_loss, learning_rate=0.0001):
    """Build a gradient-descent op that minimizes *total_loss*.

    Args:
        total_loss: scalar loss tensor to minimize.
        learning_rate: step size. Previously a hard-coded local; now a
            keyword parameter with the same default, so existing callers
            are unaffected.
    Returns:
        The training op to run once per step.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    return optimizer.minimize(total_loss)
def evaluate(X, Y, sess=None):
    """Print and return the model's accuracy on (X, Y).

    Args:
        X: feature tensor.
        Y: one-hot label tensor.
        sess: active tf.Session used to evaluate the accuracy op.
    Returns:
        float accuracy in [0, 1].
    """
    # Predicted class = column index of the largest softmax output per row.
    pred_labels = tf.to_int32(tf.arg_max(inference(X), 1))
    # Same reduction for the one-hot ground truth.
    true_labels = tf.to_int32(tf.arg_max(tf.to_int32(Y), 1))
    # Mean of the 0/1 match vector is the fraction classified correctly.
    accuracy = sess.run(tf.reduce_mean(tf.to_float(tf.equal(true_labels, pred_labels))))
    print("evaluate:", accuracy)
    return accuracy
def load_model(sess, saver, filedir, filename):
    """Restore variables from the latest checkpoint in *filedir*, if any.

    Args:
        sess: active tf.Session to restore into.
        saver: tf.train.Saver built over the current graph.
        filedir: directory searched for a checkpoint state file.
        filename: unused now; kept for interface compatibility.

    No-op when no checkpoint exists yet (first training run).
    """
    ckpt = tf.train.get_checkpoint_state(filedir)
    if ckpt and ckpt.model_checkpoint_path:
        # Bug fix: restore from the path the checkpoint state actually
        # reports. The original rebuilt it as filedir + ".\\" + filename,
        # which was Windows-only and ignored the checkpoint found above.
        saver.restore(sess, ckpt.model_checkpoint_path)
def inference(x):
    """Forward pass: softmax probabilities over the affine logits."""
    logits = combine(x)
    return softmax(logits)
if __name__ == '__main__':
    # Saver is built after W/b (the only variables) exist in the graph.
    saver=tf.train.Saver()
    dir=r"D:\\data\\Iris\\"
    modelfile="my-model.cpt"
    with tf.Session() as sess:
        # NOTE(review): tf.initialize_all_variables() is the deprecated
        # TF-1.x name for tf.global_variables_initializer().
        init =tf.initialize_all_variables()
        sess.run(init)
        coord= tf.train.Coordinator()
        threads =tf.train.start_queue_runners(sess,coord)
        df =load_csv(dir+"train.csv")
        X,Y=inputs(df, None)
        total_loss =loss(X,Y)
        train_op=train(total_loss)
        step =0            # total training steps run
        ratesum=0          # accumulated accuracy over recent evaluations
        i=0                # number of evaluations since last reset
        sameMax=20         # window size for the convergence heuristic
        # Resume from an existing checkpoint, if one is present.
        load_model(sess,saver,dir,modelfile)
        while True:
            step=step+1
            l,op=sess.run([total_loss,train_op])
            if(step%20==0):
                # Evaluate every 20 steps and accumulate the accuracy.
                rate=evaluate(X,Y,sess)
                ratesum=ratesum+rate
                i=i+1
                if(i%sameMax==0):
                    # Convergence heuristic: if the mean accuracy of the
                    # last sameMax evaluations equals the latest accuracy,
                    # the rate is (approximately) flat -- stop training.
                    ratesum=ratesum/sameMax
                    if(ratesum==rate):
                        break
                        # NOTE(review): this save is unreachable (after
                        # break); the model is still saved below and at exit.
                        saver.save(sess,dir+modelfile)
                    else:
                        # Not converged: reset the window and keep going.
                        ratesum=0
                        i=0
            if(step>10000):
                # Hard cap on training length; checkpoint before stopping.
                saver.save(sess,dir+modelfile)
                break
        coord.request_stop()
        coord.join(threads)
        print("training over!!!")
        # Final report on the training data, then persist the model.
        evaluate(X,Y,sess)
        saver.save(sess,dir+modelfile)
最后准确率是94%左右