from svm import *
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
#from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
# Load the raw table; the CSV has no header row, so columns are positional.
dataframe = pd.read_csv("Train.csv", header=None)
dataset = dataframe.values
# Columns 0..84 are used as features and column 85 as the class label.
X = dataset[:, 0:85].astype(float)
Y = dataset[:, 85].astype(float)
# Encode class values as consecutive integers.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
# Convert the integer classes to dummy variables (one-hot encoding).
dummy_y = np_utils.to_categorical(encoded_Y)
# 70/30 split. No random_state is given, so the split differs on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_y, test_size=0.3)
# Inverse-frequency class weights taken from one-hot column 1 of the training
# labels: class 0 is weighted by the share of rows where that column is 1,
# and class 1 by the share of rows where it is 0.
# np.count_nonzero counts the boolean masks in native code instead of
# iterating the arrays element by element with the builtin sum().
# NOTE(review): this presumably targets a binary label -- confirm.
_is_class_one = np.count_nonzero(Y_train[:, 1] == 1)
_is_class_zero = np.count_nonzero(Y_train[:, 1] == 0)
weightzero = _is_class_one / (_is_class_one + _is_class_zero)
weightone = _is_class_zero / (_is_class_one + _is_class_zero)
################data preprocessing for svm
# In[12]:
import os


def _write_svmlight(path, features, labels):
    """Write one labelled sample per line in svmlight/libsvm text format.

    Each line is ``<label> 1:<v1> 2:<v2> ...`` with 1-based feature indices.
    Lines are separated by a newline and the file has no trailing newline.

    path     -- destination file; its parent directory is created if missing.
    features -- 2-D sequence of feature rows.
    labels   -- 1-D sequence of labels, aligned with ``features``.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # The context manager closes the file even if a write fails part-way.
    with open(path, 'w') as out:
        for row_index, (label, row) in enumerate(zip(labels, features)):
            if row_index:
                out.write('\n')
            pairs = ''.join(
                ' ' + str(column) + ':' + str(value)
                for column, value in enumerate(row, start=1)
            )
            out.write(str(label) + pairs)


# The label written for each row is column 1 of the one-hot target matrix.
_write_svmlight('./dataset/train.txt', X_train, Y_train[:, 1])
_write_svmlight('./dataset/test.txt', X_test, Y_test[:, 1])
# In[12]:
##############################################################
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from thundersvmScikit import *
from sklearn.datasets import *
import time
import os
# Expose only GPUs 0 and 1 to CUDA for the SVM run.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# Read the training split back from its svmlight file (sparse matrix + labels).
x, y = load_svmlight_file('./dataset/train.txt')

# NOTE(review): an earlier comment in this script claimed class_weight only
# accepts ints with this SVC implementation -- confirm that the float weights
# used here are honoured.
svc = SVC(probability=True, class_weight={0: weightzero, 1: weightone})

# Hyper-parameter grid: an RBF kernel over C x gamma, and a linear kernel
# over C alone.
_c_values = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
parameters = [
    {
        'C': _c_values,
        'gamma': [0.00001, 0.0001, 0.001, 0.1, 1, 10, 100, 1000],
        'kernel': ['rbf'],
    },
    {
        'C': _c_values,
        'kernel': ['linear'],
    },
]
clf = GridSearchCV(svc, parameters, cv=5, n_jobs=1)

# Time the search in CPU time and in wall-clock time. time.clock() was
# removed in Python 3.8; time.process_time() is the CPU-time replacement.
clock = time.process_time()
wall = time.time()
clf.fit(x, y)
costclock = time.process_time() - clock
costwall = time.time() - wall
print('costclock:', costclock)
print('costwall:', costwall)
print(clf.best_params_)
svm_clf = clf.best_estimator_

# Pin the feature count to the training matrix: without it the loader infers
# the width from the highest index present, which can differ from training
# when trailing features are all zero in the test file.
x2, y2 = load_svmlight_file("./dataset/test.txt", n_features=x.shape[1])
y1_test_pred_SVM = svm_clf.predict(x2)
print("SVM Confusion matrix (test):\n {0}\n".format(confusion_matrix(y2, y1_test_pred_SVM)))
print("SVM Classification report (test):\n {0}".format(classification_report(y2, y1_test_pred_SVM)))

# Score the same matrix and labels used for the reports above, so the ROC
# curve and the confusion matrix describe identical inputs.
LR_pred = svm_clf.predict_proba(x2)
fpr_svm1, tpr_svm1, thresholds_svm1 = roc_curve(y2, LR_pred[:, 1])
roc_auc_svm1 = auc(fpr_svm1, tpr_svm1)

# Persist the best estimator found by the grid search.
svm_clf.save_to_file('./modelsvm')

# ROC curve of the SVM against the chance diagonal.
plt.plot(fpr_svm1, tpr_svm1, lw=2, alpha=.6)
plt.plot([0, 1], [0, 1], lw=2, linestyle="--")
plt.xlim([0, 1])
plt.ylim([0, 1.05])
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.title("ROC curve")
plt.legend(["SVM (AUC {:.4f})".format(roc_auc_svm1)], fontsize=8, loc=2)