import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.optimization.HingeGradient
import org.apache.spark.mllib.optimization.SquaredL2Updater
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.sql.SparkSession
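
// Binary classification on the iris data set with Spark MLlib's RDD-based linear SVM
// (SVMWithSGD): parse the data, train, save/load the model, then evaluate with ROC AUC
// and accuracy.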
object SVM {
  def main(args: Array[String]): Unit = {
    // Keep console output to Spark errors only.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)

    val sparkSession = SparkSession.builder().master("local[4]").appName("SVM").getOrCreate()
    val sc = sparkSession.sparkContext

    // Load the raw iris data from the local file system.
    val data = sc.textFile("G:\\mldata\\iris.txt")
    // Parse the pipe-delimited records: four numeric features followed by the species name.
    // Keep only Iris-setosa and Iris-versicolor so the task is binary, encoding
    // Iris-setosa as label 0.0 and Iris-versicolor as 1.0.
    val pddata = data.map(_.split('|'))
      .map(arr => (arr(0).toDouble, arr(1).toDouble, arr(2).toDouble, arr(3).toDouble, arr(4)))
      .filter(t => t._5 == "Iris-setosa" || t._5 == "Iris-versicolor")
      .map(t => LabeledPoint(if (t._5 == "Iris-setosa") 0.0 else 1.0, Vectors.dense(t._1, t._2, t._3, t._4)))
    // Hold out roughly 20% of the examples for evaluation; cache the training
    // split because SGD makes repeated passes over it.
    val splitdata = pddata.randomSplit(Array(0.8, 0.2))
    val traindata = splitdata(0).cache()
    val testdata = splitdata(1)
    // Configure the SGD optimizer: hinge loss with a squared-L2 updater is a linear SVM
    // (these are already SVMWithSGD's defaults, set explicitly here for clarity).
    val model = new SVMWithSGD()
    model.optimizer
      .setNumIterations(1000)
      .setRegParam(0.1)
      .setStepSize(0.3)
      .setMiniBatchFraction(0.5)
      .setGradient(new HingeGradient())
      .setUpdater(new SquaredL2Updater)
    val svmModel = model.run(traindata)

    // Persist the trained model and read it back to demonstrate save/load round-tripping.
    // Note that save() fails if the target directory already exists.
    svmModel.save(sc, "C:\\users\\Java_Man_China\\desktop\\model1")
    val sameModel = SVMModel.load(sc, "C:\\users\\Java_Man_China\\desktop\\model1")
    // Clear the threshold so predict() returns raw margins; BinaryClassificationMetrics
    // needs continuous scores to build a meaningful ROC curve.
    sameModel.clearThreshold()
    val scoreAndLabel = testdata.map(lab => (sameModel.predict(lab.features), lab.label))
    scoreAndLabel.collect().foreach(println)
    val metrics = new BinaryClassificationMetrics(scoreAndLabel)
    println(s"Area under ROC = ${metrics.areaUnderROC()}")

    // Restore the default threshold so predict() returns class labels (0.0 or 1.0),
    // then measure accuracy with MulticlassMetrics.
    sameModel.setThreshold(0.0)
    val predictionAndLabel = testdata.map(lab => (sameModel.predict(lab.features), lab.label))
    val metric = new MulticlassMetrics(predictionAndLabel)
    println(s"Accuracy = ${metric.accuracy}")

    sparkSession.stop()
  }
}