// Reference: http://spark.apache.org/docs/latest/ml-features.html#tf-idf
import org.apache.spark.ml.feature._
import org.apache.spark.ml.linalg.SparseVector
import org.apache.spark.sql.SparkSession
import scala.collection.mutable
import scala.io.Source
/**
* Created by xubc on 2017/6/3.
*/
object TestX {
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder
.master("local[5]")
.appName(this.getClass.getName().stripSuffix("$"))
.getOrCreate()
val sentenceData = spark.createDataFrame(Seq(
(0.0, "Hi I heard about are Spark"),
(1.0, "I wish Java could use case spark classes"),
(2.0, "Logistic regression regression models are neat I")
)).toDF("label", "sentence")
val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
val wordsData = tok