/**
 * Hashes every term of a document into a bucket index and returns the
 * resulting term-frequency vector.
 *
 * Each term is mapped to `nonNegativeMod(hashFunc(term), numFeatures)`. When
 * `binary` is set, every bucket that occurs is stored as 1.0; otherwise the
 * stored value grows by 1.0 for each occurrence.
 *
 * @param document the terms of one document
 * @return a sparse vector of size `numFeatures` built from the (index, value) pairs
 */
def transform(document: Iterable[_]): Vector = {
  val termFrequencies = mutable.HashMap.empty[Int, Double]
  // Choose the update rule once, outside the loop.
  val setTF =
    if (binary) (i: Int) => 1.0
    else (i: Int) => termFrequencies.getOrElse(i, 0.0) + 1.0
  val hashFunc: Any => Int = getHashFunction
  document.foreach { term =>
    val i = nonNegativeMod(hashFunc(term), numFeatures)
    termFrequencies.put(i, setTF(i))
  }
  Vectors.sparse(numFeatures, termFrequencies.toSeq)
}
/**
 * Builds a lookup from hashed bucket index to the string form of the term
 * that landed in that bucket.
 *
 * The bucket of a term is `nonNegativeMod(hashFunc(term), numFeatures)`.
 * If several terms of the document share a bucket, the one visited last wins.
 *
 * @param document the terms of one document
 * @return a mutable map from bucket index to `term.toString`
 */
def tran(document: Iterable[_]): mutable.Map[Int,String] = {
  val indexToTerm = mutable.HashMap.empty[Int, String]
  val hashOf: Any => Int = getHashFunction
  for (term <- document) {
    val bucket = nonNegativeMod(hashOf(term), numFeatures)
    // A later term in the same bucket replaces the earlier one.
    indexToTerm(bucket) = term.toString
  }
  indexToTerm
}
// Author's note (translated): "this one is wrong" -- presumably refers to the getdict attempt below.
/**
 * Builds one index-to-term dictionary for a whole dataset by merging the
 * per-document maps produced by `tran`.
 *
 * The merge is done with `RDD.aggregate`, so the combined map is actually
 * returned to the caller. Updating a captured variable from inside
 * `RDD.foreach` only changes a serialized copy of the closure and leaves the
 * caller's map untouched.
 *
 * When several terms hash to the same index, which term is kept is
 * unspecified (it depends on the order in which partial results are merged).
 *
 * NOTE(review): another `getdict` with an identical signature appears later
 * in the visible source; if both live in the same scope, only one can be kept.
 *
 * @tparam D the document type, an iterable of terms
 * @param dataset the documents to index
 * @return a mutable map from bucket index to term string
 */
def getdict[D <: Iterable[_]](dataset: RDD[D]): mutable.Map[Int,String] = {
  val perDocument: RDD[mutable.Map[Int, String]] = dataset.map(this.tran)
  perDocument.aggregate(mutable.HashMap.empty[Int, String])(
    // Fold each document's map into the partition-local accumulator.
    (acc, docMap) => acc ++= docMap,
    // Merge the partition-level accumulators.
    (left, right) => left ++= right
  )
}
/**
 * Collects the index-to-term entries of every document in `dataset` into a
 * single dictionary.
 *
 * Each document is turned into its own map by `tran`; those maps are then
 * combined with `RDD.aggregate`, which returns the merged result to the
 * caller. Filling a captured map from inside `RDD.foreach` does not work:
 * the closure is serialized, so only a copy is mutated and the caller's map
 * stays empty.
 *
 * When several terms hash to the same index, which term is kept is
 * unspecified (it depends on the order in which partial results are merged).
 *
 * NOTE(review): another `getdict` with an identical signature appears earlier
 * in the visible source; if both live in the same scope, only one can be kept.
 *
 * @tparam D the document type, an iterable of terms
 * @param dataset the documents to index
 * @return a mutable map from bucket index to term string
 */
def getdict[D <: Iterable[_]](dataset: RDD[D]): mutable.Map[Int,String] = {
  val documentMaps: RDD[mutable.Map[Int, String]] = dataset.map(this.tran)
  documentMaps.aggregate(mutable.HashMap.empty[Int, String])(
    // Add one document's entries to the partition-local dictionary.
    (partial, entries) => partial ++= entries,
    // Combine the dictionaries built on different partitions.
    (merged, partial) => merged ++= partial
  )
}