推荐系统-01-简单逻辑回归

import org.apache.spark.ml.feature._
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.{Pipeline,PipelineModel}
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession


/**
 * Minimal Spark ML walkthrough: fits a `LogisticRegression` estimator on a tiny
 * in-memory data set, refits it with parameters overridden through a `ParamMap`,
 * and prints the predictions of the first model on a small test set.
 */
object BasicStastic {

  /**
   * Program entry point.
   *
   * Builds a `SparkSession` with master `local`, trains two models, prints the
   * parameters each one was fitted with, prints the predictions of the first
   * model, and always stops the session before returning.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local")
      .appName("my App Name")
      .getOrCreate()

    // try/finally so the session is released even when fitting or scoring throws.
    try {
      // Training rows as (label, features).
      val training = spark.createDataFrame(Seq(
        (0.0, Vectors.dense(2.0, 1.1, 0.1)),
        (1.0, Vectors.dense(0.0, 1.0, -1.0)),
        (2.0, Vectors.dense(0.0, 1.3, 1.0)),
        (3.0, Vectors.dense(2.0, 1.2, -0.5))
      )).toDF("label", "features")

      // The available parameters are described in the official API documentation:
      // http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.classification.LogisticRegression
      val lr = new LogisticRegression()
        .setMaxIter(10)
        .setRegParam(0.01)

      // First model: fitted with the parameters stored on the estimator itself.
      val model1 = lr.fit(training)
      // Print the parameter map; a bare expression would be silently discarded here.
      println(s"Model 1 was fit using parameters: ${model1.parent.extractParamMap}")

      // Parameter overrides supplied at fit time instead of through setters.
      // NOTE(review): `threshold` is documented as a binary-classification parameter,
      // while the labels above span 0.0 to 3.0 - confirm this override is intended.
      val paramMap = ParamMap(lr.maxIter -> 20).put(lr.regParam -> 0.1, lr.threshold -> 0.55)
      // Rename the probability output column for the second model.
      val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability")
      val paramMapCombined = paramMap ++ paramMap2

      // Second model: same estimator and data, fitted with the combined overrides.
      val model2 = lr.fit(training, paramMapCombined)
      println(s"Model 2 was fit using parameters: ${model2.parent.extractParamMap}")

      // Test rows as (label, features).
      val test = spark.createDataFrame(Seq(
        (3.0, Vectors.dense(-1.0, 1.5, 1.3)),
        (0.0, Vectors.dense(3.0, 2.0, -0.1)),
        (1.0, Vectors.dense(0.0, 2.2, -1.5))
      )).toDF("label", "features")

      // Predictions come from model1, so the probability column keeps its default name.
      val result = model1.transform(test)

      // Show the full result, then only the columns of interest.
      result.show(false)
      val summary = result.select("label", "features", "probability", "prediction")
      summary.show(false)
      summary.collect().foreach {
        case Row(label: Double, features: Vector, probability: Vector, prediction: Double) =>
          println(s"($features, $label) ->  probability=$probability, prediction=$prediction")
      }
    } finally {
      spark.stop()
    }
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值