注意：本示例没有划分训练集和测试集，而是直接使用全量数据训练，并在同一份全量数据上评估；因此得到的指标反映的是模型对训练数据的拟合程度，而不是泛化能力。
一、引入 Spark 环境
from pyspark.sql import SparkSession
# Build (or reuse, via getOrCreate) the SparkSession for this notebook,
# using the "local[*]" master URL so everything runs on the local machine.
spark = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)
二、设置模型评估方法
# Model evaluation: evaluator import and the check() metric helper
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
# check: score a dataset of predictions with MulticlassClassificationEvaluator.
#
# Parameters:
#   train_eval: the dataset passed to every evaluator's evaluate() call. Each
#     evaluator is configured to read predictions from the 'prediction' column
#     and labels from the 'Type_idx' column.
#     NOTE(review): presumably the output of a fitted model's transform();
#     confirm against the caller.
#
# Five metrics are computed, each with a freshly constructed evaluator:
# f1, accuracy, logLoss, weightedPrecision and weightedRecall.
#
# NOTE(review): the function body has lost its indentation in this copy of the
# file (looks like a notebook export), so the block does not parse as written;
# every line after `def` needs to be indented one level.
# NOTE(review): the function ends with `return pd`. `pd` is neither imported
# nor defined in the visible part of the file, and none of the five computed
# metrics is returned. The statement looks truncated (possibly it was
# `pd.DataFrame(...)` collecting the metrics) -- confirm against the original
# notebook before relying on the return value.
def check(train_eval):
# metricName='f1'
f1_score = MulticlassClassificationEvaluator(predictionCol='prediction', labelCol='Type_idx', metricName='f1').evaluate(train_eval)
# metricName='accuracy'
acc_score = MulticlassClassificationEvaluator(predictionCol='prediction', labelCol='Type_idx', metricName='accuracy').evaluate(train_eval)
# metricName='logLoss'
# NOTE(review): log-loss is normally computed from class probabilities rather
# than from the predicted label; no probabilityCol is passed here, so the
# evaluator's default applies -- confirm train_eval carries that column.
loss = MulticlassClassificationEvaluator(predictionCol='prediction', labelCol='Type_idx', metricName='logLoss').evaluate(train_eval)
# metricName='weightedPrecision'
precision = MulticlassClassificationEvaluator(predictionCol='prediction', labelCol='Type_idx', metricName='weightedPrecision').evaluate(train_eval)
# metricName='weightedRecall'
recall = MulticlassClassificationEvaluator(predictionCol='prediction', labelCol='Type_idx', metricName='weightedRecall').evaluate(train_eval)
# NOTE(review): returns the name `pd`, not any of the metrics above (see header).
return pd