创建 SparkSession
from pyspark.sql import SparkSession
# Entry point for the notes below: a SparkSession whose application name
# is "Word Count". The chain is parenthesized so no backslashes are needed.
spark = (
    SparkSession.builder
    .appName("Word Count")
    .getOrCreate()
)
获取 SparkContext（sc）
from pyspark.sql import SparkSession
# Configure the session builder with the application name "Word Count"
# and obtain the session from it.
spark = SparkSession.builder.appName("Word Count").getOrCreate()

# The SparkContext is exposed as an attribute of the session; `sc` is the
# handle used for the RDD examples that follow.
sc = spark.sparkContext
创建RDD
从内存中的集合创建（parallelize）
data = sc.parallelize([('a',1),('b',