import pandas as pd
from pyspark.sql import SQLContext
from pyspark import SparkContext
from pyspark.sql import SparkSession
# Demonstrates converting between pandas DataFrames and Spark SQL DataFrames
# in both directions, plus a simple RDD `map` over the rows of a Spark
# DataFrame.

sc = SparkContext()  # connect to Spark
sqlContest = SQLContext(sc)  # Spark SQL entry point bound to the context above

# Placeholder sample data: Spark cannot infer a schema from an empty,
# column-less pandas DataFrame, so give it at least one column and one row.
pd_df = pd.DataFrame({"value": [1, 2, 3]})  # create a pandas DataFrame

# pandas DataFrame -> Spark SQL DataFrame
spark_df = sqlContest.createDataFrame(pd_df)

# Pair every row with `row * 2`.
# NOTE(review): each `x` here is a pyspark Row (a tuple subclass), so
# `x * 2` repeats the row's fields instead of doubling them numerically --
# confirm this is the intended result.
y = spark_df.rdd.map(lambda x: (x, x * 2))
y.collect()  # action: runs the lazy `map` above (the result is not kept)

spark = SparkSession.builder.appName("sparksql_DataFrame").getOrCreate()

# `createDataFrame` requires its `data` argument; build the Spark SQL
# DataFrame from the sample pandas DataFrame defined above.
sparksql_df = spark.createDataFrame(pd_df)  # create a Spark SQL DataFrame

# Spark SQL DataFrame -> pandas DataFrame
pandas_df = sparksql_df.toPandas()