PySpark 实战之运行模式:Local 模式、Standalone 模式下的 pyspark 与 spark-submit,以及 YARN 模式
关闭防火墙(停止服务并禁止开机自启)
[root@hadoop000 python]# systemctl stop firewalld.service
[root@hadoop000 python]# systemctl disable firewalld.service
[root@hadoop000 conf]# vim ~/.bash_profile
export JAVA_HOME=/root/app/jdk1.8.0_211
export PATH=$JAVA_HOME/bin:$PATH
export SCALA_HOME=/root/app/scala-2.11.8
export PATH=$SCALA_HOME/bin:$PATH
export HADOOP_HOME=/root/app/hadoop-2.6.0-cdh5.7.0
export PATH=$HADOOP_HOME/bin:$PATH
export MAVEN_HOME=/root/app/apache-maven-3.3.9
export PATH=$MAVEN_HOME/bin:$PATH
export PATH=/root/app/python3/bin:$PATH
export PYSPARK_PYTHON=python3
export SPARK_HOME=/root/app/spark-2.3.0-bin-2.6.0-cdh5.7.0
export PATH=$SPARK_HOME/bin:$PATH
export HIVE_HOME=/root/app/hive-1.1.0-cdh5.7.0
export PATH=$HIVE_HOME/bin:$PATH
[root@hadoop000 conf]# source ~/.bash_profile
一、Local模式:
开发
http://spark.apache.org/docs/latest/submitting-applications.html
简单的集群管理,自带的
--master
--name
--py-files
#ni.txt
nihao woqu nihao
hello welcome
woqu hahah
#spark0406.py
import sys
from pyspark import SparkConf, SparkContext
if __name__ == '__main__':
if len(sys.argv) != 3:
print("Usage: wordcount <input> <output>", file=sys.stderr)
sys.exit(-1)
conf = SparkConf()
sc = SparkContext(conf=conf)
def printResult():
counts = sc.textFile