## 参考资料:
https://blog.youkuaiyun.com/Haidaiya/article/details/81230636
https://www.cnblogs.com/yjmyzz/p/4694219.html
https://my.oschina.net/u/3825598/blog/1789861
https://www.jianshu.com/p/160cefd354a2
具体步骤如下:
1、安装IntelliJ IDEA
2、安装Java SDK
3、安装Scala
4、添加配置文件,如hadoop-common-2.6.0-bin-master.zip等
5、测试HelloWorld代码demo
//demo one
package com.xx.HelloWorld
object Test {
  /** Sums an arbitrary number of Int arguments.
    *
    * @param args zero or more integers (varargs); empty input yields 0
    * @return the sum of all arguments
    */
  def sum(args: Int*): Int = args.sum

  /** Entry point: demonstrates expanding a List into varargs with `: _*`. */
  def main(args: Array[String]): Unit = {
    val mylist = List(1, 2, 3, 4, 5)
    println("Final Results: " + sum(mylist: _*))
  }
}
//demo two
package com.xx.feature_engineer
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
object demo {
  /** Reads a column-type mapping CSV (name -> declared type) and a training CSV,
    * casts every column declared "continuous" to IntegerType, and prints the
    * resulting column dtypes.
    *
    * NOTE(review): paths are hard-coded Windows locations; adjust for your machine.
    */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("ReadData")
      .setMaster("local")
      .set("spark.sql.warehouse.dir",
        "file:///C:/Users/username/IdeaProjects/spark_demo/spark-warehouse")
    val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate()
    // Ensure the session is stopped even if reading/transforming throws.
    try {
      val input_path = "D:/spark_data/datatypes.csv"
      // Read via SparkSession directly; the sqlContext accessor is legacy-only.
      val colTypes = sparkSession.read.format("csv")
        .option("sep", ",")
        .option("inferSchema", "false")
        .option("header", "true")
        .load(input_path)
      //println(colTypes.printSchema)
      println("*******************************")
      //println(colTypes.show)
      val df = sparkSession.read.format("csv")
        .option("sep", ",")
        .option("inferSchema", "false")
        .option("header", "true")
        .load("D:/spark_data/train.csv")
      // First CSV column = data column name, second = its declared type.
      val typeMap = colTypes.rdd.collect()
        .map(line => line.getString(0) -> line.getString(1))
        .toMap
      // Cast columns declared "continuous" to IntegerType; leave others as-is.
      val cols = df.columns.map { f =>
        if (typeMap.getOrElse(f, "") == "continuous") col(f).cast(IntegerType) else col(f)
      }
      val results = df.select(cols: _*)
      // foreach (not map) — this is a pure side effect, no result collection needed.
      results.dtypes.foreach(println)
    } finally {
      sparkSession.stop()
    }
  }
}