// Pseudocode, study notes: read CSV/Excel/text input with Spark, normalize
// columns, and register the result as Hive tables / temp views.
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import scala.collection.mutable.ListBuffer
object T {
  // args(0): input format ("csv", "Excel", "text"); the remaining args carry
  // encoding, column names, and "old:new" rename mappings per branch.
  def main(args: Array[String]): Unit = {
if (args(0).equals("csv")){
if (args(1).equals("utf-8")){
if (args(2).contains(":")){
var dataFrame=getSpark2.read.option("inferSchema","true")
.option("header","true")
.csv("")
val map2=args(3).split(",").map(data=>{
val arr=data.split(":")
(arr(0),arr(1))
}).toMap
val frame1=dataFrame.select(dataFrame.columns.map(t=> col(t).as(map2.getOrElse(t,t))):_*)
for (elem <- frame1.dtypes) {
if (elem._2.equals("TimestampType")){
frame1.withColumn(elem._1+"aaa",frame1(elem._1).cast("String"))
frame1.drop(elem._1).withColumnRenamed(elem._1+"aaa",elem._1)
}
}
frame1.createOrReplaceTempView("temp")
getSpark2.sql(" create table tablename as select * from temp1")
getSpark2.sql("alter table tablename set serdeproperties('serialization.null.format'='')")
}
}
var dataFrame=getSpark2.read.option("inferSchema","true")
.option("header","true")
.option("encoding","gbk")
.csv("")
val map2=args(3).split(",").map(data=>{
val arr=data.split(":")
(arr(0),arr(1))
}).toMap
val frame1=dataFrame.select(dataFrame.columns.map(t=> col(t).as(map2.getOrElse(t,t))):_*)
for (elem <- frame1.dtypes) {
if (elem._2.equals("TimestampType")){
frame1.withColumn(elem._1+"aaa",frame1(elem._1).cast("String"))
frame1.drop(elem._1).withColumnRenamed(elem._1+"aaa",elem._1)
}
}
frame1.createOrReplaceTempView("temp")
getSpark2.sql(" create table tablename as select * from temp1")
getSpark2.sql("alter table tablename set serdeproperties('serialization.null.format'='')")
    } else if (args(0).equals("Excel")) {
      if (args(2).contains(":")) {
        // Reading Excel needs an explicit data source; this assumes the
        // spark-excel package (com.crealytics.spark.excel) is on the classpath.
        val dataFrame = getSpark2.read.format("com.crealytics.spark.excel")
          .option("inferSchema", "true")
          .option("header", "true")
          .load("") // input path placeholder
        val map2 = args(3).split(",").map(data => {
          val arr = data.split(":")
          (arr(0), arr(1))
        }).toMap
        var frame1 = dataFrame.select(dataFrame.columns.map(t => col(t).as(map2.getOrElse(t, t))): _*)
        for (elem <- frame1.dtypes) {
          if (elem._2.equals("TimestampType")) {
            frame1 = frame1.withColumn(elem._1 + "aaa", frame1(elem._1).cast("String"))
              .drop(elem._1)
              .withColumnRenamed(elem._1 + "aaa", elem._1)
          }
        }
        frame1.createOrReplaceTempView("temp")
        getSpark2.sql("create table tablename as select * from temp")
        getSpark2.sql("alter table tablename set serdeproperties('serialization.null.format'='')")
      } else {
        // args(2) carries the column names directly.
        val columns: Array[String] = args(2).split(",")
        var writeFrame = getSpark2.read.format("com.crealytics.spark.excel")
          .option("inferSchema", "true")
          .option("header", "true")
          .load("") // input path placeholder
          .toDF(columns: _*)
        for (elem <- writeFrame.dtypes) {
          if (elem._2.equals("TimestampType")) {
            writeFrame = writeFrame.withColumn(elem._1 + "aaa", writeFrame(elem._1).cast("String"))
              .drop(elem._1)
              .withColumnRenamed(elem._1 + "aaa", elem._1)
          }
        }
        writeFrame.createGlobalTempView("temp2")
      }
    } else if (args(0).equals("text")) {
      // args(3): comma-separated column specs; the token after the last ':'
      // is the column name. Every column is read as a nullable string.
      val columns: Array[String] = args(3).split(",")
      val structTypes = new ListBuffer[StructField]
      for (i <- 0 until columns.length) {
        val spec: Array[String] = columns(i).split(":")
        structTypes += StructField(spec(spec.length - 1), StringType, nullable = true)
      }
      val structType = StructType(structTypes)
      // Placeholder flags: the note compared literal strings here (always false);
      // substitute real configuration values, e.g. from args.
      val trimFlag = ""   // "1": trim each field
      val quoteFlag = ""  // "1": strip double quotes from each field
      val headerFlag = "" // "1": drop the first row as a header
      val lengthFlag = "" // "1": drop rows whose field count differs from the schema
      val frame = getSpark2.sparkContext.textFile("").map(data => { // input path placeholder
        // "delimiter" is a placeholder for the actual field separator.
        Row.fromSeq(data.split("delimiter", -1).map(x => {
          var r: String = x
          if (trimFlag.equals("1")) {
            r = r.trim()
          }
          if (quoteFlag.equals("1")) {
            r = r.replace("\"", "")
          }
          r
        }))
      })
      var rdd: RDD[Row] = frame
      if (headerFlag.equals("1")) {
        val header: Row = frame.first()
        rdd = frame.filter(_ != header)
      }
      val collength = columns.length
      if (lengthFlag.equals("1")) {
        // Keep only rows whose field count matches the schema.
        rdd = rdd.filter(_.length == collength)
      }
      getSpark2.createDataFrame(rdd, structType).createOrReplaceTempView("temp2")
    }
    // Alternative Excel read via the spark-excel implicits (requires
    // import com.crealytics.spark.excel._):
    // val frame = getSpark2.read.excel(header = true, inferSchema = true).load("hdfs://hadoop102:9000/ss.xlsx")
    // frame.show()
}
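
  // A minimal sketch, factoring out the column-rename step that the csv and
  // Excel branches repeat. `renameColumns` is a name introduced here for
  // illustration; it parses "old:new" pairs and leaves unmapped columns as-is.
  def renameColumns(df: DataFrame, spec: String): DataFrame = {
    val renames = spec.split(",").map { pair =>
      val arr = pair.split(":")
      (arr(0), arr(1))
    }.toMap
    df.select(df.columns.map(c => col(c).as(renames.getOrElse(c, c))): _*)
  }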
  // Obtains (or creates) the shared Hive-enabled SparkSession.
  def getSpark2: SparkSession = {
    SparkSession.builder()
      .config("spark.sql.warehouse.dir", "hdfs://hadoop102:9000/user/hive/warehouse")
      .master("local")
      .enableHiveSupport()
      .getOrCreate()
  }
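
  // A minimal sketch of the TimestampType-to-String cast that each branch above
  // performs in a loop. `castTimestampsToString` is a name introduced here for
  // illustration; foldLeft threads the reassignment through every column.
  def castTimestampsToString(df: DataFrame): DataFrame = {
    df.dtypes.foldLeft(df) { case (acc, (name, dtype)) =>
      if (dtype.equals("TimestampType")) {
        acc.withColumn(name + "aaa", acc(name).cast("String"))
          .drop(name)
          .withColumnRenamed(name + "aaa", name)
      } else {
        acc
      }
    }
  }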
}
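
// Hypothetical invocations (argument layout inferred from the branches above;
// input paths, the jar name, and the Hive table name are placeholders):
//   spark-submit --class T notes.jar csv utf-8 "x:y" "oldName:newName"
//   spark-submit --class T notes.jar text _ _ "f1:id,f2:name"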