org.apache.spark.repl.Main.interp.command("""
class MySchemaRDD(rdd: org.apache.spark.sql.SchemaRDD) extends java.io.Serializable {
  // FIELD_SEPERATOR and RECORD_SEPERATOR are assumed to be defined earlier in
  // the shell session (see the previous parts of this series).

  // Collect the rows to the driver and print them, fields joined by
  // FIELD_SEPERATOR and each record terminated by RECORD_SEPERATOR.
  def go() = {
    val startstr = ""
    val endstr = RECORD_SEPERATOR
    rdd.collect().foreach(x =>
      print(x.mkString(startstr, FIELD_SEPERATOR, endstr))
    )
  }

  // Save the rows to HDFS (distributed, via saveAsHadoopFile) or to a single
  // local file written on the driver, depending on the output path prefix.
  def saveto(output: String) = {
    import org.apache.hadoop.io.{NullWritable, Text}
    import org.apache.spark.SparkContext._ // pair-RDD implicits (already in scope in spark-shell)
    val startstr = ""
    val endstr = RECORD_SEPERATOR
    // FileUtil.regularFile/deletePath are helpers assumed from the earlier parts
    // of this series; the original mixed FileUtil and AutoFileUtil between the
    // two branches, so a single class is used throughout here.
    val outputpath = FileUtil.regularFile(output)
    FileUtil.deletePath(outputpath)
    if (output.startsWith("hdfs:")) {
      rdd.map(x =>
        (NullWritable.get(), new Text(x.mkString(FIELD_SEPERATOR)))
      ).saveAsHadoopFile[
        org.apache.hadoop.mapred.TextOutputFormat[NullWritable, Text]
      ](outputpath)
    } else {
      // Write to the normalized path (the original opened "output" instead).
      val writer = new java.io.FileWriter(outputpath)
      rdd.collect().foreach(x =>
        writer.write(x.mkString(startstr, FIELD_SEPERATOR, endstr))
      )
      writer.close()
    }
  }
}

object MySchemaRDD {
  // Implicit conversion so any SchemaRDD picks up go() and saveto().
  implicit def toMySchemaRDD(rdd: org.apache.spark.sql.SchemaRDD) = new MySchemaRDD(rdd)
}
""")
Spark Customization, Part 3: MySchemaRDD