在 RDD 中对字段赋值(修改字段值)的几个例子:
例子0:使用 if/else 判断,符合条件的字段就赋相应的新值
import org.apache.spark._
import SparkContext._
object WordCount {
  /** Rewrites each input line: if the second whitespace-separated field equals
    * "flume" it is replaced with "flumexxx"; the first three fields are then
    * emitted tab-separated to the output path.
    *
    * Usage: WordCount &lt;input&gt; &lt;output&gt;
    */
  def main(args: Array[String]): Unit = {
    // Bug fix: both <input> and <output> are required. The original checked
    // `args.length == 0`, so a single-argument run crashed on args(1).
    if (args.length < 2) {
      System.err.println("Usage:spark.example.WordCount <input> <output>")
      System.exit(1)
    }
    val inputPath  = args(0)
    val outputPath = args(1)

    val conf = new SparkConf().setAppName("WordCount")
    // Kryo serialization is faster and more compact than Java serialization.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)

    sc.textFile(inputPath)
      .map { line =>
        val fields = line.split(" ")
        // Bug fix: the original threw ArrayIndexOutOfBoundsException on lines
        // with fewer than 3 fields; such lines are now passed through as-is.
        if (fields.length < 3) line
        else {
          // Replace the second field when it matches, without mutating the array.
          val second = if (fields(1) == "flume") "flumexxx" else fields(1)
          Seq(fields(0), second, fields(2)).mkString("\t")
        }
      }
      .saveAsTextFile(outputPath)
  }
}
例子1:
import org.apache.spark._
import SparkContext._
object WordCount {
  /** Word count over whitespace-separated tokens, remapping the word "flume"
    * to the key "spark" before counting. Output lines are "word\tcount".
    *
    * Usage: WordCount &lt;input&gt; &lt;output&gt;
    */
  def main(args: Array[String]): Unit = {
    // Bug fix: both <input> and <output> are required. The original checked
    // `args.length == 0`, so a single-argument run crashed on args(1).
    if (args.length < 2) {
      System.err.println("Usage:spark.example.WordCount <input> <output>")
      System.exit(1)
    }
    val inputPath  = args(0)
    val outputPath = args(1)

    val conf = new SparkConf().setAppName("WordCount")
    // Kryo serialization is faster and more compact than Java serialization.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)

    sc.textFile(inputPath)
      .flatMap(_.split(" "))
      // Remap the key at map time: occurrences of "flume" count toward "spark".
      .map(word => if (word == "flume") ("spark", 1) else (word, 1))
      .reduceByKey(_ + _)
      .map { case (word, count) => s"$word\t$count" }
      .saveAsTextFile(outputPath)
  }
}
例子2:
import org.apache.spark._
import SparkContext._
object WordCount {
  /** Word count over whitespace-separated tokens; when formatting the result,
    * the key "spark" is relabeled "sparkXXX". Output lines are "word\tcount".
    *
    * Usage: WordCount &lt;input&gt; &lt;output&gt;
    */
  def main(args: Array[String]): Unit = {
    // Bug fix: both <input> and <output> are required. The original checked
    // `args.length == 0`, so a single-argument run crashed on args(1).
    if (args.length < 2) {
      System.err.println("Usage:spark.example.WordCount <input> <output>")
      System.exit(1)
    }
    val inputPath  = args(0)
    val outputPath = args(1)

    val conf = new SparkConf().setAppName("WordCount")
    // Kryo serialization is faster and more compact than Java serialization.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)

    sc.textFile(inputPath)
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      // Relabel at output time: counting is unaffected, only the label changes.
      .map { case (word, count) =>
        val label = if (word == "spark") "sparkXXX" else word
        s"$label\t$count"
      }
      .saveAsTextFile(outputPath)
  }
}