Extract the IPs from an access log and deduplicate them
import org.apache.spark.SparkContext

// Spark 0.9.0 constructor: (master, appName, sparkHome)
val sc = new SparkContext("local", "Simple App", "/opt/spark/spark-0.9.0-incubating")
// Take the first "^"-separated field (the IP), drop empty values, append "&", then deduplicate
val temp = sc.textFile("F:\\log.txt")
  .map(column => column.split("\\^")(0))
  .filter(word => null != word && !word.equals(""))
  .map(word => word + "&")
  .distinct()
println(temp.count)            // number of distinct IPs
temp.take(3).foreach(println)  // take(0) returns an empty array, so print the first few elements instead
temp.saveAsTextFile("F:\\result")
Output (the distinct IPs, also written to F:\result):
127.0.0.1&
27.151.207.85&
127.0.2.1&
Test file: log.txt
27.151.207.85^asdf^gfsd^1fsd
127.0.0.1^sadf^adf^asdf
127.0.0.1^sadf^adf^asdf
127.0.0.1^sadf^adf^asdf
127.0.2.1^sadf^adf^asdf
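
On newer Spark releases the three-argument SparkContext constructor is deprecated, so here is a minimal sketch of the same job configured through SparkConf (assuming Spark 1.x or later on the classpath; the object name DistinctIps and the output path F:\result2 are hypothetical, the input path is the same as above):

import org.apache.spark.{SparkConf, SparkContext}

object DistinctIps {
  def main(args: Array[String]): Unit = {
    // Same job as above, but configured through SparkConf (Spark 1.x+ style)
    val conf = new SparkConf().setMaster("local").setAppName("Simple App")
    val sc = new SparkContext(conf)

    val ips = sc.textFile("F:\\log.txt")
      .map(_.split("\\^")(0))                    // first "^"-separated field is the IP
      .filter(ip => ip != null && ip.nonEmpty)   // drop empty fields
      .map(ip => ip + "&")                       // keep the trailing "&" used above
      .distinct()                                // deduplicate

    println(ips.count())
    ips.saveAsTextFile("F:\\result2")            // hypothetical output directory

    sc.stop()
  }
}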