import java.text.SimpleDateFormat
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark job that parses \u1111-delimited log lines, deserializes the JSON
 * payload (3rd field) with Jackson, enriches it with the action name
 * (2nd field) and a millisecond timestamp parsed from the 1st field,
 * and writes the re-serialized JSON lines to the output path.
 *
 * Usage: ParseLog <inputPath> <outputPath>
 */
object Jackson {

  // Field separator used in the raw log lines.
  private val FieldSep = "\u1111"

  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: ParseLog <inputPath> <outputPath>")
      System.exit(1)
    }

    val conf = new SparkConf().setAppName("ParseLog")
    val sc = new SparkContext(conf)

    // Delete a pre-existing output directory so saveAsTextFile does not fail.
    val fileSystem = FileSystem.get(sc.hadoopConfiguration)
    val outputPath = new Path(args(1))
    if (fileSystem.exists(outputPath)) {
      fileSystem.delete(outputPath, true)
    }

    sc.textFile(args(0))
      .mapPartitions { lines =>
        // Build the parser objects once per partition on the executor instead
        // of capturing driver-side instances in the task closure:
        // SimpleDateFormat is not thread-safe, and constructing the Jackson
        // mapper per partition avoids serializing it with every task.
        val mapper = new ObjectMapper()
        mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
        mapper.registerModule(DefaultScalaModule)
        val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

        lines.map { line =>
          // Split once instead of three times per record.
          val fields = line.split(FieldSep)
          // DefaultScalaModule deserializes the JSON object into a Scala Map;
          // the cast mirrors the original code's assumption about the payload.
          val payload = mapper.readValue(fields(2), classOf[Any]).asInstanceOf[Map[String, Any]]
          val enriched = payload +
            ("activeName" -> fields(1)) +
            ("timeTag" -> sdf.parse(fields(0)).getTime)
          mapper.writeValueAsString(enriched)
        }
      }
      .saveAsTextFile(outputPath.toString)
  }
}
使用到的 jar: jackson-databind-2.9.5.jar、lib/jackson-module-scala_2.12-2.9.8.jar
这篇博客介绍了如何在Spark Scala项目中利用Jackson库进行JSON数据的解析,详细阐述了所需的依赖jar包,包括jackson-databind-2.9.5.jar和lib/jackson-module-scala_2.12-2.9.8.jar。
1303

被折叠的评论
为什么被折叠?



