import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._
/**
 * @Description: Fetching fields from an Elasticsearch index into a Spark RDD
 * @Author: lyw
 * @Date: 2019/1/16 11:04
 * @Version 1.0
 */
object ESRDDDEMO {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("esrdd")
    conf.setMaster("local")
    // Elasticsearch connection settings
    conf.set("es.nodes", "hadoop01,hadoop02,hadoop03")
    conf.set("es.port", "9200")
    // es.index.auto.create only takes effect when writing to ES; it is harmless in this read-only demo
    conf.set("es.index.auto.create", "true")
    val sc = new SparkContext(conf)
    val query =
      """
        |{"query":{"match_all":{}}}
      """.stripMargin
    // In each result pair, the key is the document's _id and the value is the
    // document itself as a map keyed by field name.
    val queryRDD: RDD[(String, collection.Map[String, AnyRef])] = sc.esRDD("blog", query)
    // Keep only the document maps, dropping the ids.
    val valueRDD: RDD[collection.Map[String, AnyRef]] = queryRDD.map(_._2)
    // Extract individual fields from each document. The original snippet breaks
    // off at "line.getO"; "id", "title" and "content" are assumed example fields
    // of the "blog" index, so substitute the field names of your own mapping.
    val dataRDD: RDD[(AnyRef, AnyRef, AnyRef)] = valueRDD.map(line => {
      val id = line.getOrElse("id", "")
      val title = line.getOrElse("title", "")
      val content = line.getOrElse("content", "")
      (id, title, content)
    })
    dataRDD.foreach(println)
    sc.stop()
  }
}
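To compile and run this demo, the elasticsearch-hadoop Spark connector has to be on the classpath. A minimal build.sbt sketch follows; the artifact line and all version numbers are assumptions (a Spark 2.x / Elasticsearch 6.x combination plausible for this post's date), so align them with your own cluster.

// build.sbt (version numbers are assumptions; match your Spark and ES versions)
scalaVersion := "2.11.12"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "2.3.2",
  // elasticsearch-spark-20 is the connector artifact line for Spark 2.x
  "org.elasticsearch" %% "elasticsearch-spark-20" % "6.5.4"
)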
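If you would rather receive each hit as a raw JSON string instead of a field map, the connector also provides esJsonRDD on the SparkContext. A minimal sketch, reusing the sc and query values from the demo above and the same assumed "blog" index:

import org.apache.spark.rdd.RDD
import org.elasticsearch.spark._

// esJsonRDD yields (documentId, sourceAsJsonString) pairs, leaving field
// parsing to you (for example with a JSON library of your choice).
val jsonRDD: RDD[(String, String)] = sc.esJsonRDD("blog", query)
jsonRDD.take(5).foreach { case (id, json) => println(s"$id -> $json") }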