【yarn】yarn rest api每日job数量分析

一、说明

# 无法指定时间范围!!!
yarn application -list 

官方文档

rest返回内容(官网案例):

{
  "app":
  {
    "id":"application_1324057493980_0001",
    "user":"user1",
    "name":"",
    "queue":"default",
    "state":"ACCEPTED",
    "finalStatus":"UNDEFINED",
    "progress":0,
    "trackingUI":"UNASSIGNED",
    "diagnostics":"",
    "clusterId":1324057493980,
    "startedTime":1324057495921,
    "finishedTime":0,
    "elapsedTime":2063,
    "amContainerLogs":"http:\/\/amNM:2\/node\/containerlogs\/container_1324057493980_0001_01_000001",
    "amHostHttpAddress":"amNM:2"
  }
}

二、代码

gradle项目build.gradle内容

// Build plugins: Java and Scala compilation, plus Shadow for building the fat jar
// configured in the shadowJar block of this script.
plugins {
    id('java')
    id('scala')
    id('com.github.johnrengelman.shadow') version('7.1.2')
}

// Project coordinates. `description` doubles as the base name of the shadow jar
// (see archiveBaseName in the shadowJar block), and `version` as its version part.
group = 'com.test'
version = '1.0'
description = 'yarn-job-stat'

// Artifact repositories: the Aliyun mirrors are consulted first, in the order listed,
// with Maven Central as the final fallback.
repositories {
    ['releases', 'google', 'central', 'gradle-plugin', 'public'].each { mirror ->
        maven { url "https://maven.aliyun.com/repository/${mirror}" }
    }
    mavenCentral()
}

// Shared version numbers and module coordinates used by the dependencies block.
ext {
    // Scala toolchain: full version for the scala-lang artifacts, major version for the
    // `_<major>` suffix of cross-built Scala libraries.
    scala_version = '2.11.12'
    scala_major_version = '2.11'
    scalatest_version = '3.2.12'
    scalactic_version = '3.2.12'

    // Jackson: one version shared by every Jackson module below.
    jackson_version = "2.10.3"
    jackson_core_module = "com.fasterxml.jackson.core:jackson-core"
    jackson_annotations_module = "com.fasterxml.jackson.core:jackson-annotations"
    jackson_databind_module = "com.fasterxml.jackson.core:jackson-databind"
    jackson_dataformat_xml_module = "com.fasterxml.jackson.dataformat:jackson-dataformat-xml"
    // Derive the Scala suffix from scala_major_version so that a Scala upgrade only has
    // to touch the two version properties above.
    jackson_module_scala_module = "com.fasterxml.jackson.module:jackson-module-scala_${scala_major_version}"
    // NOTE(review): not referenced by the dependencies block of this script; kept in case
    // other build logic reads it.
    jackson_module_paranamer_module = "com.fasterxml.jackson.module:jackson-module-paranamer"
}

// Module dependencies. Cross-built Scala artifacts take their `_<major>` suffix from
// scala_major_version (declared in the ext block) instead of repeating a literal.
dependencies {
    // Kotlin runtime pieces used alongside OkHttp.
    runtimeOnly("org.jetbrains.kotlin:kotlin-reflect:1.9.10")  // for runtime on linux
    implementation("org.jetbrains.kotlin:kotlin-stdlib:1.9.10")
    implementation("com.squareup.okhttp3:okhttp:4.12.0")

    // Jackson: JSON tree parsing plus Scala case-class binding.
    implementation("${jackson_core_module}:${jackson_version}")
    implementation("${jackson_annotations_module}:${jackson_version}")
    implementation("${jackson_databind_module}:${jackson_version}")
    implementation("${jackson_dataformat_xml_module}:${jackson_version}")
    implementation("${jackson_module_scala_module}:${jackson_version}")

    // Scala toolchain.
    implementation("org.scala-lang:scala-library:${scala_version}")
    implementation("org.scala-lang:scala-reflect:${scala_version}")
    implementation("org.scala-lang:scala-compiler:${scala_version}")
    testRuntimeOnly("org.scala-lang:scala-library:${scala_version}")
    testRuntimeOnly("org.scala-lang:scala-reflect:${scala_version}")
    testRuntimeOnly("org.scala-lang:scala-compiler:${scala_version}")

    // Testing: ScalaTest executed through the JUnit Platform.
    testImplementation(platform('org.junit:junit-bom:5.9.1'))
    testImplementation('org.junit.jupiter:junit-jupiter')
    compileOnly("org.scalactic:scalactic_${scala_major_version}:${scalactic_version}")
    testImplementation("org.scalatest:scalatest_${scala_major_version}:${scalatest_version}")
    // NOTE(review): this engine is versioned 3.2.17.0 while scalatest_version is 3.2.12;
    // confirm the two are meant to differ.
    testRuntimeOnly("org.scalatestplus:junit-5-10_${scala_major_version}:3.2.17.0")
}

// Test task: run on the JUnit Platform, restricted to the ScalaTest engine.
test {
    useJUnitPlatform {
        includeEngines 'scalatest'
    }
    // testLogging is configured on the Test task itself, so it is declared at task level
    // rather than inside the useJUnitPlatform options closure.
    testLogging {
        events("passed", "skipped", "failed", "standard_error")
    }
}

// Pass -unchecked and -deprecation to scalac for every Scala compile task.
tasks.withType(ScalaCompile) { ScalaCompile compileTask ->
    compileTask.scalaCompileOptions.additionalParameters = ['-unchecked', '-deprecation']
}
// Fat-jar packaging. With the settings below the archive is named
// <description>-<version>-shadow.jar, i.e. yarn-job-stat-1.0-shadow.jar.
shadowJar {
    // `this.description` is used instead of a bare `description`; presumably so the value
    // comes from the project description rather than the task's own — TODO confirm.
    archiveBaseName.set("${this.description}")
    archiveClassifier.set('shadow')
    archiveVersion.set("${version}")
    configurations = [project.configurations.runtimeClasspath]
    // Only the dependencies listed here are bundled into the jar.
    dependencies {
        include(dependency('org.scala-lang:scala-library:'))
        include(dependency('com.fasterxml.jackson.core::'))
        include(dependency('com.fasterxml.jackson.module::'))
        include(dependency('com.fasterxml.jackson.dataformat::'))
        include(dependency('com.squareup.okhttp3::'))
        include(dependency('com.squareup.okio::'))
        include(dependency('com.thoughtworks.paranamer::'))

        include(dependency('org.jetbrains.kotlin:kotlin-reflect:1.9.10')) // needed for `java -jar`; not needed when debugging locally
        include(dependency('org.jetbrains.kotlin:kotlin-stdlib:1.9.10')) // needed for `java -jar`; not needed when debugging locally
    }

    manifest {
        // Must name the object that declares main(); in this project that is
        // YarnJobAnalyse, otherwise `java -jar` cannot locate an entry point.
        attributes 'Main-Class': 'YarnJobAnalyse'
    }
}

Apps.scala

/**
 * Binding target for the value of the top-level `apps` key of the applications response.
 *
 * @param app the application records found under the `app` key; may be `null` when the
 *            response carries no list, so callers should null-check it
 */
case class Apps(app: Seq[YarnApplication])

/**
 * One application record as bound from the REST response.
 *
 * The class is named `YarnApplication` because that is the element type [[Apps]] refers
 * to. Response fields that are not declared here are skipped by the mapper, which
 * disables `FAIL_ON_UNKNOWN_PROPERTIES`.
 *
 * @param id              application id, e.g. `application_1324057493980_0001`
 * @param user            submitting user
 * @param name            application name
 * @param queue-independent fields follow:
 * @param state           application state, e.g. `ACCEPTED`
 * @param finalStatus     final status, e.g. `UNDEFINED`; the key the report groups by
 * @param applicationType application type as reported by the server
 * @param startedTime     start time (epoch milliseconds in the sample response)
 * @param launchTime      launch time as reported by the server
 * @param finishedTime    finish time; `0` in the sample response for an unfinished app
 * @param elapsedTime     elapsed time as reported by the server
 */
case class YarnApplication(
  id: String,
  user: String,
  name: String,
  state: String,
  finalStatus: String,
  applicationType: String,
  startedTime: Long,
  launchTime: Long,
  finishedTime: Long,
  elapsedTime: Long
)
import java.time.LocalDateTime
/**
 * One query bucket: a start/end pair carried in three representations.
 *
 * @param startStr bucket start as text (the caller passes `LocalDateTime.toString`)
 * @param endStr   bucket end as text (the caller passes `LocalDateTime.toString`)
 * @param startTs  bucket start in epoch milliseconds
 * @param endTs    bucket end in epoch milliseconds
 * @param startDt  bucket start as a local date-time
 * @param endDt    bucket end as a local date-time
 */
case class Interval(
  startStr: String,
  endStr: String,
  startTs: Long,
  endTs: Long,
  startDt: LocalDateTime,
  endDt: LocalDateTime
)

import java.io.{File, PrintWriter}
import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, ObjectMapper}
import com.fasterxml.jackson.databind.json.JsonMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import okhttp3.{OkHttpClient, Request}

import java.time.{LocalDateTime, ZoneOffset}
import java.time.format.DateTimeFormatter
import scala.collection.mutable

/**
 * Counts YARN applications per time bucket, grouped by final status, and writes the
 * result as CSV to `./yarn-job-stat.csv`.
 *
 * Usage (the first argument is the bucket size: `m` = month, `d` = day, `h` = hour):
 * {{{
 * java -jar yarn-job-stat-1.0-shadow.jar "d" "yarn-api:8088" "2024-11-14 00:00:00" "2024-11-15 00:00:00"
 * }}}
 */
object YarnJobAnalyse {
  val MONTH = "m"
  val DAY = "d"
  val HOUR = "h"
  val MODES: Seq[String] = Seq(MONTH, DAY, HOUR)

  /** Bucket size in effect; one of [[MODES]]. Overwritten by `main` from the first argument. */
  var mode = DAY

  /** Format of the start/end command-line arguments. */
  val df: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
  val fileName = "yarn-job-stat.csv"
  var startTime: String = "2024-11-14 00:00:00"
  var endTime: String = "2024-11-15 00:00:00"
  var currHost: String = null // like "yarn-api:8088"

  /** Buckets produced by `initTime`, in chronological order. */
  var dayList: mutable.ListBuffer[Interval] = mutable.ListBuffer.empty

  /** JSON mapper with Scala support; unknown response fields are ignored. */
  var mapper: ObjectMapper = JsonMapper.builder()
    .addModule(DefaultScalaModule)
    .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    .build()
  val client = new OkHttpClient()

  // All local date-times are interpreted at this fixed offset when converted to epoch millis.
  private val zone: ZoneOffset = ZoneOffset.of("+08:00")

  /**
   * Converts a `yyyy-MM-dd HH:mm:ss` string to epoch milliseconds at offset +08:00.
   *
   * @throws java.time.format.DateTimeParseException if `dt` does not match the pattern
   */
  def toMilli(dt: String): Long = toMilli(LocalDateTime.parse(dt, df))

  /** Converts a local date-time to epoch milliseconds at offset +08:00. */
  def toMilli(dt: LocalDateTime): Long = dt.toInstant(zone).toEpochMilli

  /**
   * Advances `ld` by one bucket according to the current `mode`.
   *
   * @throws IllegalArgumentException if `mode` is not one of `MODES`
   */
  def addTime(ld: LocalDateTime): LocalDateTime = mode match {
    case MONTH => ld.plusMonths(1)
    case DAY => ld.plusDays(1)
    case HOUR => ld.plusHours(1)
    case _ => throw new IllegalArgumentException(s"unsupported interval ${mode}!")
  }

  /**
   * Rebuilds `dayList` with consecutive buckets covering `startTime` to `endTime`.
   *
   * Only whole buckets are produced: a trailing span shorter than one bucket is not
   * included. The list is cleared first so that calling this twice does not duplicate
   * buckets.
   */
  def initTime(): Unit = {
    dayList.clear()
    var bucketStart: LocalDateTime = LocalDateTime.parse(startTime, df)
    val rangeEnd: LocalDateTime = LocalDateTime.parse(endTime, df)
    var bucketEnd: LocalDateTime = addTime(bucketStart)
    while (!bucketEnd.isAfter(rangeEnd)) {
      dayList.append(
        Interval(
          bucketStart.toString,
          bucketEnd.toString,
          toMilli(bucketStart),
          toMilli(bucketEnd),
          bucketStart,
          bucketEnd
        )
      )
      bucketStart = bucketEnd
      bucketEnd = addTime(bucketEnd)
    }
  }

  /**
   * Queries the applications started within bucket `t` and appends one CSV row per final
   * status (`start,end,status,count`), highest count first, to `sb`.
   *
   * Nothing is appended when the response has no `apps` object or no `app` list.
   *
   * @throws java.io.IOException if the request fails or the server answers with a
   *                             non-2xx status
   */
  def parse(t: Interval, sb: StringBuilder): Unit = {
    val url = s"http://${currHost}/ws/v1/cluster/apps?startedTimeBegin=${t.startTs}&startedTimeEnd=${t.endTs}"
    println(s"Ready to query url: ${url}")
    val request = new Request.Builder()
      .url(url)
      .get()
      .build()

    val response = client.newCall(request).execute()
    val respStr =
      try {
        // Fail early with the status code instead of a confusing JSON parse error on an
        // error page.
        if (!response.isSuccessful) {
          throw new java.io.IOException(s"YARN REST request failed with HTTP ${response.code()}: ${url}")
        }
        response.body().string()
      } finally {
        // Always release the connection, including on the failure path.
        response.close()
      }

    // Each step may legitimately yield null (missing key, JSON null, absent list), so the
    // chain is expressed with Option rather than explicit null checks.
    val found = for {
      root <- Option(mapper.readTree(respStr))
      node <- Option(root.get("apps"))
      parsed <- Option(mapper.treeToValue(node, classOf[Apps]))
      list <- Option(parsed.app)
    } yield list

    found.foreach { list =>
      val jobStateCount = list.toList
        .groupBy(_.finalStatus)
        .map { case (status, group) => (status, group.size) }
        .toList
        .sortBy(_._2)
        .reverse
      jobStateCount.foreach { case (status, count) =>
        sb.append(s"${t.startStr},${t.endStr},${status},${count}\n")
      }
    }
  }

  /**
   * Writes `fileContext` to `./fileName` as UTF-8, replacing any existing file.
   *
   * The writer is closed even when writing throws.
   */
  def write(fileName: String, fileContext: String): Unit = {
    val writer = new PrintWriter(new File("./" + fileName), "UTF-8")
    try writer.write(fileContext)
    finally writer.close()
    println("write complete!")
  }

  /**
   * Entry point. Expects exactly four arguments: mode (`m`, `d` or `h`), `host:port` of
   * the ResourceManager web endpoint, start time and end time (both `yyyy-MM-dd HH:mm:ss`).
   *
   * @throws IllegalArgumentException if the argument count or the mode is invalid
   */
  def main(args: Array[String]): Unit = {
    // `require` is used for input validation because `assert` can be elided at compile time.
    require(
      args.length == 4,
      "usage: <m|d|h> <host:port> <start 'yyyy-MM-dd HH:mm:ss'> <end 'yyyy-MM-dd HH:mm:ss'>"
    )
    val requestedMode = args(0)
    require(MODES.contains(requestedMode), s"mode should be ${MODES.toString()},but got ${requestedMode}")
    mode = requestedMode
    currHost = args(1)
    startTime = args(2)
    endTime = args(3)
    println(s"yarn url:${currHost},start:${startTime},end:${endTime}")
    initTime()

    val sb: StringBuilder = new StringBuilder()
    sb.append("start,end,state,count\n")
    dayList.foreach(bucket => parse(bucket, sb))
    write(fileName, sb.toString())
  }
}

三、使用

# d就是day!
java -jar yarn-job-stat-1.0-shadow.jar "d" "yarn-api:8088" "2024-11-14 00:00:00" "2024-11-18 00:00:00"
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值