一、说明
# 无法指定时间范围!!!
yarn application -list
rest返回内容(官网案例):
{
"app":
{
"id":"application_1324057493980_0001",
"user":"user1",
"name":"",
"queue":"default",
"state":"ACCEPTED",
"finalStatus":"UNDEFINED",
"progress":0,
"trackingUI":"UNASSIGNED",
"diagnostics":"",
"clusterId":1324057493980,
"startedTime":1324057495921,
"finishedTime":0,
"elapsedTime":2063,
"amContainerLogs":"http:\/\/amNM:2\/node\/containerlogs\/container_1324057493980_0001_01_000001",
"amHostHttpAddress":"amNM:2"
}
}
二、代码
gradle项目build.gradle
内容
// Build plugins: Java and Scala compilation, plus the Shadow plugin used by the shadowJar block
// further down to assemble a single runnable jar.
plugins {
id 'java'
id 'scala'
id 'com.github.johnrengelman.shadow' version '7.1.2'
}
// Project coordinates. `description` is reused by the shadowJar block as the archive base name.
group = 'com.test'
version = '1.0'
description = 'yarn-job-stat'
// Dependency repositories, listed in the order they are declared:
// the Aliyun mirrors first, Maven Central last.
repositories {
maven { url "https://maven.aliyun.com/repository/releases" }
maven { url "https://maven.aliyun.com/repository/google" }
maven { url "https://maven.aliyun.com/repository/central" }
maven { url "https://maven.aliyun.com/repository/gradle-plugin" }
maven { url "https://maven.aliyun.com/repository/public" }
mavenCentral()
}
// Shared version numbers and module coordinates referenced by the dependencies block.
ext {
    // Scala toolchain
    scala_version = '2.11.12'
    scala_major_version = '2.11'

    // Test libraries
    scalatest_version = '3.2.12'
    scalactic_version = '3.2.12'

    // Jackson: one version for every module, coordinates kept as "group:artifact"
    jackson_version = '2.10.3'
    jackson_core_module = 'com.fasterxml.jackson.core:jackson-core'
    jackson_annotations_module = 'com.fasterxml.jackson.core:jackson-annotations'
    jackson_databind_module = 'com.fasterxml.jackson.core:jackson-databind'
    jackson_dataformat_xml_module = 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml'
    jackson_module_scala_module = 'com.fasterxml.jackson.module:jackson-module-scala_2.11'
    // Declared for completeness; not referenced by the dependencies block in this script.
    jackson_module_paranamer_module = 'com.fasterxml.jackson.module:jackson-module-paranamer'
}
// Compile, runtime and test dependencies. Coordinates and declaration order are unchanged;
// only the call notation is made uniform.
dependencies {
    // Kotlin runtime pieces (comment kept from the original: needed at runtime on Linux)
    runtimeOnly("org.jetbrains.kotlin:kotlin-reflect:1.9.10")
    implementation("org.jetbrains.kotlin:kotlin-stdlib:1.9.10")

    // HTTP client
    implementation("com.squareup.okhttp3:okhttp:4.12.0")

    // Jackson JSON/XML binding, including the Scala module
    implementation("${jackson_core_module}:${jackson_version}")
    implementation("${jackson_annotations_module}:${jackson_version}")
    implementation("${jackson_databind_module}:${jackson_version}")
    implementation("${jackson_dataformat_xml_module}:${jackson_version}")
    implementation("${jackson_module_scala_module}:${jackson_version}")

    // Scala itself
    implementation("org.scala-lang:scala-library:${scala_version}")
    implementation("org.scala-lang:scala-reflect:${scala_version}")
    implementation("org.scala-lang:scala-compiler:${scala_version}")
    testRuntimeOnly("org.scala-lang:scala-library:${scala_version}")
    testRuntimeOnly("org.scala-lang:scala-reflect:${scala_version}")
    testRuntimeOnly("org.scala-lang:scala-compiler:${scala_version}")

    // Test stack: JUnit 5 platform with ScalaTest on top
    testImplementation(platform("org.junit:junit-bom:5.9.1"))
    testImplementation("org.junit.jupiter:junit-jupiter")
    compileOnly("org.scalactic:scalactic_2.11:${scalactic_version}")
    testImplementation("org.scalatest:scalatest_2.11:${scalatest_version}")
    testRuntimeOnly("org.scalatestplus:junit-5-10_2.11:3.2.17.0")
}
// Test task: run on the JUnit Platform using only the ScalaTest engine, and log the listed events.
test {
    useJUnitPlatform {
        includeEngines 'scalatest'
    }
    // Configured on the test task itself; the engine options closure has no testLogging of its own.
    testLogging {
        events 'passed', 'skipped', 'failed', 'standard_error'
    }
}
// Pass extra compiler flags to every Scala compile task.
tasks.withType(ScalaCompile) { compileTask ->
    compileTask.scalaCompileOptions.additionalParameters = ['-unchecked', '-deprecation']
}
// Assembles the runnable fat jar "<description>-<version>-shadow.jar".
// Only the dependencies explicitly included below are bundled.
shadowJar {
    archiveBaseName.set("${this.description}")
    archiveClassifier.set('shadow')
    archiveVersion.set("${version}")
    configurations = [project.configurations.runtimeClasspath]
    dependencies {
        include(dependency('org.scala-lang:scala-library:'))
        include(dependency('com.fasterxml.jackson.core::'))
        include(dependency('com.fasterxml.jackson.module::'))
        include(dependency('com.fasterxml.jackson.dataformat::'))
        include(dependency('com.squareup.okhttp3::'))
        include(dependency('com.squareup.okio::'))
        include(dependency('com.thoughtworks.paranamer::'))
        // Needed when launching with `java -jar`; not required for local debugging.
        include(dependency('org.jetbrains.kotlin:kotlin-reflect:1.9.10'))
        // Needed when launching with `java -jar`; not required for local debugging.
        include(dependency('org.jetbrains.kotlin:kotlin-stdlib:1.9.10'))
    }
    manifest {
        // Must name the object that defines main(); YarnJobCount is a plain case class
        // without an entry point, so `java -jar` could not start with it.
        attributes 'Main-Class': 'YarnJobAnalyse'
    }
}
Apps.scala
/**
 * Wrapper for the list of applications found under the `app` key of the REST response.
 *
 * The element type is [[YarnJobCount]], the only application record defined in this source;
 * the previously referenced `YarnApplication` is not declared anywhere here.
 *
 * @param app the application entries; may be null when the response carries no list
 */
case class Apps(app: Seq[YarnJobCount])

/**
 * One application entry of the REST response, reduced to the fields this tool reads.
 *
 * @param id              application id
 * @param user            submitting user
 * @param name            application name
 * @param state           application state
 * @param finalStatus     final status; the key the statistics are grouped by
 * @param applicationType application type
 * @param startedTime     start time as reported by the API (epoch millis in the sample response)
 * @param launchTime      launch time as reported by the API
 * @param finishedTime    finish time as reported by the API
 * @param elapsedTime     elapsed time as reported by the API
 */
case class YarnJobCount(
  id: String,
  user: String,
  name: String,
  state: String,
  finalStatus: String,
  applicationType: String,
  startedTime: Long,
  launchTime: Long,
  finishedTime: Long,
  elapsedTime: Long
)
import java.time.LocalDateTime
/**
 * A single query window, carried in three parallel representations.
 *
 * @param startStr window start as text (written to the CSV output)
 * @param endStr   window end as text (written to the CSV output)
 * @param startTs  window start as epoch milliseconds (sent to the REST API)
 * @param endTs    window end as epoch milliseconds (sent to the REST API)
 * @param startDt  window start as a local date-time
 * @param endDt    window end as a local date-time
 */
case class Interval(
  startStr: String,
  endStr: String,
  startTs: Long,
  endTs: Long,
  startDt: LocalDateTime,
  endDt: LocalDateTime
)
import java.io.{File, PrintWriter}
import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, ObjectMapper}
import com.fasterxml.jackson.databind.json.JsonMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import okhttp3.{OkHttpClient, Request}
import java.time.{LocalDateTime, ZoneOffset}
import java.time.format.DateTimeFormatter
import scala.collection.mutable
/**
 * Counts YARN applications per `finalStatus` over consecutive time windows and writes the
 * result as CSV into the working directory.
 *
 * Arguments: mode (`m`, `d` or `h`), ResourceManager `host:port`, start time, end time.
 * {{{
 * java -jar yarn-job-stat-1.0-shadow.jar "d" "yarn-api:8088" "2024-11-14 00:00:00" "2024-11-15 00:00:00"
 * }}}
 */
object YarnJobAnalyse {
  val MONTH = "m"
  val DAY = "d"
  val HOUR = "h"
  val MODES: Seq[String] = Seq(MONTH, DAY, HOUR)

  /** Window size selector; one of [[MODES]]. */
  var mode = DAY

  /** Format expected for the start/end command-line arguments. */
  val df: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
  val fileName = "yarn-job-stat.csv"
  var startTime: String = "2024-11-14 00:00:00"
  var endTime: String = "2024-11-15 00:00:00"
  var currHost: String = null // like "yarn-api:8088"

  /** Windows produced by [[initTime]], in chronological order. */
  var dayList: mutable.ListBuffer[Interval] = mutable.ListBuffer.empty

  var mapper: ObjectMapper = JsonMapper.builder()
    .addModule(DefaultScalaModule)
    .build()
  // The response carries more fields than the case classes declare; ignore the extras.
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)

  val client = new OkHttpClient()

  // All local date-times are interpreted at this fixed offset when converted to epoch millis.
  private val zone: ZoneOffset = ZoneOffset.of("+08:00")

  /**
   * Converts a `yyyy-MM-dd HH:mm:ss` string to epoch milliseconds at offset +08:00.
   *
   * @throws java.time.format.DateTimeParseException if `dt` does not match [[df]]
   */
  def toMilli(dt: String): Long = toMilli(LocalDateTime.parse(dt, df))

  /** Converts a local date-time to epoch milliseconds at offset +08:00. */
  def toMilli(dt: LocalDateTime): Long = dt.toInstant(zone).toEpochMilli

  /**
   * Advances `ld` by one window according to the current [[mode]].
   *
   * @throws IllegalArgumentException if [[mode]] is not one of [[MODES]]
   */
  def addTime(ld: LocalDateTime): LocalDateTime = {
    mode match {
      case MONTH => ld.plusMonths(1)
      case DAY => ld.plusDays(1)
      case HOUR => ld.plusHours(1)
      case _ => throw new IllegalArgumentException(s"unsupported interval ${mode}!")
    }
  }

  /**
   * Rebuilds [[dayList]] with the consecutive windows between [[startTime]] and [[endTime]].
   *
   * Only whole windows are produced: a trailing remainder shorter than one window is not
   * covered. The list is cleared first so that a repeated call does not duplicate windows.
   */
  def initTime(): Unit = {
    val rangeStart: LocalDateTime = LocalDateTime.parse(startTime, df)
    val rangeEnd: LocalDateTime = LocalDateTime.parse(endTime, df)
    dayList.clear()
    var windowStart: LocalDateTime = rangeStart
    var windowEnd: LocalDateTime = addTime(windowStart)
    while (!windowEnd.isAfter(rangeEnd)) {
      dayList.append(
        Interval(
          windowStart.toString,
          windowEnd.toString,
          toMilli(windowStart),
          toMilli(windowEnd),
          windowStart,
          windowEnd
        )
      )
      windowStart = windowEnd
      windowEnd = addTime(windowEnd)
    }
  }

  /**
   * Queries the applications started within window `t` and appends one CSV line per
   * `finalStatus` (`start,end,status,count`) to `sb`, highest count first.
   *
   * Nothing is appended when the response contains no application list.
   *
   * @param t  the window to query
   * @param sb the buffer receiving the CSV lines
   * @throws java.io.IOException if the request fails or the server answers with a non-2xx status
   */
  def parse(t: Interval, sb: StringBuilder): Unit = {
    // NOTE(review): adjacent windows share one boundary timestamp; if the API treats both
    // bounds as inclusive, an application started exactly on a boundary is counted twice — confirm.
    val url = s"http://${currHost}/ws/v1/cluster/apps?startedTimeBegin=${t.startTs}&startedTimeEnd=${t.endTs}"
    println(s"Ready to query url: ${url}")
    val request = new Request.Builder()
      .url(url)
      .get()
      .build()
    val response = client.newCall(request).execute()
    val respStr: String =
      try {
        // Fail loudly instead of trying to parse an error page as the application list.
        if (!response.isSuccessful) {
          throw new java.io.IOException(s"request failed with HTTP ${response.code()}: ${url}")
        }
        response.body().string()
      } finally {
        response.close()
      }
    // Every step may legitimately yield null (empty body, missing "apps" key, JSON null).
    val appList: Option[Seq[YarnJobCount]] = for {
      root <- Option(mapper.readTree(respStr))
      node <- Option(root.get("apps"): JsonNode)
      apps <- Option(mapper.treeToValue(node, classOf[Apps]))
      list <- Option(apps.app)
    } yield list
    appList.foreach { list =>
      val jobStateCount = list.toList
        .groupBy(app => app.finalStatus)
        .map(kv => (kv._1, kv._2.size))
        .toList
        .sortBy(kv => kv._2)
        .reverse
      jobStateCount.foreach(e => sb.append(s"${t.startStr},${t.endStr},${e._1},${e._2}\n"))
    }
  }

  /**
   * Writes `fileContext` to `./fileName`, replacing any existing file.
   *
   * @throws java.io.IOException if the content could not be written
   */
  def write(fileName: String, fileContext: String): Unit = {
    val writer = new PrintWriter(new File("./" + fileName))
    try {
      writer.write(fileContext)
      // PrintWriter records I/O failures instead of throwing; surface them explicitly.
      if (writer.checkError()) {
        throw new java.io.IOException(s"failed to write ${fileName}")
      }
    } finally {
      writer.close()
    }
    println("write complete!")
  }

  /**
   * Entry point. Expects exactly four arguments: mode, ResourceManager `host:port`,
   * start time and end time (both `yyyy-MM-dd HH:mm:ss`).
   *
   * @throws IllegalArgumentException if the argument count or the mode is invalid
   */
  def main(args: Array[String]): Unit = {
    val modeChoices = MODES.mkString("|")
    // require (unlike assert) cannot be compiled out, so the checks always run.
    require(
      args.length == 4,
      s"usage: <mode: ${modeChoices}> <host:port> <start> <end>, but got ${args.length} argument(s)"
    )
    require(MODES.contains(args(0)), s"mode should be ${MODES.toString()},but got ${args(0)}")
    mode = args(0)
    currHost = args(1)
    startTime = args(2)
    endTime = args(3)
    println(s"yarn url:${currHost},start:${startTime},end:${endTime}")
    initTime()
    if (dayList.isEmpty) {
      println(s"warning: no complete '${mode}' window fits between ${startTime} and ${endTime}")
    }
    val sb: StringBuilder = new StringBuilder()
    sb.append("start,end,state,count\n")
    dayList.foreach(p => parse(p, sb))
    write(fileName, sb.toString())
  }
}
三、使用
# d就是day!
java -jar yarn-job-stat-1.0-shadow.jar "d" "yarn-api:8088" "2024-11-14 00:00:00" "2024-11-18 00:00:00"