1.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.1.1
      /_/
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_231)
Type in expressions to have them evaluated.
Type :help for more information.
集群环境:3个节点,内存分别为 8GB(主节点)+ 4GB(节点1)+ 4GB(节点2)
[root@master huangtest]# cat 1gfortest.scala
import org.apache.spark.sql.SparkSession
import java.lang.management.ManagementFactory
/**
 * Driver-side memory stress test.
 *
 * Allocates byte arrays in 100MB chunks until at least 1GB is held, printing
 * heap usage, non-heap usage and the process RSS after every chunk. The
 * allocation happens on the JVM that runs `main`, not on executors.
 */
object MemoryConsumer {
  import scala.util.control.NonFatal

  /** Integer conversion from bytes to megabytes (truncating). */
  private def toMB(bytes: Long): Long = bytes / 1024 / 1024

  /**
   * Entry point: runs the allocation loop and reports progress on stdout.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): when a SparkSession already exists (e.g. inside spark-shell)
    // getOrCreate() reuses it and this driver-memory setting is not applied to
    // the running JVM; pass --driver-memory on the command line instead.
    val spark = SparkSession.builder()
      .appName("MemoryConsumer")
      .config("spark.driver.memory", "2g") // leave headroom for Spark itself
      .getOrCreate()

    // Memory monitoring handles
    val mxBean = ManagementFactory.getMemoryMXBean
    val runtime = Runtime.getRuntime

    // Target is 1GB; because chunks are 100MB the loop stops at 1100MB.
    val targetBytes = 1024L * 1024 * 1024
    val chunkSize = 100 * 1024 * 1024 // 100MB per allocation
    var totalAllocated = 0L
    // Keeps every chunk reachable so the GC cannot reclaim it.
    val memoryHog = scala.collection.mutable.ArrayBuffer[Array[Byte]]()

    println("=== 开始内存消耗测试 ===")
    println(s"JVM最大内存: ${toMB(runtime.maxMemory())}MB")
    println(s"初始堆内存: ${toMB(mxBean.getHeapMemoryUsage.getUsed)}MB")
    println(s"初始非堆内存: ${toMB(mxBean.getNonHeapMemoryUsage.getUsed)}MB")

    try {
      while (totalAllocated < targetBytes) {
        // Allocate and write to every byte so the pages are really touched.
        val chunk = new Array[Byte](chunkSize)
        java.util.Arrays.fill(chunk, 1.toByte)
        memoryHog += chunk
        totalAllocated += chunkSize

        // Report current state
        val heapUsed = mxBean.getHeapMemoryUsage.getUsed
        val nonHeapUsed = mxBean.getNonHeapMemoryUsage.getUsed
        // getProcessRSS returns bytes, so it must be divided by 1024 twice
        // to match the "MB" label (a single division printed kilobytes).
        println(f"[进度] 已分配: ${toMB(totalAllocated)}%4dMB | " +
          f"堆内存: ${toMB(heapUsed)}%4dMB | " +
          f"非堆: ${toMB(nonHeapUsed)}%3dMB | " +
          f"总RSS: ${toMB(getProcessRSS)}%5dMB")
        Thread.sleep(500) // slow down so the growth can be observed
      }
      println("=== 测试成功完成 ===")
    } catch {
      case _: OutOfMemoryError =>
        println(s"!!! 内存溢出 !!! 最终分配: ${toMB(totalAllocated)}MB")
    } finally {
      // NOTE(review): inside spark-shell this stops the shell's shared session.
      spark.stop()
    }
  }

  /**
   * Resident set size (RSS) of the current process, read from
   * `/proc/<pid>/statm` (Linux only).
   *
   * @return RSS in bytes, or 0 when the value cannot be read
   */
  def getProcessRSS: Long = {
    try {
      // The runtime MXBean name is expected to look like "<pid>@<host>".
      val pid = ManagementFactory.getRuntimeMXBean.getName.split("@")(0)
      val source = scala.io.Source.fromFile(s"/proc/$pid/statm")
      try {
        // Second field of statm is the resident page count.
        // NOTE(review): assumes 4096-byte pages — confirm on the target host.
        source.mkString.trim.split("\\s+").lift(1).map(_.toLong * 4096).getOrElse(0L)
      } finally {
        source.close() // release the file handle opened on every call
      }
    } catch {
      case NonFatal(_) => 0L // best effort: /proc missing or unparsable
    }
  }
}
// Run the program: call the entry point directly so that loading this script
// (e.g. with spark-shell's :load) starts the test immediately.
MemoryConsumer.main(Array.empty)
[root@master huangtest]#
2. 修改启动方式:以 2g 的 driver 内存启动 spark-shell,然后加载脚本
spark-shell --driver-memory 2g --conf spark.driver.maxResultSize=1g
:load /home/huangtest/1gfortest.scala
3.测试结果
scala> :load /home/huangtest/1gfortest.scala
Loading /home/huangtest/1gfortest.scala...
import org.apache.spark.sql.SparkSession
import java.lang.management.ManagementFactory
defined object MemoryConsumer
2025-08-06 15:20:13,922 WARN sql.SparkSession$Builder: Using an existing SparkSession; some spark core configurations may not take effect.
=== 开始内存消耗测试 ===
JVM最大内存: 1820MB
初始堆内存: 188MB
初始非堆内存: 137MB
[进度] 已分配: 100MB | 堆内存: 288MB | 非堆: 136MB | 总RSS: 682240MB
[进度] 已分配: 200MB | 堆内存: 299MB | 非堆: 136MB | 总RSS: 841036MB
[进度] 已分配: 300MB | 堆内存: 404MB | 非堆: 136MB | 总RSS: 841036MB
[进度] 已分配: 400MB | 堆内存: 491MB | 非堆: 136MB | 总RSS: 1043976MB
[进度] 已分配: 500MB | 堆内存: 596MB | 非堆: 135MB | 总RSS: 1043976MB
[进度] 已分配: 600MB | 堆内存: 691MB | 非堆: 135MB | 总RSS: 1248968MB
[进度] 已分配: 700MB | 堆内存: 797MB | 非堆: 135MB | 总RSS: 1248968MB
[进度] 已分配: 800MB | 堆内存: 892MB | 非堆: 135MB | 总RSS: 1461948MB
[进度] 已分配: 900MB | 堆内存: 1001MB | 非堆: 134MB | 总RSS: 1454624MB
[进度] 已分配: 1000MB | 堆内存: 1091MB | 非堆: 134MB | 总RSS: 1659492MB
[进度] 已分配: 1100MB | 堆内存: 1198MB | 非堆: 134MB | 总RSS: 1659492MB
=== 测试成功完成 ===
2.1 测试内存分配的明细(各内存池、GC 次数与进程 RSS)
[root@master huangtest]# cat detailMemory2.scala
import org.apache.spark.sql.SparkSession
import java.lang.management.{ManagementFactory, MemoryPoolMXBean, MemoryType, MemoryMXBean}
import scala.collection.JavaConverters._
/**
 * Allocates memory on the driver JVM in fixed-size batches and, after each
 * batch, prints a detailed breakdown: heap totals, every memory pool, garbage
 * collector counters and the process RSS.
 */
object DetailedMemoryTracker {
  import scala.util.control.NonFatal

  /** Integer conversion from bytes to megabytes (truncating). */
  private def toMB(bytes: Long): Long = bytes / 1024 / 1024

  /**
   * Entry point: prints the initial memory state, then allocates 500MB in
   * 50MB batches, printing the current state after every batch.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): when a SparkSession already exists (e.g. inside spark-shell)
    // getOrCreate() reuses it, so these driver settings are not applied to the
    // already-running JVM; pass them on the command line instead.
    val spark = SparkSession.builder()
      .appName("DetailedMemoryTracker")
      .config("spark.driver.memory", "2g")
      .config("spark.driver.extraJavaOptions",
        "-XX:+PrintGCDetails -XX:+PrintGCTimeStamps")
      .getOrCreate()

    // Handles to the JVM memory subsystem
    val mxBean = ManagementFactory.getMemoryMXBean
    val memoryPools = ManagementFactory.getMemoryPoolMXBeans.asScala
    val runtime = Runtime.getRuntime

    // Print the starting state
    printInitialMemoryStats(mxBean, memoryPools, runtime)

    // Allocation plan: batch count and per-batch size come from one constant
    // so the progress message cannot drift from what is really allocated.
    val targetMB = 500 // total to allocate
    val chunkMB = 50 // size of each batch
    val chunkSize = chunkMB * 1024 * 1024
    // Keeps every chunk reachable so the GC cannot reclaim it.
    val memoryHog = scala.collection.mutable.ArrayBuffer[Array[Byte]]()

    println("\n=== 开始内存分配跟踪 ===")
    try {
      (1 to (targetMB / chunkMB)).foreach { i =>
        val chunk = new Array[Byte](chunkSize)
        java.util.Arrays.fill(chunk, 1.toByte) // write every byte so pages are touched
        memoryHog += chunk
        println(s"\n[分配批次 $i] 已分配 ${i * chunkMB}MB")
        printCurrentMemoryStats(mxBean, memoryPools, runtime)
        Thread.sleep(1000) // one-second pause to make the output readable
      }
      println("=== 内存分配测试完成 ===")
    } catch {
      case e: OutOfMemoryError =>
        println(s"\n!!! 内存溢出 !!! 错误: ${e.getMessage}")
        println("当前内存状态:")
        printCurrentMemoryStats(mxBean, memoryPools, runtime)
    } finally {
      // NOTE(review): inside spark-shell this stops the shell's shared session.
      spark.stop()
    }
  }

  /**
   * Prints the JVM memory state before any test allocation.
   *
   * @param mxBean  source of the heap / non-heap totals
   * @param pools   memory pools to list individually
   * @param runtime source of max / total / free heap figures
   */
  def printInitialMemoryStats(
    mxBean: MemoryMXBean,
    pools: scala.collection.Seq[MemoryPoolMXBean],
    runtime: Runtime
  ): Unit = {
    println("\n=== 初始内存状态 ===")
    println(s"JVM最大内存(Xmx): ${toMB(runtime.maxMemory())}MB")
    // Runtime.totalMemory() is the heap currently reserved by the JVM, not the
    // machine's physical memory, so the label says so.
    println(s"JVM当前堆总量: ${toMB(runtime.totalMemory())}MB")
    println(s"空闲内存: ${toMB(runtime.freeMemory())}MB")

    val heapUsage = mxBean.getHeapMemoryUsage
    println("\n[堆内存初始状态]")
    println(s"初始提交: ${toMB(heapUsage.getCommitted)}MB")
    println(s"初始使用: ${toMB(heapUsage.getUsed)}MB")
    println(s"最大容量: ${toMB(heapUsage.getMax)}MB")

    val nonHeapUsage = mxBean.getNonHeapMemoryUsage
    println("\n[非堆内存初始状态]")
    println(s"初始提交: ${toMB(nonHeapUsage.getCommitted)}MB")
    println(s"初始使用: ${toMB(nonHeapUsage.getUsed)}MB")

    println("\n[各内存池详情]")
    pools.foreach { pool =>
      // getUsage returns null for a pool that is no longer valid; skip those.
      Option(pool.getUsage).foreach { usage =>
        val isHeap = pool.getType == MemoryType.HEAP
        val poolType = if (isHeap) "堆" else "非堆"
        println(s"${pool.getName} ($poolType):")
        println(s" 提交: ${toMB(usage.getCommitted)}MB")
        println(s" 使用: ${toMB(usage.getUsed)}MB")
        if (isHeap) {
          println(s" 最大: ${toMB(usage.getMax)}MB")
        }
      }
    }
  }

  /**
   * Prints the current memory state: heap totals, non-empty memory pools,
   * garbage collector counters and the process RSS.
   *
   * @param mxBean  source of the heap totals
   * @param pools   memory pools to list (pools using less than 1MB are omitted)
   * @param runtime source of total / free heap figures
   */
  def printCurrentMemoryStats(
    mxBean: MemoryMXBean,
    pools: scala.collection.Seq[MemoryPoolMXBean],
    runtime: Runtime
  ): Unit = {
    println("\n[当前内存概览]")
    println(s"总分配: ${toMB(runtime.totalMemory() - runtime.freeMemory())}MB")
    println(s"空闲内存: ${toMB(runtime.freeMemory())}MB")

    val heapUsage = mxBean.getHeapMemoryUsage
    println("\n[堆内存变化]")
    println(s"当前使用: ${toMB(heapUsage.getUsed)}MB")
    println(s"提交大小: ${toMB(heapUsage.getCommitted)}MB")

    println("\n[各内存池变化]")
    pools.foreach { pool =>
      // getUsage returns null for a pool that is no longer valid; skip those.
      Option(pool.getUsage).foreach { usage =>
        val usedMB = toMB(usage.getUsed)
        if (usedMB > 0) {
          println(s"${pool.getName}: ${usedMB}MB (${pool.getType})")
        }
      }
    }

    // Garbage collector counters
    ManagementFactory.getGarbageCollectorMXBeans.asScala.foreach { gc =>
      println(s"GC[${gc.getName}]: 次数=${gc.getCollectionCount} 耗时=${gc.getCollectionTime}ms")
    }

    // Physical memory held by the process
    println(s"\n[物理内存] RSS: ${toMB(getProcessRSS)}MB")
  }

  /**
   * Resident set size (RSS) of the current process, read from the `VmRSS:`
   * line of `/proc/<pid>/status` (Linux only).
   *
   * @return RSS in bytes, or 0 when the value cannot be read
   */
  def getProcessRSS: Long = {
    try {
      // The runtime MXBean name is expected to look like "<pid>@<host>".
      val pid = ManagementFactory.getRuntimeMXBean.getName.split("@")(0)
      val source = scala.io.Source.fromFile(s"/proc/$pid/status")
      try {
        source.getLines()
          .find(_.startsWith("VmRSS:"))
          // The second whitespace-separated token is the value; it is
          // multiplied by 1024 on the assumption that it is in kB.
          .flatMap(_.split("\\s+").lift(1))
          .map(_.toLong * 1024)
          .getOrElse(0L)
      } finally {
        source.close() // release the file handle opened on every call
      }
    } catch {
      case NonFatal(_) => 0L // best effort: /proc missing or unparsable
    }
  }
}
// Run the program: call the entry point directly so that loading this script
// (e.g. with spark-shell's :load) starts the tracker immediately.
DetailedMemoryTracker.main(Array.empty)
[root@master huangtest]#
执行脚本所在的目录:
[root@master huangtest]# pwd
/home/huangtest
执行效果
scala> :load /home/huangtest/detailMemory2.scala
Loading /home/huangtest/detailMemory2.scala...
import org.apache.spark.sql.SparkSession
import java.lang.management.{ManagementFactory, MemoryPoolMXBean, MemoryType, MemoryMXBean}
import scala.collection.JavaConverters._
defined object DetailedMemoryTracker
2025-08-06 15:50:45,460 WARN sql.SparkSession$Builder: Using an existing SparkSession; some spark core configurations may not take effect.
=== 初始内存状态 ===
JVM最大内存(Xmx): 910MB
总物理内存: 438MB
空闲内存: 157MB
[堆内存初始状态]
初始提交: 438MB
初始使用: 281MB
最大容量: 910MB
[非堆内存初始状态]
初始提交: 154MB
初始使用: 146MB
[各内存池详情]
Code Cache (非堆):
提交: 48MB
使用: 48MB
Metaspace (非堆):
提交: 94MB
使用: 87MB
Compressed Class Space (非堆):
提交: 11MB
使用: 10MB
PS Eden Space (堆):
提交: 230MB
使用: 187MB
最大: 287MB
PS Survivor Space (堆):
提交: 23MB
使用: 18MB
最大: 23MB
PS Old Gen (堆):
提交: 185MB
使用: 75MB
最大: 683MB
=== 开始内存分配跟踪 ===
[分配批次 1] 已分配 50MB
[当前内存概览]
总分配: 163MB
空闲内存: 281MB
[堆内存变化]
当前使用: 163MB
提交大小: 445MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 55MB (Heap memory)
PS Survivor Space: 30MB (Heap memory)
PS Old Gen: 77MB (Heap memory)
GC[PS Scavenge]: 次数=15 耗时=677ms
GC[PS MarkSweep]: 次数=3 耗时=1034ms
[物理内存] RSS: 670MB
[分配批次 2] 已分配 100MB
[当前内存概览]
总分配: 215MB
空闲内存: 230MB
[堆内存变化]
当前使用: 215MB
提交大小: 445MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 107MB (Heap memory)
PS Survivor Space: 30MB (Heap memory)
PS Old Gen: 77MB (Heap memory)
GC[PS Scavenge]: 次数=15 耗时=677ms
GC[PS MarkSweep]: 次数=3 耗时=1034ms
[物理内存] RSS: 670MB
[分配批次 3] 已分配 150MB
[当前内存概览]
总分配: 265MB
空闲内存: 180MB
[堆内存变化]
当前使用: 265MB
提交大小: 445MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 157MB (Heap memory)
PS Survivor Space: 30MB (Heap memory)
PS Old Gen: 77MB (Heap memory)
GC[PS Scavenge]: 次数=15 耗时=677ms
GC[PS MarkSweep]: 次数=3 耗时=1034ms
[物理内存] RSS: 670MB
[分配批次 4] 已分配 200MB
[当前内存概览]
总分配: 318MB
空闲内存: 127MB
[堆内存变化]
当前使用: 318MB
提交大小: 445MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 210MB (Heap memory)
PS Survivor Space: 30MB (Heap memory)
PS Old Gen: 77MB (Heap memory)
GC[PS Scavenge]: 次数=15 耗时=677ms
GC[PS MarkSweep]: 次数=3 耗时=1034ms
[物理内存] RSS: 670MB
[分配批次 5] 已分配 250MB
[当前内存概览]
总分配: 350MB
空闲内存: 429MB
[堆内存变化]
当前使用: 350MB
提交大小: 780MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 56MB (Heap memory)
PS Survivor Space: 8MB (Heap memory)
PS Old Gen: 285MB (Heap memory)
GC[PS Scavenge]: 次数=16 耗时=1452ms
GC[PS MarkSweep]: 次数=4 耗时=2135ms
[物理内存] RSS: 883MB
[分配批次 6] 已分配 300MB
[当前内存概览]
总分配: 400MB
空闲内存: 379MB
[堆内存变化]
当前使用: 400MB
提交大小: 780MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 106MB (Heap memory)
PS Survivor Space: 8MB (Heap memory)
PS Old Gen: 285MB (Heap memory)
GC[PS Scavenge]: 次数=16 耗时=1452ms
GC[PS MarkSweep]: 次数=4 耗时=2135ms
[物理内存] RSS: 883MB
[分配批次 7] 已分配 350MB
[当前内存概览]
总分配: 450MB
空闲内存: 329MB
[堆内存变化]
当前使用: 450MB
提交大小: 780MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 156MB (Heap memory)
PS Survivor Space: 8MB (Heap memory)
PS Old Gen: 285MB (Heap memory)
GC[PS Scavenge]: 次数=16 耗时=1452ms
GC[PS MarkSweep]: 次数=4 耗时=2135ms
[物理内存] RSS: 883MB
[分配批次 8] 已分配 400MB
[当前内存概览]
总分配: 500MB
空闲内存: 279MB
[堆内存变化]
当前使用: 500MB
提交大小: 780MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 206MB (Heap memory)
PS Survivor Space: 8MB (Heap memory)
PS Old Gen: 285MB (Heap memory)
GC[PS Scavenge]: 次数=16 耗时=1452ms
GC[PS MarkSweep]: 次数=4 耗时=2135ms
[物理内存] RSS: 868MB
[分配批次 9] 已分配 450MB
[当前内存概览]
总分配: 550MB
空闲内存: 229MB
[堆内存变化]
当前使用: 550MB
提交大小: 780MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 256MB (Heap memory)
PS Survivor Space: 8MB (Heap memory)
PS Old Gen: 285MB (Heap memory)
GC[PS Scavenge]: 次数=16 耗时=1452ms
GC[PS MarkSweep]: 次数=4 耗时=2135ms
[物理内存] RSS: 873MB
[分配批次 10] 已分配 500MB
[当前内存概览]
总分配: 602MB
空闲内存: 377MB
[堆内存变化]
当前使用: 602MB
提交大小: 979MB
[各内存池变化]
Code Cache: 48MB (Non-heap memory)
Metaspace: 87MB (Non-heap memory)
Compressed Class Space: 10MB (Non-heap memory)
PS Eden Space: 58MB (Heap memory)
PS Old Gen: 544MB (Heap memory)
GC[PS Scavenge]: 次数=17 耗时=2795ms
GC[PS MarkSweep]: 次数=5 耗时=2565ms
[物理内存] RSS: 1146MB
=== 内存分配测试完成 ===
scala>