在同一个 ByteBufAllocator
去分配内存,并且管理着同一块内存,如果存在并发申请或释放,netty 是如何设计内存池的并发特性的呢?
本文基于 4.1.38.Final 版本
PoolThreadCache
类定义:
final class PoolThreadCache {
private static final InternalLogger logger = InternalLoggerFactory.getInstance(PoolThreadCache.class);
final PoolArena<byte[]> heapArena; // 关联的堆内存Arena
final PoolArena<ByteBuffer> directArena; // 关联的直接内存Arena
// 按照申请维度,缓存各种不同内存,极小、小、正常三种
private final MemoryRegionCache<byte[]>[] tinySubPageHeapCaches;
private final MemoryRegionCache<byte[]>[] smallSubPageHeapCaches;
private final MemoryRegionCache<ByteBuffer>[] tinySubPageDirectCaches;
private final MemoryRegionCache<ByteBuffer>[] smallSubPageDirectCaches;
private final MemoryRegionCache<byte[]>[] normalHeapCaches;
private final MemoryRegionCache<ByteBuffer>[] normalDirectCaches;
// Used for bitshifting when calculate the index of normal caches later
private final int numShiftsNormalDirect;
private final int numShiftsNormalHeap;
private final int freeSweepAllocationThreshold; // 默认大小为8192
private final AtomicBoolean freed = new AtomicBoolean();
// 分配数
private int allocations;
- 上面几种创建的caches缓存,分别和
numTinySubpagePools
、numSmallSubpagePools
对应,在前面文章有分析过,PoolChunk中分配小内存时候,会将每个page进一步分小,分配时会维护着三个链表来引用。这里取的大小正是PoolArena
中数值。
在 PoolThreadCache
构造方法中,会将 tinySubPageDirectCaches
、smallSubPageDirectCaches
、normalDirectCaches
等初始化:
以直接内存为例:
// Tiny sub-page caches for direct memory: numTinySubpagePools slots, each built with tinyCacheSize.
tinySubPageDirectCaches = createSubPageCaches(
tinyCacheSize, PoolArena.numTinySubpagePools, SizeClass.Tiny);
// Small sub-page caches for direct memory: the slot count comes from the arena's numSmallSubpagePools.
smallSubPageDirectCaches = createSubPageCaches(
smallCacheSize, directArena.numSmallSubpagePools, SizeClass.Small);
// log2 of the arena page size, kept for the bit shifting used when indexing the normal caches.
numShiftsNormalDirect = log2(directArena.pageSize);
normalDirectCaches = createNormalCaches(
normalCacheSize, maxCachedBufferCapacity, directArena);
// Record on the arena that one more thread cache is attached to it.
directArena.numThreadCaches.getAndIncrement();
以 createSubPageCaches
为例:
/**
 * Builds the array of caches used for one sub-page size class.
 *
 * @param cacheSize requested capacity passed to every created {@code SubPageMemoryRegionCache}
 * @param numCaches number of cache slots to create
 * @param sizeClass size class recorded in each created cache
 * @return an array holding {@code numCaches} new caches, or {@code null} when
 *         {@code cacheSize} or {@code numCaches} is not positive
 */
private static <T> MemoryRegionCache<T>[] createSubPageCaches(
        int cacheSize, int numCaches, SizeClass sizeClass) {
    // Nothing to build when either dimension is non-positive.
    if (cacheSize <= 0 || numCaches <= 0) {
        return null;
    }

    @SuppressWarnings("unchecked")
    MemoryRegionCache<T>[] slots = new MemoryRegionCache[numCaches];
    int idx = 0;
    while (idx < slots.length) {
        // TODO: maybe use cacheSize / slots.length
        slots[idx] = new SubPageMemoryRegionCache<T>(cacheSize, sizeClass);
        idx++;
    }
    return slots;
}
createSubPageCaches
中初始化了 numCaches
个大小为 cacheSize
的 MemoryRegionCache(内部是一个缓存队列)。注意这里只是创建了缓存容器,并没有真正分配内存。
MemoryRegionCache
private abstract static class MemoryRegionCache<T> {
private final int size; // cache大小
private final Queue<Entry<T>> queue; // 缓存队列
private final SizeClass sizeClass; // 类型大小
private int allocations; // 分配数
MemoryRegionCache(int size, SizeClass sizeClass) {
this.size = MathUtil.safeFindNextPositivePowerOfTwo(size);
queue = PlatformDependent.newFixedMpscQueue(this.size);
this.sizeClass = sizeClass;
}
MemoryRegionCache
实际上,是对当前线程使用的每一种规格的内存,进行了缓存。这样当再次访问,就不需要再次向 PoolArena
申请。
Entry
Entry 则又是用的对象池来避免多次创建对象。
构造方法:
/**
 * Queue element describing one cached allocation: the chunk, the handle inside it and an NIO buffer
 * reference. Each instance keeps the recycler handle it was created with so that {@code recycle()}
 * can hand it back for reuse.
 */
static final class Entry<T> {
final Handle<Entry<?>> recyclerHandle; // handle used by recycle() to give this entry back for reuse
PoolChunk<T> chunk; // chunk this entry currently refers to
ByteBuffer nioBuffer;
long handle = -1;
Entry(Handle<Entry<?>> recyclerHandle) {
this.recyclerHandle = recyclerHandle;
}
// Drops the chunk/buffer references and resets handle to -1 before returning the entry via recyclerHandle.
void recycle() {
chunk = null;
nioBuffer = null;
handle = -1;
recyclerHandle.recycle(this);
}
}
所以在 往 MemoryRegionCache
的queue中offer时,调用newEntry 方法,则首先是从对象池中获取:
/**
 * Takes an {@code Entry} from {@code RECYCLER} and fills in the given chunk, NIO buffer and handle.
 *
 * @param chunk     chunk to store in the entry
 * @param nioBuffer NIO buffer reference to store in the entry
 * @param handle    handle to store in the entry
 * @return the populated entry
 */
private static Entry newEntry(PoolChunk<?> chunk, ByteBuffer nioBuffer, long handle) {
    final Entry pooled = RECYCLER.get();
    // The three assignments are independent of each other.
    pooled.handle = handle;
    pooled.nioBuffer = nioBuffer;
    pooled.chunk = chunk;
    return pooled;
}
// Recycler from which newEntry() obtains Entry instances; newObject creates a fresh Entry
// bound to the handle it is given.
@SuppressWarnings("rawtypes")
private static final Recycler<Entry> RECYCLER = new Recycler<Entry>() {
@SuppressWarnings("unchecked")
@Override
protected Entry newObject(Handle<Entry> handle) {
return new Entry(handle);
}
};
构造 PoolThreadCache
如果使用 PooledByteBufAllocator
的 newDirectBuffer
方法:
/**
 * Allocates a direct buffer. The calling thread's {@code PoolThreadCache} is fetched from
 * {@code threadCache}; when it has a direct arena the buffer is allocated from that arena,
 * otherwise an unpooled direct buffer is created. The result is wrapped by
 * {@code toLeakAwareBuffer} before being returned.
 */
protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) {
PoolThreadCache cache = threadCache.get();
PoolArena<ByteBuffer> directArena = cache.directArena;
final ByteBuf buf;
if (directArena != null) {
buf = directArena.allocate(cache, initialCapacity, maxCapacity);
} else {
// No direct arena: fall back to an unpooled buffer, using the Unsafe-based variant when available.
buf = PlatformDependent.hasUnsafe() ?
UnsafeByteBufUtil.newUnsafeDirectByteBuf(this, initialCapacity, maxCapacity) :
new UnpooledDirectByteBuf(this, initialCapacity, maxCapacity);
}
return toLeakAwareBuffer(buf);
}
首先会从 threadCache 中获取 PoolThreadCache;如果其中的 directArena 为 null,则只会创建非池化的直接内存(UnsafeByteBufUtil.newUnsafeDirectByteBuf 或 UnpooledDirectByteBuf)。
在 PooledByteBufAllocator
分配内存时,会从本地上下文线程中获取一份 PoolThreadCache
,而由 PoolThreadCache
中的PoolArena
类型的 directArena 进行分配。
/**
 * Allocates a direct buffer through the calling thread's {@code PoolThreadCache}: from its direct
 * arena when one is present, otherwise as an unpooled direct buffer. The result is wrapped by
 * {@code toLeakAwareBuffer} before being returned.
 */
@Override
protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) {
PoolThreadCache cache = threadCache.get(); // fetch the PoolThreadCache for the current thread
PoolArena<ByteBuffer> directArena = cache.directArena;
final ByteBuf buf;
if (directArena != null) {
buf = directArena.allocate(cache, initialCapacity, maxCapacity);
} else {
// no direct arena available, so allocate unpooled memory instead
buf = PlatformDependent.hasUnsafe() ?
UnsafeByteBufUtil.newUnsafeDirectByteBuf(this, initialCapacity, maxCapacity) :
new UnpooledDirectByteBuf(this, initialCapacity, maxCapacity);
}
return toLeakAwareBuffer(buf);
}
所以无论如何,都会先从 FastThreadLocal 的实现 PoolThreadLocalCache
中获取 PoolThreadCache,没有就会调用 initialValue
初始化一个。
/**
 * Creates the {@code PoolThreadCache} for the calling thread.
 *
 * <p>One heap arena and one direct arena are picked through {@code leastUsedArena}. A cache with the
 * configured sizes is built when {@code useCacheForAllThreads} is set or the calling thread is a
 * {@code FastThreadLocalThread}; in that case, if the trim interval is positive and the thread has a
 * current executor, {@code trimTask} is scheduled on it at a fixed rate. In every other case a cache
 * with all sizes set to 0 is returned.
 *
 * @return the new thread cache
 */
protected synchronized PoolThreadCache initialValue() {
    // Arenas selected by leastUsedArena (presumably the ones with the fewest attached thread caches).
    final PoolArena<byte[]> heapArena = leastUsedArena(heapArenas);
    final PoolArena<ByteBuffer> directArena = leastUsedArena(directArenas);

    final Thread current = Thread.currentThread();
    final boolean cachingEnabled = useCacheForAllThreads || current instanceof FastThreadLocalThread;
    if (!cachingEnabled) {
        // No caching so just use 0 as sizes.
        return new PoolThreadCache(heapArena, directArena, 0, 0, 0, 0, 0);
    }

    final PoolThreadCache created = new PoolThreadCache(
            heapArena, directArena, tinyCacheSize, smallCacheSize, normalCacheSize,
            DEFAULT_MAX_CACHED_BUFFER_CAPACITY, DEFAULT_CACHE_TRIM_INTERVAL);

    if (DEFAULT_CACHE_TRIM_INTERVAL_MILLIS > 0) {
        // Periodic trimming is only set up when the thread is running inside an executor.
        final EventExecutor owner = ThreadExecutorMap.currentExecutor();
        if (owner != null) {
            owner.scheduleAtFixedRate(trimTask, DEFAULT_CACHE_TRIM_INTERVAL_MILLIS,
                    DEFAULT_CACHE_TRIM_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
        }
    }
    return created;
}
- 从
heapArenas
、directArenas 中各获取一个被线程使用最少的 Arena。默认的 arena 数量根据处理器和直接内存大小决定,例如 12 或者 24 个。
- 如果 useCacheForAllThreads 为 false,并且当前线程是普通线程(不是
FastThreadLocalThread
),那么不使用线程本地缓存(各缓存大小都传 0),直接向 Arena 申请;否则才会创建带本地线程缓存的 PoolThreadCache。
分配内存
获取 到 PoolThreadCache
后,再获取其 PoolArena
进行分配。
PoolArena
的 allocate
:
/**
 * Obtains a {@code PooledByteBuf} via {@code newByteBuf(maxCapacity)} and backs it with memory
 * for {@code reqCapacity} bytes through the three-argument {@code allocate} overload.
 *
 * @param cache       thread cache consulted during allocation
 * @param reqCapacity requested capacity
 * @param maxCapacity maximum capacity of the returned buffer
 * @return the initialised buffer
 */
PooledByteBuf<T> allocate(PoolThreadCache cache, int reqCapacity, int maxCapacity) {
    final PooledByteBuf<T> pooled = newByteBuf(maxCapacity);
    allocate(cache, pooled, reqCapacity);
    return pooled;
}
- 调用
newByteBuf
获取一个PooledByteBuf
实例,PooledByteBuf
也是 通过对象池获取:
// Recycler used by newInstance(); newObject creates a PooledUnsafeDirectByteBuf bound to the
// given handle with an initial max capacity argument of 0.
private static final Recycler<PooledUnsafeDirectByteBuf> RECYCLER = new Recycler<PooledUnsafeDirectByteBuf>() {
@Override
protected PooledUnsafeDirectByteBuf newObject(Handle<PooledUnsafeDirectByteBuf> handle) {
return new PooledUnsafeDirectByteBuf(handle, 0);
}
};
/**
 * Fetches a buffer object from {@code RECYCLER} and prepares it for use by calling
 * {@code reuse(maxCapacity)} on it.
 *
 * @param maxCapacity maximum capacity passed to {@code reuse}
 * @return the prepared buffer object
 */
static PooledUnsafeDirectByteBuf newInstance(int maxCapacity) {
    final PooledUnsafeDirectByteBuf recycled = RECYCLER.get();
    recycled.reuse(maxCapacity);
    return recycled;
}
- 调用
allocate
根据申请大小,看下一小节
allocate
PoolArena
的 allocate
方法:
/**
 * Backs {@code buf} with memory for {@code reqCapacity} bytes.
 *
 * The request is first normalized, then served along one of three paths:
 * tiny/small requests try the thread cache, then the matching sub-page pool (locked on its head),
 * then {@code allocateNormal} under the arena lock; requests up to {@code chunkSize} try the thread
 * cache and then {@code allocateNormal} under the arena lock; anything larger goes to
 * {@code allocateHuge}.
 */
private void allocate(PoolThreadCache cache, PooledByteBuf<T> buf, final int reqCapacity) {
final int normCapacity = normalizeCapacity(reqCapacity); // normalize the requested size
if (isTinyOrSmall(normCapacity)) { // capacity < pageSize, i.e. a tiny or small request
int tableIdx;
PoolSubpage<T>[] table;
boolean tiny = isTiny(normCapacity);
if (tiny) { // < 512
if (cache.allocateTiny(this, buf, reqCapacity, normCapacity)) {
// was able to allocate out of the cache so move on
return;
}
tableIdx = tinyIdx(normCapacity);
table = tinySubpagePools;
} else {
if (cache.allocateSmall(this, buf, reqCapacity, normCapacity)) {
// was able to allocate out of the cache so move on
return;
}
tableIdx = smallIdx(normCapacity);
table = smallSubpagePools;
}
final PoolSubpage<T> head = table[tableIdx];
/**
 * Synchronize on the head. This is needed as {@link PoolChunk#allocateSubpage(int)} and
 * {@link PoolChunk#free(long)} may modify the doubly linked list as well.
 */
synchronized (head) {
final PoolSubpage<T> s = head.next;
// head.next == head means the list holds no sub-page to allocate from.
if (s != head) {
assert s.doNotDestroy && s.elemSize == normCapacity;
long handle = s.allocate();
assert handle >= 0;
s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity);
incTinySmallAllocation(tiny);
return;
}
}
// No usable sub-page in the pool: fall back to allocateNormal under the arena lock.
synchronized (this) {
allocateNormal(buf, reqCapacity, normCapacity);
}
incTinySmallAllocation(tiny);
return;
}
if (normCapacity <= chunkSize) {
// request fits within chunkSize
if (cache.allocateNormal(this, buf, reqCapacity, normCapacity)) {
// was able to allocate out of the cache so move on
return;
}
synchronized (this) {
allocateNormal(buf, reqCapacity, normCapacity);
++allocationsNormal;
}
} else {
// request larger than chunkSize (huge allocation)
// Huge allocations are never served via the cache so just call allocateHuge
allocateHuge(buf, reqCapacity);
}
}
- 根据 reqCapacity,获取规格化内存,例如申请 2 字节,那么根据 PoolChunk 中 subPage 的规格,转化后的 normCapacity 为 16。
- 根据对应的 normCapacity,去本地内存池变量中,申请不同规格内存。
但是,如果申请成功后,这一份内存,并不会加到对应的 MemoryRegionCache
中。
MemoryRegionCache 维护
从 add 方法往前看:
/**
 * Tries to place the given chunk/handle pair into this cache's queue.
 *
 * @param chunk     chunk to cache
 * @param nioBuffer NIO buffer reference stored with the entry
 * @param handle    handle inside the chunk
 * @return {@code true} when the entry was queued; {@code false} when {@code queue.offer} rejected
 *         it, in which case the entry has already been recycled
 */
public final boolean add(PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle) {
    final Entry<T> candidate = newEntry(chunk, nioBuffer, handle);
    if (queue.offer(candidate)) {
        return true;
    }
    // If it was not possible to cache the chunk, immediately recycle the entry
    candidate.recycle();
    return false;
}
最直接的地方,就是调用 PoolArena
的 free
方法时候:
/**
 * Releases the memory identified by {@code chunk}/{@code handle}.
 *
 * <p>Unpooled chunks are destroyed right away and the huge-allocation counters are updated.
 * For pooled chunks the thread cache (when given) gets the first chance to keep the memory;
 * only if it declines is the memory handed to {@code freeChunk}.
 *
 * @param chunk        chunk the memory belongs to
 * @param nioBuffer    NIO buffer reference passed along to the cache or to {@code freeChunk}
 * @param handle       handle inside the chunk
 * @param normCapacity normalized capacity, used to determine the size class
 * @param cache        thread cache to offer the memory to; may be {@code null}
 */
void free(PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, int normCapacity, PoolThreadCache cache) {
    if (chunk.unpooled) {
        // Read the size before the chunk is destroyed.
        final int chunkBytes = chunk.chunkSize();
        destroyChunk(chunk);
        activeBytesHuge.add(-chunkBytes);
        deallocationsHuge.increment();
        return;
    }

    final SizeClass kind = sizeClass(normCapacity);
    final boolean keptInCache =
            cache != null && cache.add(this, chunk, nioBuffer, handle, normCapacity, kind);
    if (!keptInCache) {
        freeChunk(chunk, handle, kind, nioBuffer, false);
    }
    // Otherwise: cached so not free it.
}
而对于 free 的调用,则是在 ByteBuf 执行 release 方法释放内存时,会调用 PoolArena 的 free 方法。
清除PoolThreadCache
PoolThreadCache
有个方法 free:
/**
 * Releases everything held by this thread cache. The work is done at most once: the
 * {@code freed} flag is flipped with a compare-and-set and later calls return immediately.
 *
 * @param finalizer forwarded unchanged to the per-array {@code free} calls
 */
void free(boolean finalizer) {
    // As free() may be called either by the finalizer or by FastThreadLocal.onRemoval(...) we need to ensure
    // we only call this one time.
    if (!freed.compareAndSet(false, true)) {
        return;
    }

    // Direct caches first, then heap caches.
    int numFreed = 0;
    numFreed += free(tinySubPageDirectCaches, finalizer);
    numFreed += free(smallSubPageDirectCaches, finalizer);
    numFreed += free(normalDirectCaches, finalizer);
    numFreed += free(tinySubPageHeapCaches, finalizer);
    numFreed += free(smallSubPageHeapCaches, finalizer);
    numFreed += free(normalHeapCaches, finalizer);

    if (numFreed > 0 && logger.isDebugEnabled()) {
        logger.debug("Freed {} thread-local buffer(s) from thread: {}", numFreed,
                Thread.currentThread().getName());
    }

    // Detach this cache from the arenas' thread-cache counters.
    if (directArena != null) {
        directArena.numThreadCaches.getAndDecrement();
    }
    if (heapArena != null) {
        heapArena.numThreadCaches.getAndDecrement();
    }
}
主要功能是将 PoolThreadCache 里面内存缓存都清理掉,调用位置有两处:
- 垃圾回收,回收前执行
finalize
。 - 执行FastThreadLocal 的 remove,进行手动清除。
二者区别在于 finalizer 参数的真假:如果为 false,则会保留一些对象信息到对象池中,减少对象的创建。
总结
- 对于正常netty程序来讲,其实每一个reactor线程都可以绑定一个PoolArena进行操作,这样线程之间根本不会有竞争。
- 我比较担心一个问题,如果一个线程曾经申请了很多内存后面释放了,但是不结束,那么这个线程所占用内存池信息可能并不会减少,如果使用堆外内存,则不会触发oom,如果是堆内存,也可以算是一种内存泄漏。
关注博主公众号: 六点A君。
哈哈哈,一起研究Netty: