glibc下malloc与free的实现原理(三):free
函数的实现
一、概述
在之前对malloc
的分析中,我们实际上大概对free
所要进行的工作有了一点模糊的感觉了,这一节,我们将通过分析free
函数相关源代码来搞清楚free
的具体机制。
当然,与malloc
一样,free
函数对应的是__libc_free
函数,而这个函数的核心则是_int_free
函数。
_int_free
函数的源代码地址:https://github.com/iromise/glibc/blob/master/malloc/malloc.c#L3616
_int_free
的函数头是_int_free(mstate av, mchunkptr p, int have_lock)
由于__libc_free
在调用_int_free
函数时,第三个参数是0,因此我们先从这个参数为0的情况下开始讲起。
二、_int_free
函数
0x00 基本变量定义
static void _int_free(mstate av, mchunkptr p, int have_lock) {
INTERNAL_SIZE_T size; /* its size */
mfastbinptr * fb; /* associated fastbin */
mchunkptr nextchunk; /* next contiguous chunk */
INTERNAL_SIZE_T nextsize; /* its size */
int nextinuse; /* true if nextchunk is used */
INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
mchunkptr bck; /* misc temp for linking */
mchunkptr fwd; /* misc temp for linking */
const char *errstr = NULL;
int locked = 0;
size = chunksize(p);
这些变量在接下来的代码中会使用到,不再赘述。
0x01 一些基础检查
/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
/* 一个不影响性能的小型安全检查:
分配器永远不会在地址空间的末尾处发生回绕(wrap around)。
据此,我们可以排除一些意外出现、或被攻击者刻意“设计”出来的size值 */
if (__builtin_expect((uintptr_t) p > (uintptr_t) -size, 0) ||
__builtin_expect(misaligned_chunk(p), 0)) {
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked) __libc_lock_unlock(av->mutex);
malloc_printerr(check_action, errstr, chunk2mem(p), av);
return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
/* 我们知道每个chunk的size大小至少为MINSIZE
同时,每个chunk的size是MALLOC_ALIGNMENT的整数倍*/
if (__glibc_unlikely(size < MINSIZE || !aligned_OK(size))) {
errstr = "free(): invalid size";
goto errout;
}
check_inuse_chunk(av, p);
也不再赘述
0x02 尝试将chunk放入fastbins
1. 检查
/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
*/
/* 如果这个chunk合适,就把它放入fastbin中,可以让它在malloc中更快的被使用*/
if ((unsigned long) (size) <= (unsigned long) (get_max_fast())
#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
/*
默认TRIM_FASTBINS就是0,所以这段代码默认是不执行的
也就是说if一般情况下只判断第一个条件。
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {
if (__builtin_expect(
chunksize_nomask(chunk_at_offset(p, size)) <= 2 * SIZE_SZ, 0) ||
__builtin_expect(
chunksize(chunk_at_offset(p, size)) >= av->system_mem, 0)) {
/* We might not have a lock at this point and concurrent
modifications
of system_mem might have let to a false positive. Redo the test
after getting the lock. */
/* 此时我们可能尚未持有锁,并发线程对system_mem的修改可能导致误报(false positive)。
获取锁之后重新进行这项检测。 */
if (have_lock || ({
assert(locked == 0);
__libc_lock_lock(av->mutex);
locked = 1;
chunksize_nomask(chunk_at_offset(p, size)) <= 2 * SIZE_SZ ||
chunksize(chunk_at_offset(p, size)) >= av->system_mem;
})) {
errstr = "free(): invalid next size (fast)";
goto errout;
}
if (!have_lock) {
__libc_lock_unlock(av->mutex);
locked = 0;
}
}
free_perturb(chunk2mem(p), size - 2 * SIZE_SZ);
我想,哪怕无法理解为什么做这些检查,至少也要明白检查的内容是什么。
比起关注代码的原理,代码是怎么写的对我们更重要。
2. 将chunk放入fastbins
set_fastchunks(av);
unsigned int idx = fastbin_index(size);
fb = &fastbin(av, idx);
/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do {
/* Check that the top of the bin is not the record we are going to
add
(i.e., double free). */
/* 检查这个bin的最顶部的chunk,确认它不是我们想要新加入的chunk
用于防止double free(尽管并没有什么卵用——编者注) */
if (__builtin_expect(old == p, 0)) {
errstr = "double free or corruption (fasttop)";
goto errout;
}
/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
deallocated. See use of OLD_IDX below for the actual check. */
/* 检查该fastbin顶端chunk的size是否与我们要加入的chunk的size相同。
仅在持有锁的情况下才能解引用OLD,否则它可能已经被释放了。
实际的检查参见下文对OLD_IDX的使用。 */
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;
} while ((old = catomic_compare_and_exchange_val_rel(fb, p, old2)) !=
old2);
if (have_lock && old != NULL && __builtin_expect(old_idx != idx, 0)) {
errstr = "invalid fastbin entry (free)";
goto errout;
}
}
set_fastchunks(av);
涉及到的宏定义为:#define set_fastchunks(M) catomic_and(&(M)->flags, ~FASTCHUNKS_BIT)
,也就是让malloc_state
中flag
中标记是否含有fastbins
的标志位为0(参见第一篇)
然后是获取适配于被释放的chunk的size的fastbin
的下标和指针
mchunkptr old = *fb, old2;
初看到这个语句的时候还没反应过来,原来是声明了俩变量
old
和old2
,同时给old
赋值
接下来考虑这个do-while循环,这里还是有一个catomic_compare_and_exchange_val_rel
宏函数,我们在第二篇中讲到了这个宏函数,可以回去看一看
第一次循环开始前:
第一次循环结束时(没有执行while头)
第二次循环开始前(刚执行完while头)
这里,old2
与old
相等,因此循环结束,可以看到,p指向的chunk已经被放入了对应的fastbin
中
0x03 尝试合并物理相邻的chunk并加入unsorted bin
1. 检查
/*
Consolidate other non-mmapped chunks as they arrive.
*/
/*
合并其他不是通过mmap分配的chunk
*/
else if (!chunk_is_mmapped(p)) {
if (!have_lock) {
__libc_lock_lock(av->mutex);
locked = 1;
}
nextchunk = chunk_at_offset(p, size);
/* Lightweight tests: check whether the block is already the
top block. */
/* 轻量级测试:检查chunk是否已经是top chunk */
if (__glibc_unlikely(p == av->top)) {
errstr = "double free or corruption (top)";
goto errout;
}
/* Or whether the next chunk is beyond the boundaries of the arena. */
/* 以及下一个chunk是否超出了arena的边界 */
if (__builtin_expect(contiguous(av) &&
(char *) nextchunk >=
((char *) av->top + chunksize(av->top)),
0)) {
errstr = "double free or corruption (out)";
goto errout;
}
/* Or whether the block is actually not marked used. */
/* 以及当前chunk是否确实被标记为使用中(由下一个chunk的prev_inuse位记录) */
if (__glibc_unlikely(!prev_inuse(nextchunk))) {
errstr = "double free or corruption (!prev)";
goto errout;
}
nextsize = chunksize(nextchunk);
if (__builtin_expect(chunksize_nomask(nextchunk) <= 2 * SIZE_SZ, 0) ||
__builtin_expect(nextsize >= av->system_mem, 0)) {
errstr = "free(): invalid next size (normal)";
goto errout;
}
free_perturb(chunk2mem(p), size - 2 * SIZE_SZ);
2. 合并
/* consolidate backward */
/* 反向合并 */
if (!prev_inuse(p)) {
prevsize = prev_size(p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}
if (nextchunk != av->top) {
/* get and clear inuse bit */
/* 获取和清除inuse标志位 */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
/* consolidate forward */
/* 正向合并 */
if (!nextinuse) {
unlink(av, nextchunk, bck, fwd);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);
/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/
/*
将chunk放入unsorted bin
chunk并不会被放入它本该被放入的bin
直到它们在malloc中有了一次被使用的机会
(通过此前对malloc的分析我们得知,malloc函数会对unsorted bin进行一次大处理)
*/
// insert the unsorted chunk at the end of the unsorted bin
// First In First Out
// 从unsorted bin的末尾处插入chunk,unsorted bin是一个FIFO表
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely(fwd->bk != bck)) {
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;
set_head(p, size | PREV_INUSE);
set_foot(p, size);
check_free_chunk(av, p);
}
/*
If the chunk borders the current high end of memory,
consolidate into top
*/
/*
如果该chunk与当前内存的最高端(即top chunk)相邻,
就直接将其合并进top chunk
*/
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}
- 如果物理相邻的上一个chunk被标记为free,那么与物理相邻的上一个chunk合并
- 如果物理相邻的下一个chunk是free chunk(其空闲状态由再下一个chunk的prev_inuse位标记),那么p与物理相邻的下一个chunk合并
- 如果物理相邻的下一个chunk是top chunk,那么直接并入top chunk
0x04 对过大chunk的处理
/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.
Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/
// #define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL) 0x10000
/*
如果free的是一块较大的空间,就尝试合并其周围的chunk。
随后,如果顶部未使用的内存总量超过了trim临界值(trim_threshold),
就调用malloc_trim来收缩top chunk。
除非max_fast为0,否则我们无法得知是否存在与top相邻的fastbin chunk,
因而在fastbins被合并之前,无法确定是否真的达到了该临界值。
但我们又不希望每次free都执行合并操作,
折中方案是:当size达到FASTBIN_CONSOLIDATION_THRESHOLD时才进行合并。
(此段翻译仅供参考,以下面的具体代码分析为准)
*/
if ((unsigned long) (size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (have_fastchunks(av)) malloc_consolidate(av);
if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long) (chunksize(av->top)) >=
(unsigned long) (mp_.trim_threshold))
systrim(mp_.top_pad, av);
#endif
} else {
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));
assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
}
if (!have_lock) {
assert(locked);
__libc_lock_unlock(av->mutex);
}
}
/*
If the chunk was allocated via mmap, release via munmap().
*/
else {
munmap_chunk(p);
}
}
三、__libc_free
函数
源代码地址:https://github.com/iromise/glibc/blob/master/malloc/malloc.c#L2798
/* Entry point behind free(mem).  Dispatches to a user-installed
   __free_hook if one is set; releases mmap-allocated chunks directly
   via munmap_chunk(); otherwise hands the chunk to _int_free() for
   the arena that owns it (with have_lock == 0).  */
void __libc_free(void *mem) {
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */
/* Atomically read the hook; a non-NULL hook completely replaces the
   default free logic (used by mtrace, heap debuggers, exploits...).  */
void (*hook)(void *, const void *) = atomic_forced_read(__free_hook);
if (__builtin_expect(hook != NULL, 0)) {
(*hook)(mem, RETURN_ADDRESS(0));
return;
}
if (mem == 0) /* free(0) has no effect */
return;
/* Convert the user pointer back to the chunk header preceding it.  */
p = mem2chunk(mem);
if (chunk_is_mmapped(p)) /* release mmapped memory. */
{
/* See if the dynamic brk/mmap threshold needs adjusting.
Dumped fake mmapped chunks do not affect the threshold. */
if (!mp_.no_dyn_threshold && chunksize_nomask(p) > mp_.mmap_threshold &&
chunksize_nomask(p) <= DEFAULT_MMAP_THRESHOLD_MAX &&
!DUMPED_MAIN_ARENA_CHUNK(p)) {
/* Raise the mmap threshold to this chunk's size so that future
   requests this large are served from the heap rather than mmap;
   keep the trim threshold at twice the mmap threshold.  */
mp_.mmap_threshold = chunksize(p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE(memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk(p);
return;
}
/* Normal (non-mmapped) chunk: free it into the owning arena.
   The third argument 0 means the arena lock is not yet held.  */
ar_ptr = arena_for_chunk(p);
_int_free(ar_ptr, p, 0);
}
四、其他相关函数
0x00 do_check_inuse_chunk
函数
/* Debug check: verify that chunk P is marked in use and that its
   physically adjacent chunks are consistent with that claim.  */
static void do_check_inuse_chunk(mstate av, mchunkptr p) {
mchunkptr next;
/* Generic sanity checks first (address range, alignment, ...).  */
do_check_chunk(av, p);
if (chunk_is_mmapped(p)) return; /* mmapped chunks have no next/prev */
/* Check whether it claims to be in use ... */
assert(inuse(p));
next = next_chunk(p);
/* ... and is surrounded by OK chunks.
Since more things can be checked with free chunks than inuse ones,
if an inuse chunk borders them and debug is on, it's worth doing them.
*/
if (!prev_inuse(p)) {
/* Note that we cannot even look at prev unless it is not inuse */
mchunkptr prv = prev_chunk(p);
/* The previous free chunk's recorded size must lead exactly to P.  */
assert(next_chunk(prv) == p);
do_check_free_chunk(av, prv);
}
if (next == av->top) {
/* The chunk just below top always has its prev_inuse bit set in top,
   and top itself never shrinks below MINSIZE.  */
assert(prev_inuse(next));
assert(chunksize(next) >= MINSIZE);
} else if (!inuse(next))
do_check_free_chunk(av, next);
}
这个函数也只是做了一些检查
可以看到,do_check_inuse_chunk
函数又调用了do_check_chunk
函数,可以先去查看下面对其源代码的分析。
0x01 do_check_chunk
函数
/* Debug check: basic address-range and alignment sanity for chunk P,
   covering both heap (sbrk) chunks and mmapped chunks.  */
static void do_check_chunk(mstate av, mchunkptr p) {
unsigned long sz = chunksize(p);
/* min and max possible addresses assuming contiguous allocation */
char *max_address = (char *) (av->top) + chunksize(av->top);
char *min_address = max_address - av->system_mem;
if (!chunk_is_mmapped(p)) {
/* Has legal address ... */
if (p != av->top) {
if (contiguous(av)) {
/* A non-top heap chunk must lie entirely inside the arena's
   memory and strictly below the top chunk.  */
assert(((char *) p) >= min_address);
assert(((char *) p + sz) <= ((char *) (av->top)));
}
} else {
/* top size is always at least MINSIZE */
assert((unsigned long) (sz) >= MINSIZE);
/* top predecessor always marked inuse */
assert(prev_inuse(p));
}
} else if (!DUMPED_MAIN_ARENA_CHUNK(p)) {
/* address is outside main heap */
if (contiguous(av) && av->top != initial_top(av)) {
assert(((char *) p) < min_address || ((char *) p) >= max_address);
}
/* chunk is page-aligned */
assert(((prev_size(p) + sz) & (GLRO(dl_pagesize) - 1)) == 0);
/* mem is aligned */
assert(aligned_OK(chunk2mem(p)));
}
}
这个函数主要是对传入的malloc_state
指针和malloc_chunk
指针进行检查
0x02 systrim
函数
/*
systrim is an inverse of sorts to sysmalloc. It gives memory back
to the system (via negative arguments to sbrk) if there is unused
memory at the `high' end of the malloc pool. It is called
automatically by free() when top space exceeds the trim
threshold. It is also called by the public malloc_trim routine. It
returns 1 if it actually released any memory, else 0.
*/
/* Give unused memory at the top of the heap back to the OS via a
   negative sbrk, keeping at least PAD bytes (plus MINSIZE) in top.
   Returns 1 if any memory was actually released, else 0.  */
static int systrim(size_t pad, mstate av) {
long top_size; /* Amount of top-most memory */
long extra; /* Amount to release */
long released; /* Amount actually released */
char * current_brk; /* address returned by pre-check sbrk call */
char * new_brk; /* address returned by post-check sbrk call */
size_t pagesize;
long top_area;
pagesize = GLRO(dl_pagesize);
top_size = chunksize(av->top);
/* Reserve MINSIZE (plus one byte) so top never becomes empty.  */
top_area = top_size - MINSIZE - 1;
if (top_area <= pad) return 0;
/* Release in pagesize units and round down to the nearest page. */
extra = ALIGN_DOWN(top_area - pad, pagesize);
if (extra == 0) return 0;
/*
Only proceed if end of memory is where we last set it.
This avoids problems if there were foreign sbrk calls.
*/
current_brk = (char *) (MORECORE(0));
if (current_brk == (char *) (av->top) + top_size) {
/*
Attempt to release memory. We ignore MORECORE return value,
and instead call again to find out where new end of memory is.
This avoids problems if first call releases less than we asked,
of if failure somehow altered brk value. (We could still
encounter problems if it altered brk in some very bad way,
but the only thing we can do is adjust anyway, which will cause
some downstream failure.)
*/
MORECORE(-extra);
/* Call the `morecore' hook if necessary. */
void (*hook)(void) = atomic_forced_read(__after_morecore_hook);
if (__builtin_expect(hook != NULL, 0)) (*hook)();
/* Re-query brk to learn how much was actually released.  */
new_brk = (char *) (MORECORE(0));
LIBC_PROBE(memory_sbrk_less, 2, new_brk, extra);
if (new_brk != (char *) MORECORE_FAILURE) {
released = (long) (current_brk - new_brk);
if (released != 0) {
/* Success. Adjust top. */
av->system_mem -= released;
/* Shrink top's size field; its prev chunk stays marked in use.  */
set_head(av->top, (top_size - released) | PREV_INUSE);
check_malloc_state(av);
return 1;
}
}
}
return 0;
}