Analysis of the sysmalloc function in glibc-2.23

Background

  1. A heap is simply a large region of memory. For a non-main_arena arena, the heap is created with mmap; the main_arena grows via sbrk (falling back to mmap if sbrk fails). (Since the main_arena heap does not store a heap_info header at its start, it differs slightly from non-main_arena heaps.)

  2. The malloc_state struct manages the bins and the heap (the top chunk points at the currently usable region of a heap).

  3. When we allocate from an arena, the top chunk points at the usable region of the current heap. So where are the previously used heaps recorded? In fact, every free chunk sits in the bins, and anything in a bin can be reused, so there is no need to track the other heaps: recording the current one (via the top chunk) is enough. When no suitable chunk is found in the bins, memory is split off the top chunk.

The following code creates a new heap:

//Create a new heap, page-aligned.
static heap_info * internal_function new_heap (size_t size, size_t top_pad)
{
    size_t pagesize = GLRO (dl_pagesize);
    char *p1, *p2;
    unsigned long ul;
    heap_info *h;
    if (size + top_pad < HEAP_MIN_SIZE)
      size = HEAP_MIN_SIZE;
    else if (size + top_pad <= HEAP_MAX_SIZE)
      size += top_pad;
    else if (size > HEAP_MAX_SIZE)
      return 0;
    else
      size = HEAP_MAX_SIZE;
    size = ALIGN_UP (size, pagesize);
    /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
     No swap space needs to be reserved for the following large
     mapping (on Linux, this is the case for all non-writable mappings
     anyway). */
    p2 = MAP_FAILED;
    if (aligned_heap_area)
    {
        p2 = (char *) MMAP (aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE,
                          MAP_NORESERVE);
        aligned_heap_area = NULL;
        if (p2 != MAP_FAILED && ((unsigned long) p2 & (HEAP_MAX_SIZE - 1)))
        {
          //Not aligned; unmap the region.
          __munmap (p2, HEAP_MAX_SIZE);
          p2 = MAP_FAILED;
        }
    }
    if (p2 == MAP_FAILED)
    {
        //Probably to raise the odds of HEAP_MAX_SIZE alignment, first reserve twice HEAP_MAX_SIZE.
        p1 = (char *) MMAP (0, HEAP_MAX_SIZE << 1, PROT_NONE, MAP_NORESERVE);
        if (p1 != MAP_FAILED)
        {
          //p2 is the first HEAP_MAX_SIZE-aligned address at or after p1.
          p2 = (char *) (((unsigned long) p1 + (HEAP_MAX_SIZE - 1))
                         & ~(HEAP_MAX_SIZE - 1));
          //Compute the leading gap; it will never be used, so unmap it.
          ul = p2 - p1;
          if (ul)
            __munmap (p1, ul);
          else
            aligned_heap_area = p2 + HEAP_MAX_SIZE;
          //Also unmap the surplus tail past p2 + HEAP_MAX_SIZE.
          __munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
        }
        else
        {
          /* Try to take the chance that an allocation of only HEAP_MAX_SIZE
             is already aligned. */
          p2 = (char *) MMAP (0, HEAP_MAX_SIZE, PROT_NONE, MAP_NORESERVE);
          if (p2 == MAP_FAILED)
            return 0;
          //Not HEAP_MAX_SIZE-aligned; treat it as a failure.
          if ((unsigned long) p2 & (HEAP_MAX_SIZE - 1))
          {
             __munmap (p2, HEAP_MAX_SIZE);
             return 0;
          }
        }
    }
    //If setting the memory protection fails, return NULL.
    if (__mprotect (p2, size, PROT_READ | PROT_WRITE) != 0)
    {
      __munmap (p2, HEAP_MAX_SIZE);
      return 0;
    }
    //p2 is now a HEAP_MAX_SIZE-sized region, aligned to HEAP_MAX_SIZE.
    //The heap_info header is stored at the start of this region.
    h = (heap_info *) p2;
    h->size = size;
    h->mprotect_size = size;
    LIBC_PROBE (memory_heap_new, 2, h, h->size);
    return h;
}

The code above is the new_heap function. Its main job is to mmap a region of HEAP_MAX_SIZE bytes and store a heap_info header at its start (see the heap_info struct). I won't dissect every detail here; read the source yourself and note that the mmap is attempted several times.

When is sysmalloc called?

Before reading the sysmalloc code, we need to know under what conditions it is called. Looking at the _int_malloc source: at the very end, when the required memory cannot be split off the top chunk, sysmalloc is called to allocate it.

The sysmalloc flow:

1. mmap the memory directly

Check whether the requested size is at least mp_.mmap_threshold; only then is mmap called directly to allocate the memory (there is also a cap: the number of mmapped regions must stay below n_mmaps_max).

if (av == NULL || ((unsigned long)(nb) >= (unsigned long)(mp_.mmap_threshold) && (mp_.n_mmaps < mp_.n_mmaps_max)))
 {
   char *mm; /* return value from mmap call*/
 try_mmap:
   /*
      Round up size to nearest page.  For mmapped chunks, the overhead
      is one SIZE_SZ unit larger than for normal chunks, because there
      is no following chunk whose prev_size field could be used.

      See the front_misalign handling below, for glibc there is no
      need for further alignments unless we have have high alignment.
    */
   if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
     size = ALIGN_UP(nb + SIZE_SZ, pagesize);
   else
     size = ALIGN_UP(nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
   tried_mmap = true;
   /* Don't try if size wraps around 0 */
   if ((unsigned long)(size) > (unsigned long)(nb))
   {
     mm = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, 0));
     if (mm != MAP_FAILED)
     {
       /*
          The offset to the start of the mmapped region is stored
          in the prev_size field of the chunk. This allows us to adjust
          returned start address to meet alignment requirements here
          and in memalign(), and still be able to compute proper
          address argument for later munmap in free() and realloc().
        */
       if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
       {
         /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
            MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
            aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
         assert(((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0);
         front_misalign = 0;
       }
       else
         front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
       if (front_misalign > 0)
       {
         correction = MALLOC_ALIGNMENT - front_misalign;
         p = (mchunkptr)(mm + correction);
         p->prev_size = correction;
         set_head(p, (size - correction) | IS_MMAPPED);
       }
       else
       {
         p = (mchunkptr)mm;
         set_head(p, size | IS_MMAPPED);
       }
       /* update statistics */
       int new = atomic_exchange_and_add(&mp_.n_mmaps, 1) + 1;
       atomic_max(&mp_.max_n_mmaps, new);
       unsigned long sum;
       sum = atomic_exchange_and_add(&mp_.mmapped_mem, size) + size;
       atomic_max(&mp_.max_mmapped_mem, sum);
       check_chunk(av, p);
       return chunk2mem(p);
     }
   }
 }

2. Grow the current arena's top chunk

First, three lines of code that define what old_top, old_size, and old_end mean:

	old_top = av->top;                                      //the current top chunk
	old_size = chunksize(old_top);                          //its chunk size
	old_end = (char *)(chunk_at_offset(old_top, old_size)); //first byte past the top chunk

2.1 Non-main_arena: growing the top chunk

  1. First, try to extend the heap in place:
if ((long)(MINSIZE + nb - old_size) > 0 && grow_heap(old_heap, MINSIZE + nb - old_size) == 0)
{
  //Extend the existing heap in place; its total size is capped at HEAP_MAX_SIZE.
  av->system_mem += old_heap->size - old_heap_size;
  arena_mem += old_heap->size - old_heap_size;
  set_head(old_top, (((char *)old_heap + old_heap->size) - (char *)old_top) | PREV_INUSE);
}
  2. If extending fails, create a brand-new heap:
else if ((heap = new_heap(nb + (MINSIZE + sizeof(*heap)), mp_.top_pad)))
{
  //Extending the old heap failed, so create a new one.
  //The old heap's top chunk will never be split from again, so it must be freed; that way it can still be reused instead of being wasted.
  heap->ar_ptr = av;
  heap->prev = old_heap;
  av->system_mem += heap->size;
  arena_mem += heap->size;
  /* Set up the new top.  */
  //All heaps share the fastbins and normal bins; only when no suitable chunk
  //is found in the bins do we try to split from the top chunk.
  top(av) = chunk_at_offset(heap, sizeof(*heap));
  set_head(top(av), (heap->size - sizeof(*heap)) | PREV_INUSE);

  /* Setup fencepost and free the old top chunk with a multiple of
     MALLOC_ALIGNMENT in size. */
  /* The fencepost takes at least MINSIZE bytes, because it might
     become the top chunk again later.  Note that a footer is set
     up, too, although the chunk is marked in use. */
  old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
  set_head(chunk_at_offset(old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
  if (old_size >= MINSIZE)
  {
    set_head(chunk_at_offset(old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
    set_foot(chunk_at_offset(old_top, old_size), (2 * SIZE_SZ));
    set_head(old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
    //Free the old heap's top chunk.
    _int_free(av, old_top, 1);
  }
  else
  {
    set_head(old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
    set_foot(old_top, (old_size + 2 * SIZE_SZ));
  }
}
  3. If creating a new heap also fails, try to mmap a region:
else if (!tried_mmap)
      /* We can at least try to use to mmap memory.  */
      goto try_mmap;

2.2 main_arena: growing the top chunk

  1. First, try to extend with sbrk; on success, brk points at the first byte of the newly extended region.
size = nb + mp_.top_pad + MINSIZE;
 //The main_arena's first top chunk is a region just past the bss segment.
if (contiguous(av))
  size -= old_size;
size = ALIGN_UP(size, pagesize);
/*
  Don't try to call MORECORE if argument is so big as to appear
  negative. Note that since mmap takes size_t arg, it may succeed
  below even if we cannot call MORECORE.
*/
if (size > 0)
{
  brk = (char *)(MORECORE(size));                 //first try extending via brk
  LIBC_PROBE(memory_sbrk_more, 2, brk, size);
}
if (brk != (char *)(MORECORE_FAILURE))
{
  /* Call the `morecore' hook if necessary.  */
  void (*hook)(void) = atomic_forced_read(__after_morecore_hook);
  if (__builtin_expect(hook != NULL, 0))
    (*hook)();
}
  2. If extending fails, mmap a region instead; on success, clear the contiguous flag, set brk to the start of the mmapped memory and snd_brk to its end.
//brk extension failed.
 /*
     If have mmap, try using it as a backup when MORECORE fails or
     cannot be used. This is worth doing on systems that have "holes" in
     address space, so sbrk cannot extend to give contiguous space, but
     space is available elsewhere.  Note that we ignore mmap max count
     and threshold limits, since the space will not be used as a
     segregated mmap region.
 */
 /* Cannot merge with old top, so add its size back in */
 //brk is unusable; recompute size from nb and align it up.
 if (contiguous(av))
   size = ALIGN_UP(size + old_size, pagesize);

 /* If we are relying on mmap as backup, then use larger units */
 if ((unsigned long)(size) < (unsigned long)(MMAP_AS_MORECORE_SIZE))
   size = MMAP_AS_MORECORE_SIZE;

 /* Don't try if size wraps around 0 */
 if ((unsigned long)(size) > (unsigned long)(nb))
 {
   char *mbrk = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, 0));
   if (mbrk != MAP_FAILED)
   {
     /* We do not need, and cannot use, another sbrk call to find end */
     brk = mbrk;
     snd_brk = brk + size;
     /*
       Record that we no longer have a contiguous sbrk region.
       After the first time mmap is used as backup, we do not
       ever rely on contiguous space since this could incorrectly
       bridge regions.
     */
     set_noncontiguous(av);
   }
 }
  3. Now brk points at the start of the extended or newly allocated memory. If brk is valid:
    1. If brk is exactly old_end, i.e. the new memory is contiguous with the old, only the top chunk's size field needs updating:
    if (brk == old_end && snd_brk == (char *)(MORECORE_FAILURE))
    {
      //snd_brk is still MORECORE_FAILURE (-1), so brk must have come from sbrk here.
      set_head(old_top, (size + old_size) | PREV_INUSE);
    }
    
    2. If brk < old_end, it is an error:
     else if (contiguous(av) && old_size && brk < old_end)
          {
              /* Oops!  Someone else killed our space..  Can't touch anything.  */
            malloc_printerr(3, "break adjusted to free malloc space", brk,av);
          }
    
    3. The discontiguous case, i.e. brk != old_end (I don't fully understand this part, so there may be mistakes):
      • If the contiguous flag is set, extend further by the gap between brk and old_end, then compute aligned_brk and snd_brk.
      • If the contiguous flag is clear, brk came from mmap; align it to compute aligned_brk.
      • Set up the top chunk from snd_brk. If old_size is non-zero, the old top chunk is put into a bin so that it still has a chance of being reused later.

3. Split the requested memory off the top chunk

/* finally, do the allocation */
  p = av->top;
  size = chunksize(p);

  /* check that one of the above allocation paths succeeded */
  if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE))
  {
    remainder_size = size - nb;
    remainder = chunk_at_offset(p, nb);
    av->top = remainder;
    set_head(p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
    set_head(remainder, remainder_size | PREV_INUSE);
    check_malloced_chunk(av, p, nb);
    return chunk2mem(p);
  }

Summary

To sum up, sysmalloc really does just two things:

  1. Check whether a region should be allocated directly via mmap.
  2. Adjust the current arena's top chunk, either by extending it in place or by creating a new heap (via mmap), and then allocate the requested memory from that arena.

There are parts of this code I don't fully understand, so the analysis may contain mistakes; corrections are welcome in the comments.

References:
glibc-2.23 source code
