malloc&free源码浅析

peiwithhao · 发表于 2023-5-22 19:55

本帖最后由 peiwithhao 于 2023-5-22 19:58 编辑

malloc&free源码分析

其作为堆利用的重点，需要我们透彻了解其中细节，我们首先从malloc开始分析，但在此之前，我们需要了解一些重要的数据结构。本章分析版本为glibc-2.23，再之后讨论2.27即以上版本的差异

0x00 重要的数据结构们

1. malloc_state

首先便是我们经常接触的arena

    struct malloc_state
    {
      /* Serialize access.  */
      mutex_t mutex;

      /* Flags (formerly in max_fast).  */
      int flags;

      /* fastbin链条数组 */
      mfastbinptr fastbinsY[NFASTBINS];

      /*top chunk 指针 */
      mchunkptr top;

      /* The remainder from the most recent split of a small request */
      mchunkptr last_remainder;

      /* NBINS为宏，带☞128，这里包含了除fastbin的所有bin指针 */
      mchunkptr bins[NBINS * 2 - 2];

      /* bins数组的位图 */
      unsigned int binmap[BINMAPSIZE];

      /* 链接下一个malloc_state的指针 */
      struct malloc_state *next;

      /* Linked list for free arenas.  Access to this field is serialized
         by free_list_lock in arena.c.  */
      struct malloc_state *next_free;

      /* Number of threads attached to this arena.  0 if the arena is on
         the free list.  Access to this field is serialized by
         free_list_lock in arena.c.  */
      INTERNAL_SIZE_T attached_threads;

      /* 在本arena当中从系统处获取到的内存大小  */
      INTERNAL_SIZE_T system_mem;
      INTERNAL_SIZE_T max_system_mem;
    };

其结构在源码当中表现为宏mstate

2. malloc_chunk

    struct malloc_chunk {

      INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
      INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */

      struct malloc_chunk* fd;         /* double links -- used only if free. */
      struct malloc_chunk* bk;

      /* Only used for large blocks: pointer to next larger size.  */
      struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
      struct malloc_chunk* bk_nextsize;
    };

3. 源码自带的malloc_chunk细节，十分友善

    /*
       malloc_chunk details:

        (The following includes lightly edited explanations by Colin Plumb.)

        Chunks of memory are maintained using a `boundary tag' method as
        described in e.g., Knuth or Standish.  (See the paper by Paul
        Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
        survey of such techniques.)  Sizes of free chunks are stored both
        in the front of each chunk and at the end.  This makes
        consolidating fragmented chunks into bigger chunks very fast.  The
        size fields also hold bits representing whether chunks are free or
        in use.

        An allocated chunk looks like this:

        chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Size of previous chunk, if allocated            | |
                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Size of chunk, in bytes                       |M|P|
          mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             User data starts here...                          .
                .                                                               .
                .             (malloc_usable_size() bytes)                      .
                .                                                               |
    nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Size of chunk                                     |
                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

        Where "chunk" is the front of the chunk for the purpose of most of
        the malloc code, but "mem" is the pointer that is returned to the
        user.  "Nextchunk" is the beginning of the next contiguous chunk.

        Chunks always begin on even word boundaries, so the mem portion
        (which is returned to the user) is also on an even word boundary, and
        thus at least double-word aligned.

        Free chunks are stored in circular doubly-linked lists, and look like this:

        chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Size of previous chunk                            |
                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        `head:' |             Size of chunk, in bytes                         |P|
          mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Forward pointer to next chunk in list             |
                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Back pointer to previous chunk in list            |
                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                |             Unused space (may be 0 bytes long)                .
                .                                                               .
                .                                                               |
    nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        `foot:' |             Size of chunk, in bytes                           |
                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

        The P (PREV_INUSE) bit, stored in the unused low-order bit of the
        chunk size (which is always a multiple of two words), is an in-use
        bit for the *previous* chunk.  If that bit is *clear*, then the
        word before the current chunk size contains the previous chunk
        size, and can be used to find the front of the previous chunk.
        The very first chunk allocated always has this bit set,
        preventing access to non-existent (or non-owned) memory. If
        prev_inuse is set for any given chunk, then you CANNOT determine
        the size of the previous chunk, and might even get a memory
        addressing fault when trying to do so.

        Note that the `foot' of the current chunk is actually represented
        as the prev_size of the NEXT chunk. This makes it easier to
        deal with alignments etc but can be very confusing when trying
        to extend or adapt this code.

        The two exceptions to all this are

         1. The special chunk `top' doesn't bother using the
            trailing size field since there is no next contiguous chunk
            that would have to index off it. After initialization, `top'
            is forced to always exist.  If it would become less than
            MINSIZE bytes long, it is replenished.

         2. Chunks allocated via mmap, which have the second-lowest-order
            bit M (IS_MMAPPED) set in their size fields.  Because they are
            allocated one-by-one, each must contain its own trailing size field.

    */

0x01 malloc步骤

step1 malloc环境准备

首先我们调用malloc(size)的时候，调用的库函数实际上为_libc_malloc，如下：

    void * __libc_malloc (size_t bytes)
    {
      mstate ar_ptr;
      void *victim;

      void *(*hook) (size_t, const void *)
        = atomic_forced_read (__malloc_hook);    
      if (__builtin_expect (hook != NULL, 0))
        return (*hook)(bytes, RETURN_ADDRESS (0));   //调用malloc_hook

      arena_get (ar_ptr, bytes);                  //表现为宏，获取arena指针

      victim = _int_malloc (ar_ptr, bytes);  //调用_int_malloc，返回分配chunk指针，参数一为arena指针，参数二为我们的需要分配的字节
      /* Retry with another arena only if we were able to find a usable arena
         before.  */
      if (!victim && ar_ptr != NULL)
        {
          LIBC_PROBE (memory_malloc_retry, 1, bytes);
          ar_ptr = arena_get_retry (ar_ptr, bytes);
          victim = _int_malloc (ar_ptr, bytes);
        }

      if (ar_ptr != NULL)
        (void) mutex_unlock (&ar_ptr->mutex);

      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
              ar_ptr == arena_for_chunk (mem2chunk (victim)));
      return victim;
    }

可以发现我们的_libc_malloc函数主要功能是获取合适的arena，然后传递给_int_malloc分配真正的堆块，然后我们来观察_int_malloc，而我们该函数十分长，因此我们逐步来看，首先看到定义的一些字段，如下：

    static void * _int_malloc (mstate av, size_t bytes)
    {
      INTERNAL_SIZE_T nb;               /* normalized request size */
      unsigned int idx;                 /* 字节所关联的bin数组下标 */
      mbinptr bin;                      /* bin数组下标所对应的bin指针 */

      mchunkptr victim;                 /* 检查/选择得到的chunk指针 */
      INTERNAL_SIZE_T size;             /* 得到的chunk大小 */
      int victim_index;                 /* 所得chunk对应bin的index */

      mchunkptr remainder;              /* 分块后的剩余部分 */
      unsigned long remainder_size;     /* 剩余部分大小 */

      unsigned int block;               /* bit map traverser */
      unsigned int bit;                 /* bit map traverser */
      unsigned int map;                 /* current word of binmap */

      mchunkptr fwd;                    /* misc temp for linking */
      mchunkptr bck;                    /* misc temp for linking */

      const char *errstr = NULL;

然后我们继续来看接下来的步骤：

      /*
                无他，通过我们需求的字节大小来转变至实际需要的堆块大小
       */

      checked_request2size (bytes, nb);

题外话：checked_request2size

（注意这里是单独的宏表示，非int_malloc）
checked_request2size其宏表示为

    #define request2size(req)                                         \
      (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
       MINSIZE :                                                      \
       ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

    /*  Same, except also perform argument check */

    #define checked_request2size(req, sz)                             \
      if (REQUEST_OUT_OF_RANGE (req)) {                                              \
          __set_errno (ENOMEM);                                                      \
          return 0;                                                                      \
        }                                                                              \
      (sz) = request2size (req);

解释结束（下面继续int_malloc）

      /* 若传入的av为空，那么转回调用sysmalloc，通过mmap来分配出一个chunk */
      if (__glibc_unlikely (av == NULL))
        {
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }

step2 若在fastbin范围

然后接下来判断其是否位于fastbin范围

      /*
         如果该size位于fastbins范围， 首先检查合并堆块.
         即使av未初始化，该代码也是可正常安全执行的, 因此我们可以在不检查的情况下执行
       */

      if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
        {
          idx = fastbin_index (nb);                  //得到fastbin数组的下标
          mfastbinptr *fb = &fastbin (av, idx);         //为一个宏，得到fastbinsY元素指针
          mchunkptr pp = *fb;
          do
            {
              victim = pp;
                            /* 若未找到合适的victim，跳出循环 */
              if (victim == NULL)
                break;
            }
          while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
                 != victim);          //这里该函数为一个原子操作，目的是交换fb与victim->fd的值，也就是从链头开始取
            /* 下面就是判断取出的victim是否通过检查 */
          if (victim != 0)
            {
              if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0)) //检查堆块的size位
                {
                  errstr = "malloc(): memory corruption (fast)";
                errout:
                  malloc_printerr (check_action, errstr, chunk2mem (victim), av);
                  return NULL;
                }
              check_remalloced_chunk (av, victim, nb);  //检查重分配堆块
              void *p = chunk2mem (victim);         //堆块指针转为指向返回内存的宏
              alloc_perturb (p, bytes);                 //堆块清0
              return p;
            }
        }

其中大致含义即为从fastbinsY链表数组找到对应的下标，然后从头取出fastbin，返回给用户。

step3 若在small范围当中

下面我们继续_int_malloc，执行到这里表示我们fastbin超出其大小范围，或者说使用fastbin分配失败，然后就会判断其进入smallbin的判断当中

    /*
        如果请求的size大小属于smallbin范围，我们则检查通用的bins数组.  Since these "smallbins"
         hold one size each, no searching within bins is necessary.
         (如果是largebin范围, 我们必须等到 unsorted chunks 被处理为寻找最佳适配块. But for small ones, fits are exact
         anyway, so we can check now, which is faster.)
       */

      if (in_smallbin_range (nb))
        {
          idx = smallbin_index (nb);
          bin = bin_at (av, idx);                         //获取对应bin链表数组下标

          if ((victim = last (bin)) != bin)         //last(bin)为宏bin->bk,这里是判断他是否为空，若不为空则说明smallbin里面有堆块，进入下一步
            {
              if (victim == 0) /* 初始化检查，若为0则说明并未初始化，我们的small bin的各项还是0 */
                malloc_consolidate (av); //进行av初始化，也就是取出fast chunk各项堆块合并一下
              else                                 //这里说明我们已经经过了初始化，所以接下来就是普通的判断过程
                {
                  bck = victim->bk;
                                    if (__glibc_unlikely (bck->fd != victim))         //检查
                    {
                      errstr = "malloc(): smallbin double linked list corrupted";
                      goto errout;
                    }
                  set_inuse_bit_at_offset (victim, nb);         //设置相邻下一个堆块的inuse位
                  bin->bk = bck;
                  bck->fd = bin;                                                         //从尾部脱链

                  if (av != &main_arena)
                    victim->size |= NON_MAIN_ARENA;
                  check_malloced_chunk (av, victim, nb);
                  void *p = chunk2mem (victim);
                  alloc_perturb (p, bytes);                  //清空bytes字节大小的值
                  return p;
                }
            }
        }

我们对于smallbin的分配也十分简单，那就是直接从尾部开始取，但是在取之前我们会判断arena是否进行过初始化，若没有进行初始化，则调用malloc_consolidate进行初始化

题外话：malloc_consolidate

这里是我们单独的malloc_consolidate函数讲解，与上面_int_malloc单独分开，整体代码如下：

    /*
      ------------------------- malloc_consolidate -------------------------

      malloc_consolidate是一个用来拆除fastbins中chunk的特殊free()函数.
      free()本身不能用于此目的，因为在其他情况下他可能将堆块放入fastbins当中  因此我们需要使用一个相似的操作来代替free()代码。
      当然，因为该代码在malloc的工程中是第一次被调用 ，他是一个极佳的位置来触发我们的初始化代码
    */

    static void malloc_consolidate(mstate av)
    {
      mfastbinptr*    fb;                 /* 目前正在被合并的fastbin chunk */
      mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
      mchunkptr       p;                  /* current chunk being consolidated */
      mchunkptr       nextp;              /* next chunk to consolidate */
      mchunkptr       unsorted_bin;       /* bin header */
      mchunkptr       first_unsorted;     /* chunk to link to */

      /* These have same use as in free() */
      mchunkptr       nextchunk;
      INTERNAL_SIZE_T size;
      INTERNAL_SIZE_T nextsize;
      INTERNAL_SIZE_T prevsize;
      int             nextinuse;
      mchunkptr       bck;
      mchunkptr       fwd;

      /*
        如果max_fast 为0，则说明arena并未初始化，因此执行下面的步骤
      */

      if (get_max_fast () != 0) {
        clear_fastchunks(av);

        unsorted_bin = unsorted_chunks(av);                         //获取unsorted bin的bins数组下标

        /*
          移除fastbins当中所有的chunk，然后进行合并，再紧接着放入我们的unsorted bin链条当中. Among other reasons for doing this,
          placing in unsorted bin avoids needing to calculate actual bins
          until malloc is sure that chunks aren't immediately going to be
          reused anyway.
        */

        maxfb = &fastbin (av, NFASTBINS - 1);                 //获取最大块的fastbin
        fb = &fastbin (av, 0);                                 //获取最小快的fastbin
        do {
          p = atomic_exchange_acq (fb, 0);         //纳米交换！小子
          if (p != 0) {                                         //如果说换出来的堆块指针非0，那么就说明该链条上面存在fastbin堆块
            do {
              check_inuse_chunk(av, p);         //查询下一个堆块的inuse来表示该堆块是否被使用，以及是否是mmap分配，但这里没怎么理解，因为fastbin的inuse不应该都是始终为1的么，这里可能是检查紧邻的下一个堆块可能不属于fastbin范围时的检测
              nextp = p->fd;

              /* Slightly streamlined version of consolidation code in free() */
              size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);   //获取堆块size
              nextchunk = chunk_at_offset(p, size); //为一个宏，这里即为p+size
              nextsize = chunksize(nextchunk);                 //获取next堆块的size

              if (!prev_inuse(p)) {                         //查看p堆块前面的堆块是否分配，若未分配则进行下面的步骤
                prevsize = p->prev_size;            
                size += prevsize;
                p = chunk_at_offset(p, -((long) prevsize));         //即为上面p + prevsize前一个块地址
                unlink(av, p, bck, fwd);                 //大名鼎鼎的unlink，在2.23版本表现为一个宏
              }

              if (nextchunk != av->top) {                          //如果nextchunk不是topchunk，则往下走
                nextinuse = inuse_bit_at_offset(nextchunk, nextsize); //判断nextchunk的下一块inuse位

                if (!nextinuse) {         //nextinuse为0说明该块是空的
                  size += nextsize;
                  unlink(av, nextchunk, bck, fwd);                 //unlink该nextchunk
                } else         //nextinuse 为1则说明该块正在被使用，因此清该堆块的inuse位为0
                  clear_inuse_bit_at_offset(nextchunk, 0);

                first_unsorted = unsorted_bin->fd;
                unsorted_bin->fd = p;
                first_unsorted->bk = p;         //将fastbin取出/合并后的堆块存入unsortedbin，从链头放入

                if (!in_smallbin_range (size)) { //如果为largebin范围，则置空fd/bk_nextsize
                  p->fd_nextsize = NULL;
                  p->bk_nextsize = NULL;
                }

                set_head(p, size | PREV_INUSE);  //设置堆块细节，也就是头部分和脚部分
                p->bk = unsorted_bin;
                p->fd = first_unsorted;
                set_foot(p, size);
              }

              else {                 //如果nextchunk是topchunk
                size += nextsize;  //合并为top_chunk
                set_head(p, size | PREV_INUSE);
                av->top = p;
              }

            } while ( (p = nextp) != 0);

          }
        } while (fb++ != maxfb);
      }
      else {                 //若未初始化，则使用下面代码进行一个简单的初始化
        malloc_init_state(av);
        check_malloc_state(av);
      }
    }

题外话： unlink宏

其中unlink宏如下：

    /* 从bin链表数组当中取出一个chunk */
    #define unlink(AV, P, BK, FD) {                                            \
        FD = P->fd;                                                                      \
        BK = P->bk;                                                                      \
        if (__builtin_expect (FD->bk != P || BK->fd != P, 0))                      \  //检查一：p->fd->bk == p && p->bk->fd == p
          malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
        else {                                                                      \
            FD->bk = BK;                                                              \         
            BK->fd = FD;                                                              \         //脱链操作
            if (!in_smallbin_range (P->size)                                      \          //不在smallbin范围，那么就是在largebin范围
                && __builtin_expect (P->fd_nextsize != NULL, 0)) {                      \
                if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)              \  
                    || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \         //检查二：p->fd_nextsize->bk_nextsize == p && p->bk_nextsize->fd_nextsize == p
                  malloc_printerr (check_action,                                      \
                                   "corrupted double-linked list (not small)",    \
                                   P, AV);                                              \
                if (FD->fd_nextsize == NULL) {                                      \ //如果在小链条中
                    if (P->fd_nextsize == P)                                      \         //如果largebin链条就一个，则往下走
                      FD->fd_nextsize = FD->bk_nextsize = FD;                      \ 
                    else {                                                              \
                        FD->fd_nextsize = P->fd_nextsize;                              \ 
                        FD->bk_nextsize = P->bk_nextsize;                              \
                        P->fd_nextsize->bk_nextsize = FD;                              \
                        P->bk_nextsize->fd_nextsize = FD;                              \         //largebin脱链
                      }                                                              \
                  } else {                                                              \                 //若刚好p为小链条最后一个
                    P->fd_nextsize->bk_nextsize = P->bk_nextsize;                      \
                    P->bk_nextsize->fd_nextsize = P->fd_nextsize;                      \
                  }                                                                      \
              }                                                                      \
          }                                                                              \
    }

综上所述，malloc_consolidate函数的目的就是从fastbin当中该取的chunk取出，该合并的合并，然后存入unsorted bin当中，说完该函数，那么我们回到咱们的_int_malloc函数当中

step4 清理堆块碎片

上面我们讲到了smallbin判断，但如果说我们请求的bytes既不在fastbin范围，也不在smallbin范围当中呢

    /*
         如果我们是largebinsize的请求, 在我们继续之前，我们先合并一下咱们的fastbin，也就是调用mallock_consolidate函数
         虽然这个举动看起来是在还仍保留有大量空闲区块的同时来消除所有的fastbin堆块, 但是他避免了因fastbin堆块导致的碎片问题。
       */

      else
        {
          idx = largebin_index (nb);         //获取largebin 的下标
          if (have_fastchunks (av))         
            malloc_consolidate (av);         //合并fastbinchunk，然后摘除他们
        }

在我们调用完malloc_consolidate之后，我们进入接下来的判断，也就是进入我们的外部大循环：

step5 大循环-unsortedbin清理

     /*

            处理最近释放或剩余的块，仅在完全匹配的情况下获取一个，
            或者，如果这是一个小请求，则chunk是最近非完全匹配的剩余块。将其他已遍历的块放在bin中。
            请注意，在任何例程中，这一步骤都是将块放置在bin中的唯一位置。
        这里需要外循环，因为我们可能直到malloc接近尾声时才意识到我们应该合并，
            所以必须这样做并重试。这种情况最多发生一次，而且只有当我们需要扩展内存来为“small”请求提供服务时才会发生
       */

      for (;; )
        {
          int iters = 0;
          while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))         //判断条件是unsorted bin不为空，并且此时使得victim指向unsorted 链表尾部
            {
              bck = victim->bk;
              if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0) //检测一：size大小最小最大检查
                  || __builtin_expect (victim->size > av->system_mem, 0))
                malloc_printerr (check_action, "malloc(): memory corruption",
                                 chunk2mem (victim), av);
              size = chunksize (victim);

              /*
                 如果是个小请求，如果最近的remainder剩余块是unsortedbin当中唯一的块的话，
                            尝试使用他来分配  This helps promote locality for
                 runs of consecutive small requests. 这是最佳适配的唯一例外
                            并且适用于当这里没有最佳适配的小堆块
               */

              if (in_smallbin_range (nb) &&
                  bck == unsorted_chunks (av) &&
                  victim == av->last_remainder &&
                  (unsigned long) (size) > (unsigned long) (nb + MINSIZE))         //请求字节smallbin范围+unsortedbin只有一个堆块，同样也是last_remainder+这个剩余块size大于请求size
                {
                  /* split and reattach remainder */
                  remainder_size = size - nb;
                  remainder = chunk_at_offset (victim, nb);
                  unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
                  av->last_remainder = remainder;
                  remainder->bk = remainder->fd = unsorted_chunks (av);
                  if (!in_smallbin_range (remainder_size))         //如果剩余块为largebin范围
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }

                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);

                  check_malloced_chunk (av, victim, nb);
                  void *p = chunk2mem (victim);
                  alloc_perturb (p, bytes);
                  return p;                                  //这里是划分出了相应堆块，直接返回
                }

              /* 从unsorted 链表移除我们的victim */
              unsorted_chunks (av)->bk = bck;
              bck->fd = unsorted_chunks (av);

              /* Take now instead of binning if exact fit */

              if (size == nb)         //最佳适配
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    victim->size |= NON_MAIN_ARENA;
                  check_malloced_chunk (av, victim, nb);
                  void *p = chunk2mem (victim);
                  alloc_perturb (p, bytes);
                  return p;
                }

              /* 走到这儿说明并没有最佳匹配，因此在这里就开始归还堆块给相应的bin */

              if (in_smallbin_range (size))         //为smallbin范围
                {
                  victim_index = smallbin_index (size);
                  bck = bin_at (av, victim_index);
                  fwd = bck->fd;
                }
              else                         //为largebin范围
                {
                  victim_index = largebin_index (size);
                  bck = bin_at (av, victim_index);
                  fwd = bck->fd;

                  /* maintain large bins in sorted order */
                  if (fwd != bck)                 //说明largebin链条不为空
                    {
                      /* Or with inuse bit to speed comparisons */
                      size |= PREV_INUSE;
                      /* if smaller than smallest, bypass loop below */
                      assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
                      if ((unsigned long) (size) < (unsigned long) (bck->bk->size))         //这里我们知道largebin的链条尾部是最小堆块，所以这里如果小于最小堆块的话那么直接放入链表尾部
                        {
                          fwd = bck;
                          bck = bck->bk;

                          victim->fd_nextsize = fwd->fd;
                          victim->bk_nextsize = fwd->fd->bk_nextsize;
                          fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                        }
                      else                 //若该堆块大于最小堆块，那么我们就从链头开始寻找，直至找到一个比他小的堆块，然后放到他前面
                        {
                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
                          while ((unsigned long) size < fwd->size)
                            {
                              fwd = fwd->fd_nextsize;
                              assert ((fwd->size & NON_MAIN_ARENA) == 0);
                            }

                          if ((unsigned long) size == (unsigned long) fwd->size)
                            /* 如果是等于，总是插入小链条的第二个位置  */
                            fwd = fwd->fd;
                          else        //如果是大于
                            {
                              victim->fd_nextsize = fwd;
                              victim->bk_nextsize = fwd->bk_nextsize;
                              fwd->bk_nextsize = victim;
                              victim->bk_nextsize->fd_nextsize = victim;
                            }
                          bck = fwd->bk;
                        }
                    }
                  else         //如果largebin链条为空
                    victim->fd_nextsize = victim->bk_nextsize = victim;
                }

              mark_bin (av, victim_index);
              victim->bk = bck;
              victim->fd = fwd;
              fwd->bk = victim;
              bck->fd = victim;

    #define MAX_ITERS       10000
              if (++iters >= MAX_ITERS)
                break;
            }

进入我们的外部大循环，上面源码仅仅展示了内部unsortedbin循环，这里要么有三种结果：

在unsorted bin循环当中发现unsorted bin只有一个堆块，且大于我们要分配的，则切割他然后返回
循环当中找到适配的堆块并把寻找路径上的不适配堆块返回适当的bin当中，则直接返回，剩余的堆块不予处理
循环当中未找到合适堆块，则继续下一步，此时unsortedbin已经遍历完毕，其中无堆块，全部存放于适合的bin当中

step6 大循环-largebin堆块寻找

经过unsortedbin循环之后，我们再来判断

     /*
             如果是large请求, 寻找最小适配块.  Use the skip list for this.
           */

          if (!in_smallbin_range (nb))
            {
              bin = bin_at (av, idx);

              /* skip scan if empty or largest chunk is too small */
              if ((victim = first (bin)) != bin &&
                  (unsigned long) (victim->size) >= (unsigned long) (nb)) //判断非空且最大块大于请求nb
                {
                  victim = victim->bk_nextsize;
                  while (((unsigned long) (size = chunksize (victim)) <
                          (unsigned long) (nb)))         //从链表尾部开始遍历，寻找到大于或等于他的块
                    victim = victim->bk_nextsize;

                  /* Avoid removing the first entry for a size so that the skip
                     list does not have to be rerouted.  */
                  if (victim != last (bin) && victim->size == victim->fd->size)         //如果victim不为最后一个块，且其中存在着fd指针指向的堆块，也就是说小链表当中有两个或以上的相同大小的堆块
                    victim = victim->fd;         //始终取第二个

                  remainder_size = size - nb;         //判断是否是非最佳适配，可能会多出部分
                  unlink (av, victim, bck, fwd);         //脱链

                  /* Exhaust */
                  if (remainder_size < MINSIZE)
                    {
                      set_inuse_bit_at_offset (victim, size);
                      if (av != &main_arena)
                        victim->size |= NON_MAIN_ARENA;
                    }
                  /* Split */
                  else         //这里是存在剩余部分，然后我们存放在unsiorted bin 当中
                    {
                      remainder = chunk_at_offset (victim, nb);
                      /* We cannot assume the unsorted list is empty and therefore
                         have to perform a complete insert here.  */
                      bck = unsorted_chunks (av);
                      fwd = bck->fd;
              if (__glibc_unlikely (fwd->bk != bck))
                        {
                          errstr = "malloc(): corrupted unsorted chunks";
                          goto errout;
                        }
                      remainder->bk = bck;
                      remainder->fd = fwd;
                      bck->fd = remainder;
                      fwd->bk = remainder;
                      if (!in_smallbin_range (remainder_size))
                        {
                          remainder->fd_nextsize = NULL;
                          remainder->bk_nextsize = NULL;
                        }
                      set_head (victim, nb | PREV_INUSE |
                                (av != &main_arena ? NON_MAIN_ARENA : 0));
                      set_head (remainder, remainder_size | PREV_INUSE);
                      set_foot (remainder, remainder_size);
                    }
                  check_malloced_chunk (av, victim, nb);
                  void *p = chunk2mem (victim);
                  alloc_perturb (p, bytes);
                  return p;
                }
            }

这里是从我们的largebin链表当中，从尾部开始遍历直到找到相同或者稍微大那么点的堆块，要么直接返回要么切割返回，切割的剩余部分存放在unsorted bin当中。

step7 大循环-位图分配

然后接着往下走：

      /*
             从下一个最大的bin开始，通过扫描bin来搜索chunk。
                    此搜索严格按照最佳匹配进行；即选择适合的最小的（具有接近最近最少使用的关系）块。
             位图避免了检查大多数块是否为非空。
                    在预热阶段跳过所有存储箱的特殊情况下，还没有返回块，这比看起来更快。
           */

          ++idx;
          bin = bin_at (av, idx);
          block = idx2block (idx);                 //宏，右移5个bit位
          map = av->binmap[block];
          bit = idx2bit (idx);

          for (;; )
            {
              /* Skip rest of block if there are no more set bits in this block.  */
              if (bit > map || bit == 0)
                {
                  do
                    {
                      if (++block >= BINMAPSIZE) /* out of bins */
                        goto use_top;
                    }
                  while ((map = av->binmap[block]) == 0);

                  bin = bin_at (av, (block << BINMAPSHIFT));
                  bit = 1;
                }

              /* Advance to bin with set bit. There must be one. */
              while ((bit & map) == 0)
                {
                  bin = next_bin (bin);
                  bit <<= 1;
                  assert (bit != 0);
                }

              /* Inspect the bin. It is likely to be non-empty */
              victim = last (bin);

              /*  If a false alarm (empty bin), clear the bit. */
              if (victim == bin)
                {
                  av->binmap[block] = map &= ~bit; /* Write through */
                  bin = next_bin (bin);
                  bit <<= 1;
                }

              else
                {
                  size = chunksize (victim);

                  /*  We know the first chunk in this bin is big enough to use. */
                  assert ((unsigned long) (size) >= (unsigned long) (nb));

                  remainder_size = size - nb;

                  /* unlink */
                  unlink (av, victim, bck, fwd);

                  /* Exhaust */
                  if (remainder_size < MINSIZE)
                    {
                      set_inuse_bit_at_offset (victim, size);
                      if (av != &main_arena)
                        victim->size |= NON_MAIN_ARENA;
                    }

                  /* 切块，然后剩余的给unsorted bin */
                  else
                    {
                      remainder = chunk_at_offset (victim, nb);

                      /* We cannot assume the unsorted list is empty and therefore
                         have to perform a complete insert here.  */
                      bck = unsorted_chunks (av);
                      fwd = bck->fd;
              if (__glibc_unlikely (fwd->bk != bck))
                        {
                          errstr = "malloc(): corrupted unsorted chunks 2";
                          goto errout;
                        }
                      remainder->bk = bck;
                      remainder->fd = fwd;
                      bck->fd = remainder;
                      fwd->bk = remainder;

                      /* advertise as last remainder */
                      if (in_smallbin_range (nb))
                        av->last_remainder = remainder;
                      if (!in_smallbin_range (remainder_size))
                        {
                          remainder->fd_nextsize = NULL;
                          remainder->bk_nextsize = NULL;
                        }
                      set_head (victim, nb | PREV_INUSE |
                                (av != &main_arena ? NON_MAIN_ARENA : 0));
                      set_head (remainder, remainder_size | PREV_INUSE);
                      set_foot (remainder, remainder_size);
                    }
                  check_malloced_chunk (av, victim, nb);
                  void *p = chunk2mem (victim);
                  alloc_perturb (p, bytes);
                  return p;
                }
            }

以上即为普通的位图分配，倒是省下了诸多麻烦，这里通过寻找最小适配块来进行切割，剩下的就分配给unsorted bin

step7 使用top chunk

use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).

     We require that av->top always exists (i.e., has size >=
     MINSIZE) after initialization, so if it would otherwise be
     exhausted by current request, it is replenished. (The main
     reason for ensuring it exists is that we may need MINSIZE space
     to put in fenceposts in sysmalloc.)
   */

  victim = av->top;
  size = chunksize (victim);

  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))         //如果topchunk够分配，直接切割
    {
      remainder_size = size - nb;
      remainder = chunk_at_offset (victim, nb);
      av->top = remainder;
      set_head (victim, nb | PREV_INUSE |
                (av != &main_arena ? NON_MAIN_ARENA : 0));
      set_head (remainder, remainder_size | PREV_INUSE);

      check_malloced_chunk (av, victim, nb);
      void *p = chunk2mem (victim);
      alloc_perturb (p, bytes);
      return p;
    }

  /* When we are using atomic ops to free fast chunks we can get
     here for all block sizes.  */
  else if (have_fastchunks (av))         //如果topchunk不够，且有fastchunk，那么进行malloc_consolidate进行合并fast，然后在接着分配
    {
      malloc_consolidate (av);
      /* restore original bin index */
      if (in_smallbin_range (nb))
        idx = smallbin_index (nb);
      else
        idx = largebin_index (nb);
    }

  /*
     Otherwise, relay to handle system-dependent cases
   */
  else         //如果上述都不满足，则调用系统分配
    {
      void *p = sysmalloc (nb, av);
      if (p != NULL)
        alloc_perturb (p, bytes);
      return p;
    }
}

}

至此，malloc分配分析结束，下面附赠一张分配流程图：

0x02 free 步骤

step1 free判断

首先就是我们的__libc_free

            void __libc_free (void *mem)
            {
              mstate ar_ptr;
              mchunkptr p;                          /* chunk corresponding to mem */

              void (*hook) (void *, const void *)
                = atomic_forced_read (__free_hook);         //__free_hook
              if (__builtin_expect (hook != NULL, 0))
                {
                  (*hook)(mem, RETURN_ADDRESS (0));
                  return;
                }

              if (mem == 0)                              /* free(0) has no effect */
                return;

              p = mem2chunk (mem);

              if (chunk_is_mmapped (p))                       /* release mmapped memory. */
                {
                  /* see if the dynamic brk/mmap threshold needs adjusting */
                  if (!mp_.no_dyn_threshold
                      && p->size > mp_.mmap_threshold
                      && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
                    {
                      mp_.mmap_threshold = chunksize (p);
                      mp_.trim_threshold = 2 * mp_.mmap_threshold;
                      LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                                  mp_.mmap_threshold, mp_.trim_threshold);
                    }
                  munmap_chunk (p);
                  return;
                }

              ar_ptr = arena_for_chunk (p);
              _int_free (ar_ptr, p, 0);
            }

step2 安全检查

    static void
    _int_free (mstate av, mchunkptr p, int have_lock)
    {
      INTERNAL_SIZE_T size;        /* its size */
      mfastbinptr *fb;             /* associated fastbin */
      mchunkptr nextchunk;         /* next contiguous chunk */
      INTERNAL_SIZE_T nextsize;    /* its size */
      int nextinuse;               /* true if nextchunk is used */
      INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
      mchunkptr bck;               /* misc temp for linking */
      mchunkptr fwd;               /* misc temp for linking */

      const char *errstr = NULL;
      int locked = 0;

      size = chunksize (p);

      /* Little security check which won't hurt performance: the
         allocator never wrapps around at the end of the address space.
         Therefore we can exclude some size values which might appear
         here by accident or by "design" from some intruder.  */
      if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
          || __builtin_expect (misaligned_chunk (p), 0))
        {
          errstr = "free(): invalid pointer";
        errout:
          if (!have_lock && locked)
            (void) mutex_unlock (&av->mutex);
          malloc_printerr (check_action, errstr, chunk2mem (p), av);
          return;
        }
      /* We know that each chunk is at least MINSIZE bytes in size or a
         multiple of MALLOC_ALIGNMENT.  */
      if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
        {
          errstr = "free(): invalid size";
          goto errout;
        }

      check_inuse_chunk(av, p);

其中是对于一系列free参数的判断，我们看看即可

step3 置入fastbin

首先如果判断其范围处于fastbin，则置入链表，当然，存在多个检测：）

     /*
        If eligible, place chunk on a fastbin so it can be found
        and used quickly in malloc.
      */

      if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())         //处于fastbin范围内

    #if TRIM_FASTBINS
          /*
            If TRIM_FASTBINS set, don't place chunks
            bordering top into fastbins
          */
          && (chunk_at_offset(p, size) != av->top)
    #endif
          ) {

        if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
            || __builtin_expect (chunksize (chunk_at_offset (p, size))
                                 >= av->system_mem, 0))
          {
            /* We might not have a lock at this point and concurrent modifications
               of system_mem might have let to a false positive.  Redo the test
               after getting the lock.  */
            if (have_lock
                || ({ assert (locked == 0);
                      mutex_lock(&av->mutex);
                      locked = 1;
                      chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
                        || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
                  }))
              {
                errstr = "free(): invalid next size (fast)";
                goto errout;
              }
            if (! have_lock)
              {
                (void)mutex_unlock(&av->mutex);
                locked = 0;
              }
          }

        free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);         //清空chunk内数据

        set_fastchunks(av);                         //设置av->flag的fast位？这里还不甚了解
        unsigned int idx = fastbin_index(size);
        fb = &fastbin (av, idx);

        /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
        mchunkptr old = *fb, old2;
        unsigned int old_idx = ~0u;
        do
          {
            /* 检查fastbin链表头部是不是我们释放的该块，此即为double free检测
               (i.e., double free).  */
            if (__builtin_expect (old == p, 0))
              {
                errstr = "double free or corruption (fasttop)";
                goto errout;
              }
            /* 检查我们fastbin里链表头部size是否相同于我们即将添加的块.  We can dereference OLD
               only if we have the lock, otherwise it might have already been
               deallocated.  See use of OLD_IDX below for the actual check.  */
            if (have_lock && old != NULL)
              old_idx = fastbin_index(chunksize(old));
            p->fd = old2 = old;
          }
        while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

        if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
          {
            errstr = "invalid fastbin entry (free)";
            goto errout;
          }
      }

step4 若不是fastbin，则该去哪儿呢

    /*
        合并其他非mmap分配的chunk
      */

      else if (!chunk_is_mmapped(p)) {                 //若释放的堆块并不是fastbin大小
        if (! have_lock) {
          (void)mutex_lock(&av->mutex);
          locked = 1;
        }

        nextchunk = chunk_at_offset(p, size);

       ...         //一系列检测

        nextsize = chunksize(nextchunk);
        if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
            || __builtin_expect (nextsize >= av->system_mem, 0))
          {
            errstr = "free(): invalid next size (normal)";
            goto errout;
          }

        free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);         //清空其中元素

        /* 向后（backward）/上合并 */
        if (!prev_inuse(p)) {
          prevsize = p->prev_size;
          size += prevsize;
          p = chunk_at_offset(p, -((long) prevsize));
          unlink(av, p, bck, fwd);
        }

        if (nextchunk != av->top) {                 //若nextchunk不是topchunk
          /* get and clear inuse bit */
          nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

          /* 向前（forward）/下合并 */
          if (!nextinuse) {
            unlink(av, nextchunk, bck, fwd);
            size += nextsize;
          } else
            clear_inuse_bit_at_offset(nextchunk, 0);

          /*
            将该堆块置入unsorted bin. chunks直到下一次malloc的时候才会有机会置入合适的bins，此前一致存入unsorted bin
          */

          bck = unsorted_chunks(av);
          fwd = bck->fd;
          if (__glibc_unlikely (fwd->bk != bck))
            {
              errstr = "free(): corrupted unsorted chunks";
              goto errout;
            }
          p->fd = fwd;
          p->bk = bck;
          if (!in_smallbin_range(size))
            {
              p->fd_nextsize = NULL;
              p->bk_nextsize = NULL;
            }
          bck->fd = p;
          fwd->bk = p;

          set_head(p, size | PREV_INUSE);
          set_foot(p, size);

          check_free_chunk(av, p);
        }

        /*
          如果nextchunk是topchunk，此时我们就要将其合并入topchunk
        */

        else {
          size += nextsize;
          set_head(p, size | PREV_INUSE);
          av->top = p;
          check_chunk(av, p);
        }

            ···

从源码可以得知除了fastbin范围，其他块均存入unsorted bin

至此，free的分析也就此结束，如果大伙是从上面的malloc看下来的，那么肯定会发现这个free较之于十分简单，其中也是因为一些重复的函数在malloc已经讲解过，再写一遍没有必要，其中我也省略了很多free过程当中的检测部分，因为这较之于我们今天分析的目的有点远了。

当然，附赠free过程图：

0x03 glibc2.27版本差异

我们都知道，在2.26及以上增加了tcache，其中使得我们存取块更加迅速，下面我们就来探讨一下其中较之于2.23的差别

差异一：数据结构们

首先就是我们的tcache数据块，如下：

    typedef struct tcache_entry
    {
      struct tcache_entry *next;         //tcache链条
    } tcache_entry;

    /* 每个线程都有这样的一个tcache数据管理结构体, which contains the
       per-thread cache (hence "tcache_perthread_struct").  Keeping
       overall size low is mildly important.  Note that COUNTS and ENTRIES
       are redundant (we could have just counted the linked list each
       time), this is for performance reasons.  */
    typedef struct tcache_perthread_struct
    {
      char counts[TCACHE_MAX_BINS];                 //用一字节来代表一个tcache链表的数量
      tcache_entry *entries[TCACHE_MAX_BINS];         //这里就是我们的链表指针数组
    } tcache_perthread_struct;

    /* 一些宏定义 */
    #if USE_TCACHE
    /* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
    # define TCACHE_MAX_BINS                64
    # define MAX_TCACHE_SIZE        tidx2usize (TCACHE_MAX_BINS-1)

    /* Only used to pre-fill the tunables.  */
    # define tidx2usize(idx)        (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

    /* When "x" is from chunksize(). 通过size定位tcache数组下标 */
    # define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)         
    /* When "x" is a user-provided size.  */
    # define usize2tidx(x) csize2tidx (request2size (x))

    /* With rounding and alignment, the bins are...
       idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
       idx 1   bytes 25..40 or 13..20
       idx 2   bytes 41..56 or 21..28
       etc.  */

    /* This is another arbitrary limit, which tunables can change.  Each
       tcache bin will hold at most this number of chunks.  */
    # define TCACHE_FILL_COUNT 7////                 //定义最大一个链条的tcache数量
    #endif

差异二：__libc_malloc

我们在使用__libc_malloc进行分配时，在调用malloc_hook后，int_malloc前会首先调用tcache_get函数来获取相关堆块

    void *
    __libc_malloc (size_t bytes)
    {
      mstate ar_ptr;
      void *victim;

      void *(*hook) (size_t, const void *)
        = atomic_forced_read (__malloc_hook);    //malloc_hook
      if (__builtin_expect (hook != NULL, 0))
        return (*hook)(bytes, RETURN_ADDRESS (0));
    #if USE_TCACHE
      /* int_free also calls request2size, be careful to not pad twice.  */
      size_t tbytes;
      checked_request2size (bytes, tbytes);
      size_t tc_idx = csize2tidx (tbytes);                 //获取tcache对应size下标

      MAYBE_INIT_TCACHE ();

      DIAG_PUSH_NEEDS_COMMENT;
      if (tc_idx < mp_.tcache_bins
          /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
          && tcache
          && tcache->entries[tc_idx] != NULL)
        {
          return tcache_get (tc_idx);         //直接从tcache取
        }
      DIAG_POP_NEEDS_COMMENT;
    #endif

题外话：tcache_get

    /* Caller must ensure that we know tc_idx is valid and there's
       available chunks to remove.  */
    static __always_inline void *
    tcache_get (size_t tc_idx)
    {
      tcache_entry *e = tcache->entries[tc_idx];         
      assert (tc_idx < TCACHE_MAX_BINS);
      assert (tcache->entries[tc_idx] > 0);
      tcache->entries[tc_idx] = e->next;         //从tcache链表头部取得堆块返回
      --(tcache->counts[tc_idx]);
      return (void *) e;
    }

结束，继续malloc

差异三：_int_malloc

这里的差异即为在察觉到请求size是fastbin范围时，多的是下面那个#if~#endif

    if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
        {
          idx = fastbin_index (nb);
          mfastbinptr *fb = &fastbin (av, idx);
          mchunkptr pp;
          victim = *fb;

          if (victim != NULL)
            {
              if (SINGLE_THREAD_P)
                *fb = victim->fd;
              else
                REMOVE_FB (fb, pp, victim);
              if (__glibc_likely (victim != NULL))
                {
                  size_t victim_idx = fastbin_index (chunksize (victim));
                  if (__builtin_expect (victim_idx != idx, 0))
                    malloc_printerr ("malloc(): memory corruption (fast)");
                  check_remalloced_chunk (av, victim, nb);
    #if USE_TCACHE
                  /* 当我们运行至此, 如果fastbin该链条仍有其他堆块，则我们stash他们到tcache链条上*/
                  size_t tc_idx = csize2tidx (nb);
                  if (tcache && tc_idx < mp_.tcache_bins)
                    {
                      mchunkptr tc_victim;

                      /* While bin not empty and tcache not full, copy chunks.  */
                      while (tcache->counts[tc_idx] < mp_.tcache_count                 //不能超过7奥
                             && (tc_victim = *fb) != NULL)
                        {
                          if (SINGLE_THREAD_P)
                            *fb = tc_victim->fd;
                          else
                            {
                              REMOVE_FB (fb, pp, tc_victim);
                              if (__glibc_unlikely (tc_victim == NULL))
                                break;
                            }
                          tcache_put (tc_victim, tc_idx);                 //这里注意均为从头置入
                        }
                    }
    #endif
                  void *p = chunk2mem (victim);
                  alloc_perturb (p, bytes);
                  return p;                 //搞完后正常返回堆块
                }
            }
        }

这里就是在我们分配fastbin的时候，若链条上还有其他堆块，我们则需要将其中剩下的free堆块头插入tcache当中，调试源码会发现顺序刚好相反，因为是从fastbin头取，再头插至tcachebin

除了fastbin，还有在我们smallbin找到堆块时，若链表中也有剩余块，其也会用相同的手法头插入tcachebin当中，但是这里有个区别就是smallbin由于是从尾部取堆块，而不是跟fastbin一样从头取，关键区别如下（这里就不写其他的部分了）：

    #if USE_TCACHE
              /* While we're here, if we see other chunks of the same size,
                 stash them in the tcache.  */
              size_t tc_idx = csize2tidx (nb);
              if (tcache && tc_idx < mp_.tcache_bins)
                {
                  mchunkptr tc_victim;

                  /* While bin not empty and tcache not full, copy chunks over.  */
                  while (tcache->counts[tc_idx] < mp_.tcache_count
                         && (tc_victim = last (bin)) != bin)
                    {
                      if (tc_victim != 0)
                        {
                          bck = tc_victim->bk;                 //从bk取，一直向上
                          set_inuse_bit_at_offset (tc_victim, nb);
                          if (av != &main_arena)
                            set_non_main_arena (tc_victim);
                          bin->bk = bck;
                          bck->fd = bin;

                          tcache_put (tc_victim, tc_idx);
                        }
                    }
                }
    #endif

然后就是在for(;;)大循环的时候，unsorted bin while循环置入合适堆块bins的时候，首先会先置入tcache bins 而不是寻找到相应bins置入
如下：

             /* remove from unsorted list */
              unsorted_chunks (av)->bk = bck;
              bck->fd = unsorted_chunks (av);

              /* Take now instead of binning if exact fit */

              if (size == nb)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    set_non_main_arena (victim);
    #if USE_TCACHE
                  /* Fill cache first, return to user only if cache fills.
                     We may return one of these chunks later.  */
                  if (tcache_nb
                      && tcache->counts[tc_idx] < mp_.tcache_count)
                    {
                      tcache_put (victim, tc_idx);         //tcache始终是第一位，堆块们想要回到合适的bins太难了：(
                      return_cached = 1;
                      continue;
                    }
                  else
                    {
    #endif

差异四：_int_free

其中差异便是在调用_int_free时首先会调用tcache_put

    /*
       ------------------------------ free ------------------------------
     */

    static void
    _int_free (mstate av, mchunkptr p, int have_lock)
    {
      INTERNAL_SIZE_T size;        /* its size */
      mfastbinptr *fb;             /* associated fastbin */
      mchunkptr nextchunk;         /* next contiguous chunk */
      INTERNAL_SIZE_T nextsize;    /* its size */
      int nextinuse;               /* true if nextchunk is used */
      INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
      mchunkptr bck;               /* misc temp for linking */
      mchunkptr fwd;               /* misc temp for linking */

      size = chunksize (p);

      /* Little security check which won't hurt performance: the
         allocator never wrapps around at the end of the address space.
         Therefore we can exclude some size values which might appear
         here by accident or by "design" from some intruder.  */
      if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
          || __builtin_expect (misaligned_chunk (p), 0))
        malloc_printerr ("free(): invalid pointer");
      /* We know that each chunk is at least MINSIZE bytes in size or a
         multiple of MALLOC_ALIGNMENT.  */
      if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
        malloc_printerr ("free(): invalid size");

      check_inuse_chunk(av, p);

    #if USE_TCACHE
      {
        size_t tc_idx = csize2tidx (size);         //若free堆块大小处于tcachebin范围当中的话，执行下面语句

        if (tcache
            && tc_idx < mp_.tcache_bins
            && tcache->counts[tc_idx] < mp_.tcache_count)
          {
            tcache_put (p, tc_idx);         //首先将其置入tcache当中
            return;
          }
      }
    #endif

题外话：tcache_put

    /* Caller must ensure that we know tc_idx is valid and there's room
       for more chunks.  */
    static __always_inline void
    tcache_put (mchunkptr chunk, size_t tc_idx)
    {
      tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
      assert (tc_idx < TCACHE_MAX_BINS);
      e->next = tcache->entries[tc_idx];         //从头置入
      tcache->entries[tc_idx] = e;
      ++(tcache->counts[tc_idx]);
    }

至此，个人所分析到的值得注意的差异就此结束

N1nEmAn · 发表于 2023-5-22 20:37

码了，暑假干堆的时候就来看

tianwenmingce · 发表于 2023-5-23 09:14

本帖最后由 tianwenmingce 于 2023-5-23 09:20 编辑

先马住，malloc分块和寻找再看看

eversnow · 发表于 2023-5-23 13:02

马克菠萝，mark

52spress · 发表于 2023-5-23 16:14

mark，谢谢！

qgymib · 发表于 2023-5-23 16:30

厉害了~~

antzhive · 发表于 2023-5-23 21:37

这个牛逼了，码稳了慢慢消化

zjh889 · 发表于 2023-5-24 00:14

大师太厉害了，分享得很到位！

lxytwp · 发表于 2023-5-24 07:07

学习源码来了，看能看懂不。

pb297110281 · 发表于 2023-5-25 14:08

感谢分享！！！！！！！！

帐号		自动登录	找回密码
密码			注册[Register]

[系统底层] malloc&free源码浅析