malloc&free源码分析
其作为堆利用的重点,需要我们透彻了解其中细节,我们首先从malloc开始分析,但在此之前,我们需要了解一些重要的数据结构。本章分析版本为glibc-2.23,再之后讨论2.27即以上版本的差异
0x00 重要的数据结构们
1. malloc_state
首先便是我们经常接触的arena
struct malloc_state
{
mutex_t mutex;
int flags;
mfastbinptr fastbinsY[NFASTBINS];
mchunkptr top;
mchunkptr last_remainder;
mchunkptr bins[NBINS * 2 - 2];
unsigned int binmap[BINMAPSIZE];
struct malloc_state *next;
struct malloc_state *next_free;
INTERNAL_SIZE_T attached_threads;
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
};
其结构在源码当中表现为宏mstate
2. malloc_chunk
struct malloc_chunk {
INTERNAL_SIZE_T prev_size;
INTERNAL_SIZE_T size;
struct malloc_chunk* fd;
struct malloc_chunk* bk;
struct malloc_chunk* fd_nextsize;
struct malloc_chunk* bk_nextsize;
};
3. 源码自带的malloc_chunk细节,十分友善
0x01 malloc步骤
step1 malloc环境准备
首先我们调用malloc(size)
的时候,调用的库函数实际上为_libc_malloc
,如下:
void * __libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
if (!victim && ar_ptr != NULL)
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}
if (ar_ptr != NULL)
(void) mutex_unlock (&ar_ptr->mutex);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
return victim;
}
可以发现我们的_libc_malloc
函数主要功能是获取合适的arena
,然后传递给_int_malloc
分配真正的堆块,然后我们来观察_int_malloc
,而我们该函数十分长,因此我们逐步来看,首先看到定义的一些字段,如下:
static void * _int_malloc (mstate av, size_t bytes)
{
INTERNAL_SIZE_T nb;
unsigned int idx;
mbinptr bin;
mchunkptr victim;
INTERNAL_SIZE_T size;
int victim_index;
mchunkptr remainder;
unsigned long remainder_size;
unsigned int block;
unsigned int bit;
unsigned int map;
mchunkptr fwd;
mchunkptr bck;
const char *errstr = NULL;
然后我们继续来看接下来的步骤:
checked_request2size (bytes, nb);
题外话:checked_request2size
(注意这里是单独的宏表示,非int_malloc)
checked_request2size其宏表示为
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
/* Same, except also perform argument check */
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);
解释结束(下面继续int_malloc)
if (__glibc_unlikely (av == NULL))
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
step2 若在fastbin范围
然后接下来判断其是否位于fastbin范围
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
idx = fastbin_index (nb);
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp = *fb;
do
{
victim = pp;
if (victim == NULL)
break;
}
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
!= victim);
if (victim != 0)
{
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
return NULL;
}
check_remalloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
其中大致含义即为从fastbinsY链表数组找到对应的下标,然后从头取出fastbin,返回给用户。
step3 若在small范围当中
下面我们继续_int_malloc
,执行到这里表示我们fastbin超出其大小范围,或者说使用fastbin分配失败,然后就会判断其进入smallbin的判断当中
if (in_smallbin_range (nb))
{
idx = smallbin_index (nb);
bin = bin_at (av, idx);
if ((victim = last (bin)) != bin)
{
if (victim == 0)
malloc_consolidate (av);
else
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
{
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
bck->fd = bin;
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}
我们对于smallbin的分配也十分简单,那就是直接从尾部开始取,但是在取之前我们会判断arena是否进行过初始化,若没有进行初始化,则调用malloc_consolidate
进行初始化
题外话:malloc_consolidate
这里是我们单独的malloc_consolidate
函数讲解,与上面_int_malloc
单独分开,整体代码如下:
static void malloc_consolidate(mstate av)
{
mfastbinptr* fb;
mfastbinptr* maxfb;
mchunkptr p;
mchunkptr nextp;
mchunkptr unsorted_bin;
mchunkptr first_unsorted;
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;
mchunkptr bck;
mchunkptr fwd;
if (get_max_fast () != 0) {
clear_fastchunks(av);
unsorted_bin = unsorted_chunks(av);
maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do {
p = atomic_exchange_acq (fb, 0);
if (p != 0) {
do {
check_inuse_chunk(av, p);
nextp = p->fd;
size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}
if (nextchunk != av->top) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
if (!nextinuse) {
size += nextsize;
unlink(av, nextchunk, bck, fwd);
} else
clear_inuse_bit_at_offset(nextchunk, 0);
first_unsorted = unsorted_bin->fd;
unsorted_bin->fd = p;
first_unsorted->bk = p;
if (!in_smallbin_range (size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin;
p->fd = first_unsorted;
set_foot(p, size);
}
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
}
} while ( (p = nextp) != 0);
}
} while (fb++ != maxfb);
}
else {
malloc_init_state(av);
check_malloc_state(av);
}
}
题外话: unlink宏
其中unlink宏如下:
#define unlink(AV, P, BK, FD) { \
FD = P->fd; \
BK = P->bk; \
if (__builtin_expect (FD->bk != P || BK->fd != P, 0)) \
malloc_printerr (check_action, "corrupted double-linked list", P, AV); \
else { \
FD->bk = BK; \
BK->fd = FD; \
if (!in_smallbin_range (P->size) \
&& __builtin_expect (P->fd_nextsize != NULL, 0)) { \
if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0) \
|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \
malloc_printerr (check_action, \
"corrupted double-linked list (not small)", \
P, AV); \
if (FD->fd_nextsize == NULL) { \
if (P->fd_nextsize == P) \
FD->fd_nextsize = FD->bk_nextsize = FD; \
else { \
FD->fd_nextsize = P->fd_nextsize; \
FD->bk_nextsize = P->bk_nextsize; \
P->fd_nextsize->bk_nextsize = FD; \
P->bk_nextsize->fd_nextsize = FD; \
} \
} else { \
P->fd_nextsize->bk_nextsize = P->bk_nextsize; \
P->bk_nextsize->fd_nextsize = P->fd_nextsize; \
} \
} \
} \
}
综上所述,malloc_consolidate
函数的目的就是从fastbin当中该取的chunk取出,该合并的合并,然后存入unsorted bin
当中,说完该函数,那么我们回到咱们的_int_malloc
函数当中
step4 清理堆块碎片
上面我们讲到了smallbin判断,但如果说我们请求的bytes既不在fastbin范围,也不在smallbin范围当中呢
else
{
idx = largebin_index (nb);
if (have_fastchunks (av))
malloc_consolidate (av);
}
在我们调用完malloc_consolidate
之后,我们进入接下来的判断,也就是进入我们的外部大循环:
step5 大循环-unsortedbin清理
for (;; )
{
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
size = chunksize (victim);
if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
if (size == nb)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
if (in_smallbin_range (size))
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
}
else
{
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
if (fwd != bck)
{
size |= PREV_INUSE;
assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
{
fwd = bck;
bck = bck->bk;
victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert ((fwd->size & NON_MAIN_ARENA) == 0);
while ((unsigned long) size < fwd->size)
{
fwd = fwd->fd_nextsize;
assert ((fwd->size & NON_MAIN_ARENA) == 0);
}
if ((unsigned long) size == (unsigned long) fwd->size)
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;
}
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}
进入我们的外部大循环,上面源码仅仅展示了内部unsortedbin循环,这里要么有三种结果:
- 在unsorted bin循环当中发现unsorted bin只有一个堆块,且大于我们要分配的,则切割他然后返回
- 循环当中找到适配的堆块并把寻找路径上的不适配堆块返回适当的bin当中,则直接返回,剩余的堆块不予处理
- 循环当中未找到合适堆块,则继续下一步,此时unsortedbin已经遍历完毕,其中无堆块,全部存放于适合的bin当中
step6 大循环-largebin堆块寻找
经过unsortedbin循环之后,我们再来判断
if (!in_smallbin_range (nb))
{
bin = bin_at (av, idx);
if ((victim = first (bin)) != bin &&
(unsigned long) (victim->size) >= (unsigned long) (nb))
{
victim = victim->bk_nextsize;
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;
if (victim != last (bin) && victim->size == victim->fd->size)
victim = victim->fd;
remainder_size = size - nb;
unlink (av, victim, bck, fwd);
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
else
{
remainder = chunk_at_offset (victim, nb);
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
这里是从我们的largebin链表当中,从尾部开始遍历直到找到相同或者稍微大那么点的堆块,要么直接返回要么切割返回,切割的剩余部分存放在unsorted bin当中。
step7 大循环-位图分配
然后接着往下走:
++idx;
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);
for (;; )
{
if (bit > map || bit == 0)
{
do
{
if (++block >= BINMAPSIZE)
goto use_top;
}
while ((map = av->binmap[block]) == 0);
bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
}
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}
victim = last (bin);
if (victim == bin)
{
av->binmap[block] = map &= ~bit;
bin = next_bin (bin);
bit <<= 1;
}
else
{
size = chunksize (victim);
assert ((unsigned long) (size) >= (unsigned long) (nb));
remainder_size = size - nb;
unlink (av, victim, bck, fwd);
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
else
{
remainder = chunk_at_offset (victim, nb);
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks 2";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
以上即为普通的位图分配,倒是省下了诸多麻烦,这里通过寻找最小适配块来进行切割,剩下的就分配给unsorted bin
step7 使用top chunk
use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).
We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/
victim = av->top;
size = chunksize (victim);
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
else if (have_fastchunks (av))
{
malloc_consolidate (av);
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}
else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
}
}
至此,malloc分配分析结束,下面附赠一张分配流程图:

0x02 free 步骤
step1 free判断
首先就是我们的__libc_free
void __libc_free (void *mem)
{
mstate ar_ptr;
mchunkptr p;
void (*hook) (void *, const void *)
= atomic_forced_read (__free_hook);
if (__builtin_expect (hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS (0));
return;
}
if (mem == 0)
return;
p = mem2chunk (mem);
if (chunk_is_mmapped (p))
{
if (!mp_.no_dyn_threshold
&& p->size > mp_.mmap_threshold
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk (p);
return;
}
ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
}
step2 安全检查
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size;
mfastbinptr *fb;
mchunkptr nextchunk;
INTERNAL_SIZE_T nextsize;
int nextinuse;
INTERNAL_SIZE_T prevsize;
mchunkptr bck;
mchunkptr fwd;
const char *errstr = NULL;
int locked = 0;
size = chunksize (p);
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
{
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked)
(void) mutex_unlock (&av->mutex);
malloc_printerr (check_action, errstr, chunk2mem (p), av);
return;
}
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
{
errstr = "free(): invalid size";
goto errout;
}
check_inuse_chunk(av, p);
其中是对于一系列free参数的判断,我们看看即可
step3 置入fastbin
首先如果判断其范围处于fastbin,则置入链表,当然,存在多个检测:)
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
#if TRIM_FASTBINS
&& (chunk_at_offset(p, size) != av->top)
#endif
) {
if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
if (have_lock
|| ({ assert (locked == 0);
mutex_lock(&av->mutex);
locked = 1;
chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
|| chunksize (chunk_at_offset (p, size)) >= av->system_mem;
}))
{
errstr = "free(): invalid next size (fast)";
goto errout;
}
if (! have_lock)
{
(void)mutex_unlock(&av->mutex);
locked = 0;
}
}
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
set_fastchunks(av);
unsigned int idx = fastbin_index(size);
fb = &fastbin (av, idx);
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do
{
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
{
errstr = "invalid fastbin entry (free)";
goto errout;
}
}
step4 若不是fastbin,则该去哪儿呢
else if (!chunk_is_mmapped(p)) {
if (! have_lock) {
(void)mutex_lock(&av->mutex);
locked = 1;
}
nextchunk = chunk_at_offset(p, size);
...
nextsize = chunksize(nextchunk);
if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (nextsize >= av->system_mem, 0))
{
errstr = "free(): invalid next size (normal)";
goto errout;
}
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}
if (nextchunk != av->top) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
if (!nextinuse) {
unlink(av, nextchunk, bck, fwd);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size))
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;
set_head(p, size | PREV_INUSE);
set_foot(p, size);
check_free_chunk(av, p);
}
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}
···
从源码可以得知除了fastbin范围,其他块均存入unsorted bin
至此,free的分析也就此结束,如果大伙是从上面的malloc看下来的,那么肯定会发现这个free较之于十分简单,其中也是因为一些重复的函数在malloc已经讲解过,再写一遍没有必要,其中我也省略了很多free过程当中的检测部分,因为这较之于我们今天分析的目的有点远了。
当然,附赠free过程图:

0x03 glibc2.27版本差异
我们都知道,在2.26及以上增加了tcache,其中使得我们存取块更加迅速,下面我们就来探讨一下其中较之于2.23的差别
差异一:数据结构们
首先就是我们的tcache数据块,如下:
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
#if USE_TCACHE
# define TCACHE_MAX_BINS 64
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
# define usize2tidx(x) csize2tidx (request2size (x))
# define TCACHE_FILL_COUNT 7
#endif
差异二:__libc_malloc
我们在使用__libc_malloc
进行分配时,在调用malloc_hook后,int_malloc前会首先调用tcache_get函数来获取相关堆块
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
#if USE_TCACHE
size_t tbytes;
checked_request2size (bytes, tbytes);
size_t tc_idx = csize2tidx (tbytes);
MAYBE_INIT_TCACHE ();
DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins
&& tcache
&& tcache->entries[tc_idx] != NULL)
{
return tcache_get (tc_idx);
}
DIAG_POP_NEEDS_COMMENT;
#endif
题外话:tcache_get
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
return (void *) e;
}
结束,继续malloc
差异三:_int_malloc
这里的差异即为在察觉到请求size是fastbin范围时,多的是下面那个#if~#endif
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
idx = fastbin_index (nb);
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp;
victim = *fb;
if (victim != NULL)
{
if (SINGLE_THREAD_P)
*fb = victim->fd;
else
REMOVE_FB (fb, pp, victim);
if (__glibc_likely (victim != NULL))
{
size_t victim_idx = fastbin_index (chunksize (victim));
if (__builtin_expect (victim_idx != idx, 0))
malloc_printerr ("malloc(): memory corruption (fast)");
check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
}
#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}
这里就是在我们分配fastbin的时候,若链条上还有其他堆块,我们则需要将其中剩下的free堆块头插入tcache当中,调试源码会发现顺序刚好相反,因为是从fastbin头取,再头插至tcachebin
除了fastbin,还有在我们smallbin找到堆块时,若链表中也有剩余块,其也会用相同的手法头插入tcachebin当中,但是这里有个区别就是smallbin由于是从尾部取堆块,而不是跟fastbin一样从头取,关键区别如下(这里就不写其他的部分了):
#if USE_TCACHE
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = last (bin)) != bin)
{
if (tc_victim != 0)
{
bck = tc_victim->bk;
set_inuse_bit_at_offset (tc_victim, nb);
if (av != &main_arena)
set_non_main_arena (tc_victim);
bin->bk = bck;
bck->fd = bin;
tcache_put (tc_victim, tc_idx);
}
}
}
#endif
然后就是在for(;;)大循环的时候,unsorted bin while循环置入合适堆块bins的时候,首先会先置入tcache bins 而不是寻找到相应bins置入
如下:
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
if (size == nb)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
#if USE_TCACHE
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
return_cached = 1;
continue;
}
else
{
#endif
差异四:_int_free
其中差异便是在调用_int_free
时首先会调用tcache_put
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size;
mfastbinptr *fb;
mchunkptr nextchunk;
INTERNAL_SIZE_T nextsize;
int nextinuse;
INTERNAL_SIZE_T prevsize;
mchunkptr bck;
mchunkptr fwd;
size = chunksize (p);
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
malloc_printerr ("free(): invalid pointer");
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
malloc_printerr ("free(): invalid size");
check_inuse_chunk(av, p);
#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);
if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
#endif
题外话:tcache_put
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}
至此,个人所分析到的值得注意的差异就此结束