TUM CTF 2016: zwiebel

zwiebel (the German word for onion) was a reversing challenge for 55 points in this year's TUM CTF.

I found this onion in my kitchen, may I ask you to dissect it?



Executing the binary it asks us to enter a key. After entering some string the program responds with a sad face. Our input was not correct, so we have to find find out a valid key.

$ ./zwiebel
Input key: foobar

Disassembling the main function in radare2, we can see, what's going on.

            ;-- main:
╒ (fcn) sym.main 128
│           ; DATA XREF from 0x004006ed (entry0)
│           0x00400800      4157           push r15
│           0x00400802      4156           push r14
│           0x00400804      53             push rbx
│           0x00400805      bf07094000     mov edi, str.Input_key:     ; "Input key: " @ 0x400907
│           0x0040080a      31c0           xor eax, eax
│           0x0040080c      e84ffeffff     call sym.imp.printf
│           0x00400811      488b3d885722.  mov rdi, qword [rip + 0x225788] ; [0x625fa0:8]=0x616962654428203a LEA obj.__TMC_END__ ; ": (Debian 6.1.1-11) 6.1.1 20160802" @ 0x625fa0
│           0x00400818      e883feffff     call sym.imp.fflush
│           0x0040081d      488b158c5722.  mov rdx, qword [rip + 0x22578c] ; [0x625fb0:8]=0x2e312e3620293131 LEA obj.stdin__GLIBC_2.2.5 ; "11) 6.1.1 20160802" @ 0x625fb0
│           0x00400824      41bf80126000   mov r15d, str.hxp_th15_15_c3rt41nly_n0t_th3_fl4g_ ; "hxp{th15_15_c3rt41nly_n0t_th3_fl4g}" @ 0x601280
│           0x0040082a      bf80126000     mov edi, str.hxp_th15_15_c3rt41nly_n0t_th3_fl4g_ ; "hxp{th15_15_c3rt41nly_n0t_th3_fl4g}" @ 0x601280
│           0x0040082f      be90000000     mov esi, 0x90
│           0x00400834      e847feffff     call sym.imp.fgets
│           0x00400839      bf00000000     mov edi, 0
│           0x0040083e      be8d4c0200     mov esi, 0x24c8d
│           0x00400843      ba07000000     mov edx, 7
│           0x00400848      b922000000     mov ecx, 0x22               ; '"'
│           0x0040084d      41b8ffffffff   mov r8d, 0xffffffff         ; -1 ; -1
│           0x00400853      4531c9         xor r9d, r9d
│           0x00400856      e8f5fdffff     call sym.imp.mmap
│           0x0040085b      4989c6         mov r14, rax
│           0x0040085e      be10136000     mov esi, obj.shc            ; obj.shc
│           0x00400863      ba8d4c0200     mov edx, 0x24c8d
│           0x00400868      4c89f7         mov rdi, r14
│           0x0040086b      e820feffff     call sym.imp.memcpy
│           0x00400870      4c89fb         mov rbx, r15
│           0x00400873      31c0           xor eax, eax
│           0x00400875      41ffd6         call r14
│           0x00400878      31c0           xor eax, eax
│           0x0040087a      5b             pop rbx
│           0x0040087b      415e           pop r14
│           0x0040087d      415f           pop r15
╘           0x0040087f      c3             ret

It reads 144 byte from standard input and writes the result in a buffer filled with a fake flag. Then it uses mmap to map a new region of memory, copies an amount of data from the symbol shc to that region and calls it.

      ;-- shc:
      ; DATA XREF from 0x0040085e (sym.main)
      0x00601310      8db600000000   lea esi, dword [rsi]
      0x00601316      8d742600       lea esi, dword [rsi]
      0x0060131a      8db426000000.  lea esi, dword [rsi]
      0x00601321      8dbc27000000.  lea edi, dword [rdi]
      0x00601328      4889d8         mov rax, rbx
      0x0060132b      8a4000         mov al, byte [rax]
      0x0060132e      2440           and al, 0x40
 ┌──< 0x00601330      7416           je 0x601348
 ││   0x00601332      488d35340000.  lea rsi, qword [rip + 0x34] ; 0x60136d
 ││   0x00601339      ad             lodsd eax, dword [rsi]
 ││   0x0060133a      4889c1         mov rcx, rax
 ││   0x0060133d      ad             lodsd eax, dword [rsi]
┌───> 0x0060133e      3106           xor dword [rsi], eax
│││   0x00601340      4883c604       add rsi, 4
└───< 0x00601344      e2f8           loop 0x60133e
┌───< 0x00601346  ~   eb2d           jmp 0x601375
│││   ;-- str._h:__n:
│││   0x00601347     .string "-h:(\\n" ; len=6
│ │   0x0060134d      b801000000     mov eax, 1
│ │   0x00601352      bf00000000     mov edi, 0
│ │   0x00601357      4889e6         mov rsi, rsp
│ │   0x0060135a      ba03000000     mov edx, 3
│ │   0x0060135f      0f05           syscall
│ │   0x00601361      b83c000000     mov eax, 0x3c               ; '<'
│ │   0x00601366      bf01000000     mov edi, 1
│ │   0x0060136b      0f05           syscall
│ │   0x0060136d  ~   0a930000eb28   or dl, byte [rbx + 0x28eb0000]
│ │   ;-- str._F_f___:
│ │   0x00601372     .string "(F<f\\`<" ; len=8
  │   0x0060137a      a19eb6ab3562.  movabs eax, dword [0x3e9f3e6235abb69e] ; [0x3e9f3e6235abb69e:4]=-1
  │   0x00601383      0e             invalid                     ; [0x3e9f3e6235abb69e:4]=-1
  │   0x00601384      b1de           mov cl, 0xde
  │   0x00601386      1c46           sbb al, 0x46

The address of our input buffer is stored in ebx. The code loads the first byte of our input and checks if the bit 0x40 is set.

If that is not the case an error message is printed and the program exits. Otherwise it loads two words relative to the instruction pointer (qword [rip + 0x34]). The first is used as a counter and the second is xored to the following words in a loop. When the counter reaches zero a jump to the decrypted code is executed.

If we try to step through the binary with GDB, the program exits before reaching the main function. The reason is hidden in a function named __printf which is called during startup.

╒ (fcn) sym.__printf 45
│           0x004007d0      50             push rax
│           0x004007d1      31ff           xor edi, edi
│           0x004007d3      31f6           xor esi, esi
│           0x004007d5      31d2           xor edx, edx
│           0x004007d7      31c9           xor ecx, ecx
│           0x004007d9      31c0           xor eax, eax
│           0x004007db      e8d0feffff     call sym.imp.ptrace
│           0x004007e0      4885c0         test rax, rax
│       ┌─< 0x004007e3      7504           jne 0x4007e9
│       │   0x004007e5      31c0           xor eax, eax
│       │   0x004007e7      5a             pop rdx
│       │   0x004007e8      c3             ret
│       └─> 0x004007e9      bf04094000     mov edi, 0x400904
│           0x004007ee      e84dfeffff     call sym.imp.puts
│           0x004007f3      bfffffffff     mov edi, 0xffffffff         ; -1 ; -1
╘           0x004007f8      e833feffff     call sym.imp._exit

It checks whether any process is attached to it using ptrace. If that is the case the program exits with an error message. This is an anti-debugging method, but we can replace the jump with NOPs, such that the exit is never executed.

In each iteration one bit of the input is checked and the code of the next iteration gets decrypted. Since there seemed to be a lot of iterations, we have written a script (see below) so solve this challenge. It uses the capstone engine to disassemble the code, extracts offset and value of the checked bit and decrypts the next section. After 1605 iterations the code exits successfully with ":)" and we have recovered all bits of the flag:



The full solution:

#/usr/bin/env python3

from binascii import hexlify, unhexlify
from struct import pack, unpack
import IPython
from capstone import *

addr_data = 0x601270
len_data = 0x24d2d
off_data = 0x1270
addr_shc = 0x601310
len_shc = 0x24c8d
addr_buf = 0x601280

off_shc = addr_shc - addr_data

with open('zwiebel', 'rb') as f:
    data = f.read(len_data)

shc = bytearray(data[off_shc:])
mov_rax_rbx_bytes = unhexlify('4889d8')


md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True

constraints = []

def xor(offset, size, bites):
    print("[+] XOR 0x{:x}, 0x{:x}, {}".format(offset, size, hexlify(bites)))
    for i in range(size * 4):
        shc[offset+i] ^= bites[i%len(bites)]

def print_inst(i):
    print("0x{:04x}:  {:14s}  {}  {}".format(i.address, hexlify(i.bytes).decode(), i.mnemonic, i.op_str))

def analyze_block(offset):
    global cnt
    b1 = bytes(shc[offset:])
    insts = list(md.disasm(b1, offset))
        while insts[0].bytes != mov_rax_rbx_bytes:
    except IndexError:
        for i in md.disasm(b1, offset):
    i_indx = insts[1]
    print("[+] index instruction")
    indx = i_indx.operands[1].mem.disp
    i_mask = insts[2]
    print("[+] mask instruction")
    mask = i_mask.operands[1].imm
    i_bit_set = insts[3]
    print("[+] bit set instruction")
    bit_set = i_bit_set.insn_name() == 'je'
    constraints.append((indx, mask, bit_set))
    i_addr = insts[4]
    print("[+] next block instruction")
    off = insts[5].address + i_addr.operands[1].mem.disp
    off_next = off + 0x8
    xor_ctr = unpack('<I', shc[off:off+4])[0]
    xor_byte = shc[off+4:off+8]
    xor(off_next, xor_ctr, xor_byte)
    print("[+] next block at offset 0x{:x}".format(off_next))
    return off_next

def print_block(offset):
    sys_cnt = 0
    for i in md.disasm(bytes(shc[offset:]), offset):
        print("0x{:04x}:  {:14s}  {}  {}".format(i.address, hexlify(i.bytes).decode(), i.mnemonic, i.op_str))
        if i.insn_name() == 'syscall':
            sys_cnt += 1
        if sys_cnt == 2:

blocks = [0x11]

cnt = 1
while True:
    print("[+] block {}".format(cnt))
        nxt = analyze_block(blocks[-1])
    cnt += 1


inp = bytearray(0x90)
for off, mask, bit_set in constraints:
    if bit_set:
        inp[off] |= mask