Skip to content

Commit

Permalink
Potential fix for #2: Treating unknown data as THUMB code, not bytes.
Browse files Browse the repository at this point in the history
  • Loading branch information
aarant committed Jun 9, 2020
1 parent 735212b commit a6deada
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 27 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Luvdis is a tool for disassembling GBA ROMs. Features include:
- [From Releases](#from-releases)
- [From latest source](#from-latest-source)
- [Usage](#usage)
- [FAQ](#faq)
- [Options](#options)
- [ROM detection](#rom-detection)

Expand Down Expand Up @@ -73,6 +74,17 @@ To disassemble only part of a ROM, say, up to the start of read-only data, provi
$ luvdis rom.gba --start 0x0800024C --stop 0x081b32b4 -o rom.s
```

### FAQ

#### How can I get rid of large blocks of raw bytes in the disassembly?

By default, Luvdis treats any area of a ROM that it cannot identify as executable code as raw byte data. You can change
this behavior with the `--default-mode` option:

```sh
$ luvdis rom.gba --default-mode THUMB -o rom.s
```

### Options

```
Expand Down Expand Up @@ -105,6 +117,9 @@ Options:
--min-length INTEGER RANGE Minimum valid instruction length required in
order to 'guess' a function. Must be at least 1,
defaults to 3.
--default-mode [THUMB|BYTE|WORD]
Default disassembly mode when the nature of
an address is unknown. Defaults to 'BYTE'.
--help Show this message and exit.
```

Expand Down
2 changes: 1 addition & 1 deletion luvdis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
Copyright (C) 2020 A. Antonitis. Licensed under the MIT license.
"""
__version__ = '0.6.1'
__version__ = '0.7.0'
__doc__ = __doc__.replace('__version__', __version__)
url = __url__ = 'https://github.com/arantonitis/luvdis'
18 changes: 11 additions & 7 deletions luvdis/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from luvdis.config import read_config
from luvdis.common import eprint, set_debug
from luvdis.rom import ROM
from luvdis.analyze import State, BASE_ADDRESS, END_ADDRESS
from luvdis.analyze import State, BASE_ADDRESS, END_ADDRESS, THUMB, BYTE, WORD


class AddressInt(click.ParamType):
Expand All @@ -31,6 +31,7 @@ def convert(self, value, param, ctx):


ADDRESS_INT = AddressInt()
MODE_MAP = {'byte': BYTE, 'thumb': THUMB, 'word': WORD}


@click.group(cls=DefaultGroup, default='disasm', default_if_no_args=True)
Expand All @@ -47,7 +48,7 @@ def main():
'output path.')
@click.option('-c', '--config', type=click.Path(exists=True, dir_okay=False, readable=True),
help='Function configuration file.')
@click.option('-co', '--config-out', 'config_out', type=click.Path(writable=True, dir_okay=False),
@click.option('-co', '--config-out', type=click.Path(writable=True, dir_okay=False),
help="Output configuration. If any functions are 'guessed' by Luvdis, they will appear here.")
@click.option('-D', '--debug', is_flag=True, help='Turn on/off debugging behavior.')
@click.option('--start', type=ADDRESS_INT, default=BASE_ADDRESS,
Expand All @@ -58,15 +59,18 @@ def main():
help="Assembler macro file to '.include' in disassembly. If not specified, default macros are embedded.")
@click.option('--guess/--no-guess', default=True,
help='Turn on/off function guessing & discovery. Default is to perform guessing.')
@click.option('--min-calls', 'min_calls', type=click.IntRange(1), default=2,
@click.option('--min-calls', type=click.IntRange(1), default=2,
help="Minimum number of calls to a function required in order to 'guess' it. Must be at least 1, "
"defaults to 2.")
@click.option('--min-length', 'min_length', type=click.IntRange(1), default=3,
@click.option('--min-length', type=click.IntRange(1), default=3,
help="Minimum valid instruction length required in order to 'guess' a function. Must be at least 1, "
"defaults to 3.")
def disasm(rom, output, config, config_out, debug, start, stop, macros, guess, min_calls, min_length, **kwargs):
@click.option('--default-mode', type=click.Choice(('THUMB', 'BYTE', 'WORD'), case_sensitive=False), default='BYTE',
help="Default disassembly mode when the nature of an address is unknown. Defaults to 'BYTE'.")
def disasm(rom, output, config, config_out, debug, start, stop, macros, guess, min_calls, min_length, default_mode,
**kw):
""" Analyze and disassemble a GBA ROM. """
for k, v in kwargs.items():
for k, v in kw.items():
print(k, v)
set_debug(debug)
functions = read_config(config) if config else None
Expand All @@ -76,7 +80,7 @@ def disasm(rom, output, config, config_out, debug, start, stop, macros, guess, m
if output in (None, '-'):
output = None
eprint(f'No output file specified. Printing to stdout.')
state.dump(rom, output, config_out)
state.dump(rom, output, config_out, default_mode=MODE_MAP[default_mode.lower()])


@main.command(name='info')
Expand Down
37 changes: 18 additions & 19 deletions luvdis/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def __init__(self, functions=None, min_calls=2, min_length=3, start=BASE_ADDRESS
else:
name = value
self.unexpanded[addr] = name
self.functions = {} # addr -> (name, end)
self.functions = {} # addr -> (name, end_address)
self.not_funcs = set()
self.min_calls, self.min_length, self.start, self.stop = min_calls, min_length, start, stop
self.macros = macros
Expand Down Expand Up @@ -468,7 +468,7 @@ def analyze_func(self, rom, addr, state=None):
exit_behaved = False
end = rom.size | 0x08000000 if ins is None else ins.address
break
elif ins.id == Opcode.ldr: # Mark target as WORD
elif ins.id == Opcode.ldr: # Mark load target as WORD
end = addr = ins.address+2
target = ins.target
if target < self.stop: # TODO: Is this necessary?
Expand Down Expand Up @@ -559,25 +559,27 @@ def label_for(self, addr):
return name
return f'_{addr:08X}'

def dump(self, rom, path=None, config_output=None):
if config_output: # Optionally, write updated function list
def dump(self, rom, path=None, config_output=None, default_mode=BYTE):
if config_output: # Optionally write updated function list
addr_map = {addr: (name, self.module_addrs.get(addr, None)) for addr, (name, _) in self.functions.items()}
write_config(addr_map, config_output)
# Setup initial module & file
folder, module = os.path.split(path) if path else (None, None)
if DEBUG and path: # Output function range info if debugging
if DEBUG and path:
import pickle
# Output function range info if debugging
with open(os.path.join(folder, 'funcs.pickle'), 'wb') as f:
pickle.dump(self.debug_ranges, f)
# Also output a linker script
fl = open('luvdis.ld', 'w')
f = None if path else sys.stdout
# Setup start and end addresses
addr = self.start
if type(self.stop) is float: # End is the final address in the ROM
if type(self.stop) is float: # End at the final address in the ROM
end = rom.size | BASE_ADDRESS
else:
end = min(rom.size, self.stop & 0xffffff) | BASE_ADDRESS
if addr not in self.module_addrs and module: # Set module of initial address to the path output
if addr not in self.module_addrs and module: # Mark the very first address as belonging to the initial module
self.module_addrs[addr] = module
mode, flags, bytecount = BYTE, 0, 0
# Initialize progress bar & messages
Expand All @@ -594,8 +596,8 @@ def warn(*args):
old_mode = mode

# Switch output modes
if addr_flags == 0 and flags != 0: # Switch to byte mode when address flags are zero
mode = BYTE
if addr_flags == 0 and flags != 0: # Switch to default mode when address flags are zero
mode = default_mode # By default, BYTE mode
elif addr_flags & FLAG_EXEC and not (flags & FLAG_EXEC): # Output code
mode = THUMB
elif addr_flags & FLAG_WORD and not (flags & FLAG_WORD) and not (addr_flags & FLAG_EXEC): # Output words
Expand Down Expand Up @@ -638,7 +640,7 @@ def warn(*args):
# Switch module output
if f is not sys.stdout and addr in self.module_addrs: # Address has module info
new_module = self.module_addrs[addr]
if new_module != module or f is None: # New/first module seen
if new_module != module or f is None: # Entering new/first module
module = new_module
path = os.path.join(folder, module)
eprint(f"{addr:08X}: module '{path}'")
Expand All @@ -651,12 +653,12 @@ def warn(*args):
f = open(path, 'w', buffering=1)
f.write(ASM_PRELUDE)
f.write(f'.include "{self.macros}"\n' if self.macros else MACROS)
bytecount = 0 # Reset byte bytecount
if DEBUG: # Output link script if debugging
bytecount = 0 # Reset bytecount
if DEBUG: # Output linker script if debugging
fl.write(f'{path[:-2]}.o(.text);\n')

# Emit code or data
if mode == THUMB:
if mode == THUMB: # THUMB code
offset = ins.size
if ins.id == Opcode.bl or ins.id in BRANCHES:
target = ins.target
Expand All @@ -672,7 +674,7 @@ def warn(*args):
emit = f'.2byte 0x{i:04X} @ {ins.mnemonic} _{target:08X}'
elif ins.id == Opcode.bx:
value = rom.read(addr, 2)
# Assembler cannot emit bx with nonzero rd, see THUMB.5 TODO: Should these be illegal?
# Assembler will not emit bx with nonzero rd, see THUMB.5 TODO: Should these be treated as illegal?
emit = f'.inst 0x{value:04X}' if value & 3 != 0 else str(ins)
elif ins.id == Opcode.ldr and isinstance(ins, Thumb6): # Convert PC-relative loads into labels
target = ins.target
Expand Down Expand Up @@ -700,10 +702,7 @@ def warn(*args):
value = self.label_for(value-1)
else:
value = f'0x{value:08X}'
if label:
emit = f'{label} .4byte {value}'
else:
emit = f'\t.4byte {value}'
emit = f'{label} .4byte {value}' if label else f'\t.4byte {value}'
if DEBUG:
comment += f' @ {addr_flags}'
f.write(f'{emit}{comment}\n')
Expand All @@ -729,7 +728,7 @@ def warn(*args):
flags = addr_flags
addr += offset
bar.update(offset)
# Close current module
# Done with output; close file handles and cleanup
if f is not sys.stdout and f:
if bytecount:
f.write('\n')
Expand Down

0 comments on commit a6deada

Please sign in to comment.