From cf67c597d1b39ff152d133525e41a8791a96c143 Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 13:27:24 -0300 Subject: [PATCH 01/14] Automatic changelog --- CHANGELOG.md | 1057 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1057 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..32d64b0e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,1057 @@ +## [Unreleased] + +## [1.17.3] - 2023-09-18 + +### Uncategorized + +- Hardcode a check to avoid disassembling `.vutext` +- Fix `.double` disassembly for little endian + +## [1.17.2] - 2023-09-18 + +### Uncategorized + +- Dummy update, sorry + +## [1.17.1] - 2023-09-15 + +### Uncategorized + +- Tweak endlabel to be emitted in the same place as the size directive does +- Emit global labels for symbols with no type in the middle of functions +- Add checks for rs and rt registers instead of assuming they are used to avoid crashes +- Option to set the filtering addresses for the symbol finder +- Consider the `j` instruction as a function end if rabbitizer's `--j-branch` option is turned off +- Always migrate mips1 doubles when migrating rodata + +## [1.17.0] - 2023-08-27 + +### Uncategorized + +- Allow using `MIPS_NONE` reloc type as a way to avoid symbolizing a reference and use the raw value instead. +- Allow using a different label for symbols in the middle of functions. + - Useful for setting alternative entry points for handwritten functions. + - It can be used by setting the `ASM_TEXT_ALT_LABEL`. +- Fix `elfObjDisasm` crashing if a reloc section references an unhandled section like `.pdr`. + +## [1.16.5] - 2023-08-22 + +### Uncategorized + +- Do not use iQue symbols by default if user asked for libultra symbols + +## [1.16.4] - 2023-08-19 + +### Uncategorized + +- Try to gather the `$gp` register value from non-PIP elfs too +- Various bugfixes related to `j` instructions being used as a way to call another function. 
+- Fix size directive not being properly emitted for functions with user-declared size that has dangling nops. +- Detect ABI and cpu flags from elf header. +- Do not use mips1 double detection heuristic on non o32 abis +- Avoid warning about LOCAL NOTYPE symbols in elf files + +## [1.16.3] - 2023-08-15 + +### Uncategorized + +- Fix hex comment crashing because of doubles when parsing little endian binaries + +## [1.16.2] - 2023-08-14 + +### Uncategorized + +- Fix size directive not using the right label when symbols are smaller than a word +- Fix size directive not being properly emitted for symbols with a size smaller than a word +- Generate pad symbols to honor user declared sizes + - Symbols will be automatically split if the user-declared size is smaller than the symbol size (usually due to size not being a multiple of 4, file splits, other symbols not being referenced, etc) +- Add the character `0x1A` to the set of special cases for string decoding. +- Fix bug which caused incorrect file splits to be reported on strings whose last word was a zero. +- Workaround for big addends when building with modern GAS + + +## [1.16.0] - 2023-07-23 + +### Uncategorized + +- Add a detector for the redundant function end produced by IDO with some specific flag combinations. + - It is turned off by default, but it can be turned on globally with `--detect-redundant-function-end`, or globally and per file via the API. +- Fix BSS sections not emitting a first symbol if it isn't referenced anywhere. + + +## [1.15.4] - 2023-07-14 + +### Uncategorized + +- Avoid taking into account invalid instructions when trying to find function boundaries. +- Properly honor size of user-declared symbols for elf static symbols. + +## [1.15.3] - 2023-07-10 + +### Uncategorized + +- Don't append the section name if it is known when disassembling elfs + - This special cases the sections `.text`, `.data`, `.rodata` and `.bss`.
+ - Avoids the redundant `filename_.text/` naming scheme + +## [1.15.2] - 2023-07-04 + +### Uncategorized + +- Fix hardcoded shift value in alignment directive + +## [1.15.1] - 2023-07-04 + +### Uncategorized + +- Emit string alignment directives even when the section isn't aligned to a multiple of 8. + - Some projects can have rodata sections aligned to just a multiple of 4, and not emitting the directive in those cases can break their builds + +## [1.15.0] - 2023-07-03 + +### Uncategorized + +- Change the string guesser to work with multiple levels instead of plainly enabled/disabled and the aggressive toggle. + - The new option is used via the API `GlobalConfig.RODATA_STRING_GUESSER_LEVEL` or via the CLI `--rodata-string-guesser level`. + - The old `GlobalConfig.STRING_GUESSER` and `GlobalConfig.AGGRESSIVE_STRING_GUESSER` options are now deprecated, same as the CLI options `--string-guesser` and `--aggressive-string-guesser`. + - The old `GlobalConfig.STRING_GUESSER = True` is equivalent to the new `GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1` + - The old `GlobalConfig.AGGRESSIVE_STRING_GUESSER = True` is equivalent to the new `GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 4` + - Meaning of the new levels: + - level 0: Completely disable the guessing feature. + - level 1: The most conservative guessing level. Imposes the following restrictions: + - Do not try to guess if the user provided a type for the symbol. + - Do not try to guess if type information for the symbol can be inferred by other means. + - A string symbol must be referenced only once. + - Strings must not be empty. + - level 2: A string no longer needs to be referenced only once to be considered a possible string. This can happen because of a deduplication optimization. + - level 3: Empty strings are allowed. + - level 4: Symbols with autodetected type information but no user type information can still be guessed as strings. + - The level defaults to 1.
+- Implement string guesser for the data section. + - Controlled by the API `GlobalConfig.DATA_STRING_GUESSER_LEVEL` or via the CLI `--data-string-guesser level`. + - Decodes strings with the `ASCII` encoding by default. + - The meaning of each level is the same as the rodata string guesser. + - The level defaults to 2. +- Add experimental Pascal string guesser. + - Works for both rodata and data sections. + - Follows the same level logic as the C string guesser. + - It is disabled by default. +- Start emitting `.size` directives by default. +- Emit `jlabel` instead of `dlabel` for jumptable labels by default +- Emit `dlabel` instead of `glabel` for data, rodata and bss symbols by default + +## [1.14.3] - 2023-06-19 + +### Uncategorized + +- Failcheck for non aligned doubles +- (Hopefully) Fix same-vram overlays using symbols from other overlays +- `elfObjDisasm`: Can now disassemble sections with arbitrary names +- `disasmdis`: Disable pseudo instructions by default + +## [1.14.2] - 2023-06-10 + +### Uncategorized + +- Actually add `py.typed` to `pyproject.toml` +- Use `bytearray` as little as possible +- `writeBytearrayToFile` is now deprecated, use `writeBytesToFile` instead + +## [1.14.1] - 2023-06-10 + +### Uncategorized + +- Emit a previous alignment directive for strings. + - Ensures strings are always word aligned +- Purge `.balign` directive in favor of `.align` directive +- Add `py.typed` file. Whoops + +## [1.14.0] - 2023-05-10 + +### Uncategorized + +- Try to better support N32 PIC programs. + - The current issue was spimdisasm was not able to properly generate symbol references for `$gp` accesses. + - GOT table now gets its own address from the reginfo instead of the dynamic table. + - Accesses pointing outside the GOT table are tried to be redirected to `sdata`, `srdata` and `sbss` sections. +- Implement `--dyn-syms` on readelf-like mode.
+- Minor improvements to readelf output format + +## [1.13.3] - 2023-05-05 + +### Uncategorized + +- Fix not writing to subfolders properly when a csv filesplit entry has a slash on its name. + +## [1.13.2] - 2023-05-01 + +### Uncategorized + +- Add support for `.dummy` section in csv file split format +- Add readelf's `--section-headers` flag to elfObjDisasm + +## [1.13.1] - 2023-04-30 + +### Uncategorized + +- Fix a possible `None` case in ` getInstrCategoryFromStr` +- Add note about R3000GTE and R5900 instruction set support in the README + + +## [1.13.0] - 2023-04-30 + +### Uncategorized + +- Add support for R3000GTE + + +## [1.12.5] - 2023-04-28 + +### Uncategorized + +- Fix jumptable end detection algorithm on vram ranges different than `0x80XXXXXX` +- Add `--function-info` flag to `elfObjDisasm` +- Option for emitting size directives in the generated assembly +- Add `--asm-emit-size-directive` flag to emit size directives on generated assembly + +## [1.12.4] - 2023-04-19 + +### Uncategorized + +- Fix user-declared relocs having an incorrect addend + +## [1.12.3] - 2023-04-18 + +### Uncategorized + +- Fix conflicting `-V` flag + +## [1.12.2] - 2023-04-18 + +### Uncategorized + +- Fix data symbols not using local reloc overrides +- Fix `.word`s not being updated after clearing pointers +- Some pointer clearing fixes +- Add `--version` flag to every cli tool +- Fix data not being properly disassembled on `singleFileDisasm` +- Enforce UTF-8 encoding on generated asm files (PR #111) + - Thanks to @1superchip +- Emit a comment saying if a reloc is a global one when the emit relocs flag is passed +- Fix incorrect addends on non static symbols from elf files. 
+ - Fixes issue #110 +- Fix a regression where some `%lo` symbols weren't being properly paired because of the `%got` being reused on PIC code +- Fix sizes for inferred types +- Properly detect `-mips1` `double` literals + - Fixes issue #57 + +## [1.12.1] - 2023-03-28 + +### Uncategorized + +- Fix addends bigger than `0x7FFF` and smaller than `0x10000` + +## [1.12.0] - 2023-03-21 + +### Uncategorized + +- Now exposes known types to spimdisasm via `common.gKnownTypes` +- Prevents referencing labels and jumptable labels with addends +- Prevents referencing labels and jumptable labels in non jumptable symbols +- `static` (local) symbol handling of non relocated elf object files was improved +- Fake/non used symbols are no longer being emitted when disassembling elf .o files + +## [1.11.6] - 2023-03-10 + +### Uncategorized + +- Add flag to specify instruction category in `elfObjDisasm` and `singleFileDisasm` +- Remove `ContextSymbol.type` and add `ContextSymbol.userDeclaredType` and `ContextSymbol.autodetectedType` + - A property named `.type` is available to provide backwards compatibility + +## [1.11.5] - 2023-03-07 + +### Uncategorized + +- Sort detected file boundaries and remove duplicates + +## [1.11.4] - 2023-02-20 + +### Uncategorized + +- Fix `--data-start` not processing hex correctly +- Add function vrom to `--function-info` and tweak its input a bit + +## [1.11.3] - 2023-02-15 + +### Uncategorized + +- Allow specifying a custom suffix to every autogenerated symbol with `--custom-suffix` +- Add "referenced functions" information to the `--function-info` flag + +## [1.11.2] - 2023-02-13 + +### Uncategorized + +- Add flag to emit inline relocs +- Do not report extra padding in functions if user declared size matches the size of the function +- Rename `ContextSymbol.size` to `ContextSymbol.userDeclaredSize` +- Add `--function-info` flag +- `FuncRodataEntry`: Fix migrate parameter if function has no rodata to be migrated + +## [1.11.1] - 2023-01-30 + +###
Uncategorized + +- Allow `None` in `FunctionRodataEntry` methods +- `FuncRodataEntry`: Do not write `.section .text` if the function is `None` + +## [1.11.0] - 2023-01-30 + +### Uncategorized + +- CLI changes: + - Install CLI tools as actual terminal programs + - Allow invoking the CLI tools from spimdisasm as subparsers + - The old way of invoking the CLI tools (`python3 -m spimdisasm.clitool`) is now deprecated, but still works +- `disasmdis`: Fix crash if the input isn't a multiple of a word +- Report with a comment which instruction made spimdisasm detected as a handwritten instruction +- New in the API: `FunctionRodataEntry` + - Cleaner interface for rodata migration and similar functions + - Provides method for intermixing functions and non-migrated rodata symbols in a way the correct order is still preserved + - Old functions from `FileHandlers` which provided rodata migration functionalities are now deprecated + +## [1.10.6] - 2023-01-28 + +### Uncategorized + +- Fix some `.text` boundaries not being properly detected. +- Add hardware registers as constants so they are used by `lui`/`ori` pairs +- Check for bss symbol size to match user declared size +- Warn if the globalsegment vrom start and end is the same +- Identify 32bitsmode elf flag +- Avoid reporting leading zeroes as padding in rodata symbols if the size of the symbol matches the user declared one + +## [1.10.5] - 2023-01-28 + +### Uncategorized + +- Emit a comment on invalid instructions disassembled as words +- Remove redundant `.noreorder` +- Fix `disasmdis` ignoring endian parameter + +## [1.10.4] - 2023-01-20 + +### Uncategorized + +- Avoid trashing function analysis for `j` jumps outside of the function +- Add `EGCS` compiler +- `nop`s at the beginning of the files are now skipped. 
+- Fix `disasmdis` not properly accepting spaces +- Add iQue-specific libultra syms and hardware regs +- Add `--data-start` and `--data-end` flags to `singleFileDisasm` + +## [1.10.3] - 2023-01-08 + +### Uncategorized + +- Fix OoB for automatic type-based naming + +## [1.10.2] - 2023-01-08 + +### Uncategorized + +- Fix a small typo on `osAppNMIBuffer` + +## [1.10.1] - 2023-01-05 + +### Uncategorized + +- Adds a workaround for addends which do not fit in a 16-bit value + +## [1.10.0] - 2023-01-05 + +### Uncategorized + +- Rework system to allow/disallow addend references on data +- Add support for splat's symbol_addrs format for standalone invocations + +## [1.9.2] - 2023-01-02 + +### Uncategorized + +- Fix emitting `.align` directives on unaligned jumptables +- Fix rodata split detection not properly considering special jumptable alignment +- Add `nameEnd` member to `ContextSymbol` to allow emitting a closing user-declared label + +## [1.9.1] - 2022-12-29 + +### Uncategorized + +- Emit a `.align 3` directive for every jumptable on non-IDO compilers + +## [1.9.0] - 2022-12-28 + +### Uncategorized + +- Reloc system re-worked. Users can now provide their own relocs to improve the automatic disassembly +- loPatch system has been removed and superseded by the global reloc system +- `GlobalConfig` variables can now be set via environment variables. + - Parameters passed by cli take priority over environment variables. + - Options configured via code (when using this as a library) take priority over environment variables.
+ + +## [1.8.2] - 2022-12-19 + +### Uncategorized + +- Check for banned symbols on addend references + +## [1.8.1] - 2022-12-19 + +### Uncategorized + +- New interface for allowing banning ranges of symbols, instead of having to add them one by one + +## [1.8.0] - 2022-12-16 + +### Uncategorized + +- Require [`rabbitizer` 1.4.0](https://github.com/Decompollaborate/rabbitizer/releases/tag/1.4.0) +- Allow to type-hint strings with `asciz` +- Allow disassembling `.data` symbols as strings + - This won't be automatically guessed as with `.rodata`, this only will happen with type-hints +- `disasmdis` now accepts spaces and input from `stdin` + +Meta: +- `setup.cfg` was removed and all its info was moved to `pyproject.toml` + +## [1.7.12] - 2022-12-05 + +### Uncategorized + +- Allow symbol references on rodata (for non jump-tables) +- Output version on disassembled files +- Add option to show which symbols reference the disassembled symbol +- Add `--file-splits` option to `elfObjDisasm` + +## [1.7.11] - 2022-11-29 + +### Uncategorized + +Check size of floats and doubles before migrating them + +## [1.7.10] - 2022-11-26 + +### Uncategorized + +- Allow changing the label used for jumptable labels with `GlobalConfig.ASM_JTBL_LABEL` +- Allow forcing (and forcing not to) migrate a symbol on rodata migration + +- Elf fixes: + - Reference `NOTYPE` symbols + - Various GOT fixes + - Show isAutogeneratedPad in the context file #79 + - Show the first %lo reference for each symbol in the context #80 + + +## [1.7.9] - 2022-11-09 + +### Uncategorized + +- Fix an OoB issue when trying to post-process the GOT analysis on non-PIC mode +- Emit a comment for automatically generated bss pads.
+ - Those pads are created mainly to properly adjust the `.space` of a bss symbol if said symbol had an user-declared size + +## [1.7.8] - 2022-11-04 + +### Uncategorized + +- Fix function pointers being incorrectly tagged as `%call16` instead of `%got` +- Avoid crashing when trying to migrate functions when there's no rodata section +- Improve logic to disassemble `.byte`s and `.short`s +- Fix wrong migrated rodata on PIC programs +- Avoid using addends on function references +- Improve logic to find the jumptable ends (again) + +## [1.7.7] - 2022-11-02 + +### Uncategorized + +- Improve detection of the end of jumptables +- Refactor REL handling. It has been simplified + - This should improve disassembling `.o` files +- Symbols from elfs are checked to be in the correct vram range before adding them to the context. +- Allow disassembling data symbols as floats and doubles + + +## [1.7.6] - 2022-10-31 + +### Uncategorized + +- Refactor GOT handling + - Should fix IDO 5.3 disassembly +- Use glabels for jumptable labels when the functions are not being migrated +- Support `MIPS_GOT_HI16`, `MIPS_GOT_LO16`, `MIPS_CALL_HI16` and `MIPS_CALL_LO16` reloc types from `.rel` elf sections +- `disasmdis` now ignores non hex characters +- Negative addresses are considered as GOT accesses in PIC mode +- Add special handling for the GOT lazy resolver + +## [1.7.5] - 2022-10-30 + +### Uncategorized + +- Use `.gpword` on PIC jumptables +- Fix showing the got table in a few niche cases +- Use `glabel` on migrated rodata again until we figure out why it messes matching +- Do not use `glabel` on jumptable labels + +## [1.7.4] - 2022-10-28 + +1.7.4: GOT fixes + +### Uncategorized + +GOT fixes: +- Fix using GOT local addresses as functions +- Fix `.data` symbols incorrectly referencing GOT local addresses +- Fix `elfObjDisasm` using N64 specific symbols by default +- Migrated rodata will no longer use glabels + - This change was made to accommodate GOT global/local references + +New 
features: +- The autodetected size of functions can now be queried from a `ContextSymbol` with `getSize()` +- The arch level can now be specified when disassembling + - It is detected automatically when parsing an elf file + - `.set gp=64` is disabled on MIPS1 and MIPS2 arch levels +- `elfObjDisasm` will now produce a list of functions and non migrated rodata when requesting to migrate functions. + - This can be useful to bootstrap newly generated C files so the rodata order is easier to preserve + +Misc changes: +- `elfObjDisasm` now display progress to stdout when disassembling + - This behavior can be disabled with `-q` +- Rodata migration has been slightly tweaked: migration will be performed if only one function references the symbol, contrary to the old "only one reference in the whole codebase must reference the symbol" + - Seems to be a common pattern on PIC programs compiled with IDO + + +## [1.7.3] - 2022-10-24 + +1.7.3: `--aggressive-string-guesser` + +### Uncategorized + +- Do not infer the type of a variable if the access types are heterogeneous, which may imply a struct +- New `--aggressive-string-guesser` flag + - Tries to decode string even if the string is empty, the symbol may have type information or it is referenced more than once +- Add `PSYQ` compiler option + - Currently it enables the same options as `SN64` + +## [1.7.2] - 2022-10-24 + +### Uncategorized + +- Allow passing context flags to `elfObjDisasm` +- Fix type inference if the user declared a type for the variable +- Fix some strings not being properly detected on elf files (again) + +## [1.7.1] - 2022-10-23 + +1.7.1: elf fixing: got and dynamic programs + +### Uncategorized + +- New flags in `elfObjDisasm`: + - Flags which try to mimic `readelf`: `--file-header`, `--syms`, `--relocs` and `-display-got` + - `--split-functions`, has the same behavior as `singleFileDisasm` +- `elfObjDisasm` changes: + - Fix undefined symbols handling in + - Fix addends of got global symbols + - Use rel 
types from the elf file if they are available instead of trying to infer them + - Warn when trying to disassemble an `abi2` (N32) elf + - Warn for negative GOT accesses instead of crashing + - Warn if unhandled flags are found in an elf file + - Fix gp value on N32 abi + - `$gp` accesses are no longer symbolized if the address is not found in the got table +- Fix rodata pointer detection in data on elf files (fixes #63) + +## [1.7.0] - 2022-10-18 + +### Uncategorized + +- Add `leoBootID` to libultra syms +- Fix `--help` screen. whoops +- `--no-emit-cpload` flag to disable emitting the `.cpload` directive in PIC programs + - `_gp_disp` is emitted instead of the raw immediate values + - The `_gp_disp` value is emitted as a comment +- Updates `rabbitizer` requirement to 1.3.1 +- Fix `.byte` and `.short` in little endian + - Thanks @Xeeynamo (#62) +- Fix boundaries detection reporting in `elfObjDisasm` + - The output of this report is csv-friendly. Thanks @EllipticEllipsis (#65) +- Tweak string disassembly + - If a symbol is in the middle of a string then the string is aborted and disassembled as `.word`s + - Check the next bytes (until a word boundary) after the nul terminator of a string are zero. Thanks @Xeeynamo (#64) + - Strings with '\a' are no longer treated as real strings +- Fix labels not being emitted in rodata if they were not word-aligned. + - Fixes #59 +- Remove `GlobalConfig.ADD_NEW_SYMBOLS` +- Avoid pairing `%gp_got` symbols on non PIC code +- General cleanups + +## [1.6.5] - 2022-10-07 + +1.6.5: elf fixes + +### Uncategorized + +- Avoid crashing if a `%got` access' address is not in the global table. Prints a warning instead.
+- Fix symbol usage on relocatable files, kinda +- Ignore `.rel` sections if the file is not of REL type +- Avoid emitting `%got` rels for non PIC disassemblies + +## [1.6.4] - 2022-10-06 + + 1.6.4: Fix symtab parsing from relocated elfs + +### Uncategorized + + + +## [1.6.3] - 2022-10-04 + +1.6.3: Speedup for overlay disassembly + +### Uncategorized + +- Moves the `globalSegment` check above all the other checks in `getSymbol`, providing a faster lookup since most of the time overlays usually reference a symbol from the `globalSegment` over a symbol from another overlay segment from a different category + +## [1.6.2] - 2022-10-03 + +1.6.2: hotfix + +### Uncategorized + +Should address issue #55 + +## [1.6.1] - 2022-10-03 + +1.6.1: SN64 strikes again + +### Uncategorized + +- Add align directive for doubles for SN64 +- Track which functions references each symbol + - Used to improve rodata migration on non-IDO compilers +- Use `g` format specifier when formatting disassembled floats and doubles + +## [1.6.0] - 2022-10-01 + +1.6.0: Refactor front-end scripts + +### Uncategorized + +- Deleted `singleFileDisasm.py`, `simpleFileDisasm.py`, `disasmdis.py`, `rspDisasm.py`, `elfObjDisasm.py` +- The frontend scripts were converted to submodules, now those can be executed with the `python3 -m spimdisasm.submodulename` syntax, allowing to execute them even in the pip installations of spimdisasm. +- Refactored raw path handling to use `pathlib`. +- Add installation instructions to readme. 
+- SN64 tweaks: + - Use `.align` directive for string disassembly on SN64 + - Migrate const variables to functions on SN64 + - Remove `.rdata` check on migrated rodata for SN64 + +## [1.5.7] - 2022-09-30 + +1.5.7: Fix 0 size bss variables + +### Uncategorized + +- Fixes an issue where 0 size bss variables were being output if the last bss variable of a file had a size which filled its size up until the file boundary + + + +## [1.5.6] - 2022-09-27 + +1.5.6: More SN64 tweaks and data/rodata endianness + +### Uncategorized + +- Fixes data analysis. It was ignoring banned symbols +- Use `.rdata` on rodata migration for SN64 +- Fix `ASM_DATA_SYM_AS_LABEL` on bss generation +- Fix rodata symbols searching during migration +- Add option to disassemble data/rodata with a different endianness than the global one +- Allow changing the string encoding per rodata segment +- Return the created segment by `addOverlaySegment` + +## [1.5.5] - 2022-09-24 + +1.5.5: Fix `ASM_DATA_SYM_AS_LABEL` + +### Uncategorized + +- Fix `ASM_DATA_SYM_AS_LABEL`: It was outputting the data label + +## [1.5.4] - 2022-09-24 + +1.5.4: More SN64 tweaks + +### Uncategorized + +- `ASM_DATA_SYM_AS_LABEL`: Allow adding a data symbol as a simple label.
+- `late_rodata` logic has been tweaked to only be applied when compiler is set to IDO +- Allow range checks on symbols referenced by data symbols +- Add option to allow all addends referenced by data symbols +- Fix use of user-declared sizes on functions and bss symbols + +## [1.5.3] - 2022-09-23 + +1.5.3: More default banned symbols + +### Uncategorized + +- Add `0x7FFFFFFF` to the list of default banned symbols + +## [1.5.2] - 2022-09-21 + +1.5.2: elf endianness fixes and rodata migration fixes + +### Uncategorized + +- Read endianness from elf file +- Use `.section` directive on migrated rodata + +## [1.5.1] - 2022-09-19 + +1.5.1: symbol detection fixes + +### Uncategorized + +- Check for data pointers in data itself and do a recheck in case the pointer is in the same section but behind the current symbol +- Properly update original symbol type when creating symbols for sub-4 sizes + +## [1.5.0] - 2022-09-17 + +1.5.0: dynamic elfs + +### Uncategorized + +- Add compatibility for dynamic elf files + - `.dynsym`, `.dynstr` and `.dynamic` section parsing + - `.got` table parsing + - Use `%got` and `%call16` syntax for `$gp` relative symbols.
+ - `.cpload REG` detection +- Minor changes on string splitting functions to improve their use as an api + +## [1.4.2] - 2022-09-09 + +1.4.2: PS2 addresses hotfix + +### Uncategorized + +Fix 0 and negative addresses being treated as real symbols + +## [1.4.1] - 2022-09-03 + +1.4.1: Overlay function start bugfix + +### Uncategorized + +Fixes a bug in which a function start was wrongly detected on an overlay because the symbol existed for said address on the global segment + +## [1.4.0] - 2022-08-27 + +1.4.0: RSP and R5900 support + +### Uncategorized + +- Adds proper support for N64's RSP +- Allows passing any `rabbitizer.InstrCategory` to SectionText to allow using any instruction set supported by rabbitizer + +## [1.3.0] - 2022-07-08 + +1.3.0: rabbitizer 1.0.0 + +### Uncategorized + +- Updated to use rabbitizer 1.0.0 +- Rodata boundaries detection + +## [1.2.4] - 2022-07-05 + +### Uncategorized + +- Explicitly require a `rabbitizer` version lower than the next major version to avoid possible compatibility issues.
+- Set up automatic discovery on setup.cfg + + +## [1.2.3] - 2022-06-11 + +1.2.3: Fix Python 3.7 compatibility (again) + +### Uncategorized + +This time really fixes 3.7 compat, hopefully + +## [1.2.2] - 2022-06-11 + +1.2.2: %hi/%lo symbol filtering fix + +### Uncategorized + +Prevents filtering out LUI/ADDIU combos from being real symbols + +## [1.2.1] - 2022-06-10 + +1.2.1: Packaging fix + +### Uncategorized + +Hopefully fixes a packaging problem on PyPi + +## [1.2.0] - 2022-06-10 + +1.2.0: Faster disassembly from rabbitizer + +### Uncategorized + +- Use [rabbitizer](https://pypi.org/project/rabbitizer/) as the instruction decoder to speed up instruction analysis and disassembly +- Other minor changes to try to be compatible with Python 3.7 + +## [1.1.7] - 2022-06-08 + +### Uncategorized + +- Allow changing the file offset width comment +- `SortedDict`: New class which abstracts away the logic for keeping a sorted dictionary + +## [1.1.6] - 2022-06-03 + +1.1.6: %hi reuse fix + +### Uncategorized + + + +## [1.1.5] - 2022-06-03 + +1.1.5: More overlay fixes + +### Uncategorized + +- Move most of the instruction analyzing code to a new independent class +- Try to track instructions which are likely to not be a %lo and avoid pairing it. +- Extra checks to avoid using labels from other overlay segments +- Only stop a look ahead search until we find either an unconditional branch or a `jr` instruction + +## [1.1.4] - 2022-06-02 + +1.1.4: Overlay handling fixes + +### Uncategorized + +- Overall adds more vrom checks for handling shared-vram overlays. +- The autogenerated symbol size now adjusts itself considering the symbol type and the address alignment + +## [1.1.3] - 2022-06-01 + + 1.1.3: Minor patch + +### Uncategorized + +Adds a way to register a name getter callback for symbols + +## [1.1.2] - 2022-06-01 + +1.1.2: More symbol finding cleanups + +### Uncategorized + +- General cleanups and fixes related to symbol finding. 
+- New `COMPILER` option in `GlobalConfig`, used to enable some compiler specific tweaks + +## [1.1.1] - 2022-05-30 + +1.1.1: Symbol finding fixes + +### Uncategorized + +Various fixes: +- Multiple workarounds for pairing multiple %hi to the same %lo +- Fix `J` target calculation for the look ahead symbol finder +- Special cases for `LUI`s on delay slots +- Track moving registers by using `MOVE`, `OR` and `ADDU` +- Invalidate some registers after function calls +- Check negative branches +- Fix jump table detector for SN64 + +## [1.1.0] - 2022-05-29 + + 1.1.0: Overlay support (hopefully) + +### Uncategorized + +- Add support for overlays which share VRAMs and overlays which may communicate with overlays from other categories. + - Each new category and overlay segment must be registered in `Context` +- `ElementBase#vram` is now just an `int` as opposed to old `int|None`. +- The `vrom` of elements is tracked and stored. It is mainly used to autogenerate overlay names. +- The differentiation in symbol categories (labels, functions, symbols, etc) is removed. Everything now is stored in only one big dictionary to avoid duplication and lower memory usage. +- Remove the concept of "fake functions" +- `addSymbol`, `getSymbol` and similar methods were moved to `ElementBase` because of the need for the respective element overlay info. +- Symbol name generation is delayed as much as possible and autogenerated on the fly. +- Some minor symbol finder fixes +- Bugfix symbols disappearing for no reason +- General cleanups + +## [1.0.6] - 2022-05-26 + +1.0.6: Even more SN64 fixes + +### Uncategorized + +- Add more checks for J as unconditional branch +- Allow disabling %hi/%lo syntax for constants +- Add extra global label for other symbols (usually labels) in functions when `GlobalConfig.ASM_TEXT_FUNC_AS_LABEL` is enabled.
+ +## [1.0.5] - 2022-05-26 + +### Uncategorized + +- Allow setting a .ent and the function name as a label +- Float register `$31` fix for `NAMED_REGISTERS=False` +- Fix CI builds (2) +- Only apply the SN64 DIV fix for non handwritten functions +- Finer control over pseudo instructions + + +## [1.0.4] - 2022-05-26 + +### Uncategorized + +Fix the CI wheel + +## [1.0.3] - 2022-05-26 + +### Uncategorized + +Add CI which builds a wheel and publishes it to PyPI + +## [1.0.2] - 2022-05-26 + +### Uncategorized + +- Fix missing `f` on float registers when `GlobalConfig.NAMED_REGISTERS` was set to `False` +- Allow changing the line ends to anything via `GlobalConfig.LINE_ENDS` + +## [1.0.1] - 2022-05-26 + +### Uncategorized + +Changes: +- Improve RAM usage by removing an almost unused dictionary from the Instruction classes + +## [1.0.0] - 2022-05-26 + +### Uncategorized + + + +[unreleased]: https://github.com/Decompollaborate/spimdisasm/compare/master...develop +[1.17.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.2...1.17.3 +[1.17.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.1...1.17.2 +[1.17.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.0...1.17.1 +[1.17.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.16.5...1.17.0 +[1.16.5]: https://github.com/Decompollaborate/spimdisasm/compare/1.16.4...1.16.5 +[1.16.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.16.3...1.16.4 +[1.16.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.16.2...1.16.3 +[1.16.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.16.0...1.16.2 +[1.16.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.15.4...1.16.0 +[1.15.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.15.3...1.15.4 +[1.15.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.15.2...1.15.3 +[1.15.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.15.1...1.15.2 +[1.15.1]: 
https://github.com/Decompollaborate/spimdisasm/compare/1.15.0...1.15.1 +[1.15.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.14.3...1.15.0 +[1.14.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.14.2...1.14.3 +[1.14.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.14.1...1.14.2 +[1.14.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.14.0...1.14.1 +[1.14.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.13.3...1.14.0 +[1.13.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.13.2...1.13.3 +[1.13.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.13.1...1.13.2 +[1.13.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.13.0...1.13.1 +[1.13.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.12.5...1.13.0 +[1.12.5]: https://github.com/Decompollaborate/spimdisasm/compare/1.12.4...1.12.5 +[1.12.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.12.3...1.12.4 +[1.12.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.12.2...1.12.3 +[1.12.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.12.1...1.12.2 +[1.12.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.12.0...1.12.1 +[1.12.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.6...1.12.0 +[1.11.6]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.5...1.11.6 +[1.11.5]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.4...1.11.5 +[1.11.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.3...1.11.4 +[1.11.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.2...1.11.3 +[1.11.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.1...1.11.2 +[1.11.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.11.0...1.11.1 +[1.11.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.10.6...1.11.0 +[1.10.6]: https://github.com/Decompollaborate/spimdisasm/compare/1.10.5...1.10.6 +[1.10.5]: 
https://github.com/Decompollaborate/spimdisasm/compare/1.10.4...1.10.5 +[1.10.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.10.3...1.10.4 +[1.10.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.10.2...1.10.3 +[1.10.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.10.1...1.10.2 +[1.10.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.10.0...1.10.1 +[1.10.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.9.2...1.10.0 +[1.9.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.9.1...1.9.2 +[1.9.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.9.0...1.9.1 +[1.9.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.8.2...1.9.0 +[1.8.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.8.1...1.8.2 +[1.8.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.8.0...1.8.1 +[1.8.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.12...1.8.0 +[1.7.12]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.11...1.7.12 +[1.7.11]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.10...1.7.11 +[1.7.10]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.9...1.7.10 +[1.7.9]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.8...1.7.9 +[1.7.8]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.7...1.7.8 +[1.7.7]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.6...1.7.7 +[1.7.6]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.5...1.7.6 +[1.7.5]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.4...1.7.5 +[1.7.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.3...1.7.4 +[1.7.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.2...1.7.3 +[1.7.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.1...1.7.2 +[1.7.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.7.0...1.7.1 +[1.7.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.6.5...1.7.0 +[1.6.5]: 
https://github.com/Decompollaborate/spimdisasm/compare/1.6.4...1.6.5 +[1.6.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.6.3...1.6.4 +[1.6.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.6.2...1.6.3 +[1.6.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.6.1...1.6.2 +[1.6.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.6.0...1.6.1 +[1.6.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.7...1.6.0 +[1.5.7]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.6...1.5.7 +[1.5.6]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.5...1.5.6 +[1.5.5]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.4...1.5.5 +[1.5.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.3...1.5.4 +[1.5.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.2...1.5.3 +[1.5.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.1...1.5.2 +[1.5.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.5.0...1.5.1 +[1.5.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.4.2...1.5.0 +[1.4.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.4.1...1.4.2 +[1.4.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.4.0...1.4.1 +[1.4.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.3.0...1.4.0 +[1.3.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.2.4...1.3.0 +[1.2.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.2.3...1.2.4 +[1.2.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.2.2...1.2.3 +[1.2.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.2.1...1.2.2 +[1.2.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.2.0...1.2.1 +[1.2.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.7...1.2.0 +[1.1.7]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.6...1.1.7 +[1.1.6]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.5...1.1.6 +[1.1.5]: 
https://github.com/Decompollaborate/spimdisasm/compare/1.1.4...1.1.5 +[1.1.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.3...1.1.4 +[1.1.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.2...1.1.3 +[1.1.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.1...1.1.2 +[1.1.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.1.0...1.1.1 +[1.1.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.6...1.1.0 +[1.0.6]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.5...1.0.6 +[1.0.5]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.4...1.0.5 +[1.0.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.3...1.0.4 +[1.0.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.2...1.0.3 +[1.0.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.1...1.0.2 +[1.0.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.0.0...1.0.1 +[1.0.0]: https://github.com/Decompollaborate/spimdisasm/releases/tag/1.0.0 From cea2c73534bff3896c12496790690ca5c6513d5c Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 13:50:15 -0300 Subject: [PATCH 02/14] Fix lingting on CHANGELOG.md --- .gitignore | 3 +- .markdownlint.jsonc | 14 ++ .vscode/extensions.json | 7 + CHANGELOG.md | 286 +++++++++++++++++++++++++--------------- pyproject.toml | 2 +- spimdisasm/__init__.py | 4 +- 6 files changed, 207 insertions(+), 109 deletions(-) create mode 100644 .markdownlint.jsonc create mode 100644 .vscode/extensions.json diff --git a/.gitignore b/.gitignore index b597d690..0e60b5c9 100644 --- a/.gitignore +++ b/.gitignore @@ -162,7 +162,8 @@ cython_debug/ #.idea/ # Text editor remnants -.vscode/ +.vscode/* +!.vscode/extensions.json .vs/ .idea/ CMakeLists.txt diff --git a/.markdownlint.jsonc b/.markdownlint.jsonc new file mode 100644 index 00000000..ef8db42f --- /dev/null +++ b/.markdownlint.jsonc @@ -0,0 +1,14 @@ +{ + // https://github.com/DavidAnson/markdownlint/blob/main/doc/md024.md + // MD024 
- Multiple headings with the same content + "MD024": { + "siblings_only": true + }, + + // https://github.com/DavidAnson/markdownlint/blob/main/doc/md013.md + // MD013 - Line length + "MD013": { + "code_block_line_length": 120, + "headings": false + } +} diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..a20a77e4 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=827846 + // for the documentation about the extensions.json format + "recommendations": [ + "davidanson.vscode-markdownlint" + ] +} diff --git a/CHANGELOG.md b/CHANGELOG.md index 32d64b0e..6c0eb049 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + ## [Unreleased] ## [1.17.3] - 2023-09-18 @@ -21,18 +28,21 @@ - Emit global labels for symbols with no type in the middle of functions - Add checks for rs and rt registers instead of assuming they are used to avoid crashes - Option to set the filtering addresses for the symbol finder -- Consider the `j` instruction as a function end if rabbitizer's `--j-branch` option is turned off +- Consider the `j` instruction as a function end if rabbitizer's `--j-branch` + option is turned off - Always migrate mips1 doubles when migrating rodata ## [1.17.0] - 2023-08-27 ### Uncategorized -- Allow using `MIPS_NONE` reloc type as a way to avoid symbolizing a reference and use the raw value instead. +- Allow using `MIPS_NONE` reloc type as a way to avoid symbolizing a reference + and use the raw value instead. - Allow using a different label for symbols in the middle of functions. - Useful for setting alternative entry points for handwritten functions. 
- It can be used by setting the `ASM_TEXT_ALT_LABEL`. -- Fix `elfObjDisasm` crashing if a reloc section references an unhandled section like `.pdr`. +- Fix `elfObjDisasm` crashing if a reloc section references an unhandled section + like `.pdr`. ## [1.16.5] - 2023-08-22 @@ -45,8 +55,10 @@ ### Uncategorized - Try to gather the `$gp` register value from non-PIP elfs too -- Various bugfixes related to `j` instructions being used as a way to call another function. -- Fix size directive not being properly emitted for functions with user-declared size that has dangling nops. +- Various bugfixes related to `j` instructions being used as a way to call + another function. +- Fix size directive not being properly emitted for functions with user-declared + size that has dangling nops. - Detect ABI and cpu flags from elf header. - Do not use mips1 double detection heuristic on non o32 abis - Avoid warning about LOCAL NOTYPE symbols in elf files @@ -62,23 +74,27 @@ ### Uncategorized - Fix size directive not using the right label when symbols are smaller than a word -- Fix size directive not being properly emitted for symbols with a size smaller than a word +- Fix size directive not being properly emitted for symbols with a size smaller + than a word - Generate pad symbols to honor user declared sizes - - Symbols will be automatically splitted if the user-declared size is smaller than the symbol size (usually due to size not being a multiple of 4, file splits, other symbols not being referenced, etc) + - Symbols will be automatically split if the user-declared size is smaller + than the symbol size (usually due to size not being a multiple of 4, file + splits, other symbols not being referenced, etc) - Add the character `0x1A` to set of special cases for string decoding. -- Fix bug which produced reporting incorrect file splits on strings which their last word was a zero. +- Fix bug which reported incorrect file splits on strings whose last word was + a zero.
- Workaround for big addends when building with modern GAS - ## [1.16.0] - 2023-07-23 ### Uncategorized -- Add a detector for the redundant function end produced by IDO with some specific flag combinations. - - It is turned off by default, but it can be turned on globally with `--detect-redundant-function-end`, or globally and per file via the API. +- Add a detector for the redundant function end produced by IDO with some + specific flag combinations. + - It is turned off by default, but it can be turned on globally with + `--detect-redundant-function-end`, or globally and per file via the API. - Fix BSS sections not emitting a first symbol if it isn't referenced anywhere. - ## [1.15.4] - 2023-07-14 ### Uncategorized @@ -104,31 +120,44 @@ ### Uncategorized -- Emit string alignment directives even when the section isn't aligned to a multiple of 8. - - Some projects can have rodata sections aligned to just a multiple of 4, and not emitting the directive in those cases can break their builds +- Emit string alignment directives even when the section isn't aligned to a + multiple of 8. + - Some projects can have rodata sections aligned to just a multiple of 4, and + not emitting the directive in those cases can break their builds ## [1.15.0] - 2023-07-03 ### Uncategorized -- Change the string guesser to work with multiple levels instead of plainly enabled/disabled and the aggressive toggle. - - The new option is used via the API `GlobalConfig.RODATA_STRING_GUESSER_LEVEL` or via the CLI `--rodata-string-guesser level`. - - The old `GlobalConfig.STRING_GUESSER` and `GlobalConfig.AGGRESSIVE_STRING_GUESSER` options are now deprecated, same as the CLI options `--string-guesser` and `--aggressive-string-guesser`. 
- - The old `GlobalConfig.STRING_GUESSER = True` is equivalent to the new `GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1` - - The old `GlobalConfig.AGGRESSIVE_STRING_GUESSER = True` is equivalent to the new `GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 4` +- Change the string guesser to work with multiple levels instead of plainly + enabled/disabled and the aggressive toggle. + - The new option is used via the API + `GlobalConfig.RODATA_STRING_GUESSER_LEVEL` or via the CLI + `--rodata-string-guesser level`. + - The old `GlobalConfig.STRING_GUESSER` and + `GlobalConfig.AGGRESSIVE_STRING_GUESSER` options are now deprecated, same as + the CLI options `--string-guesser` and `--aggressive-string-guesser`. + - The old `GlobalConfig.STRING_GUESSER = True` is equivalent to the new + `GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1` + - The old `GlobalConfig.AGGRESSIVE_STRING_GUESSER = True` is equivalent to + the new `GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 4` - Meaning of the new levels: - level 0: Completely disable the guessing feature. - level 1: The most conservative guessing level. Imposes the following restrictions: - Do not try to guess if the user provided a type for the symbol. - - Do no try to guess if type information for the symbol can be inferred by other means. + - Do not try to guess if type information for the symbol can be inferred by + other means. - A string symbol must be referenced only once. - Strings must not be empty. - - level 2: A string no longer needs to be referenced only once to be considered a possible string. This can happen because of a deduplication optimization. + - level 2: A string no longer needs to be referenced only once to be + considered a possible string. This can happen because of a deduplication optimization. - level 3: Empty strings are allowed. - - level 4: Symbols with autodetected type information but no user type information can still be guessed as strings.
+ - level 4: Symbols with autodetected type information but no user type + information can still be guessed as strings. - The level defaults to 1. - Implement string guesser for the data section. - - Controlled by the API `GlobalConfig.DATA_STRING_GUESSER_LEVEL` or via the CLI `--data-string-guesser level`. + - Controlled by the API `GlobalConfig.DATA_STRING_GUESSER_LEVEL` or via the + CLI `--data-string-guesser level`. - Decodes strings with the `ASCII` encoding by default. - The meaning of each level are the same as the rodata string guesser. - The level defaults to 2. @@ -171,9 +200,11 @@ ### Uncategorized - Try to support better N32 PIC programs. - - The current issue was spimdisasm was not able to properly generate symbol references for `$gp` accesses. + - The current issue was spimdisasm was not able to properly generate symbol + references for `$gp` accesses. - GOT table now gets its own address from the reginfo instead of the dynamic table. - - Accesses pointing outside the GOT table are tried to be redirected to `sdata`, `srdata` and `sbss` sections. + - Accesses pointing outside the GOT table are tried to be redirected to + `sdata`, `srdata` and `sbss` sections. - Implement `--dyn-syms` on readelf-like mode. - Minor improvements to readelf output format @@ -181,7 +212,8 @@ ### Uncategorized -- Fix not writing to subfolders properly when a csv filesplit entry has a slash on its name. +- Fix not writing to subfolders properly when a csv filesplit entry has a slash + on its name. 
## [1.13.2] - 2023-05-01 @@ -194,17 +226,15 @@ ### Uncategorized -- Fix a possible `None` case in ` getInstrCategoryFromStr` +- Fix a possible `None` case in `getInstrCategoryFromStr` - Add note about R3000GTE and R5900 instruction set support in the README - ## [1.13.0] - 2023-04-30 ### Uncategorized - Add support for R3000GTE - ## [1.12.5] - 2023-04-28 ### Uncategorized @@ -236,14 +266,15 @@ - Add `--version` flag to every cli tool - Fix data not being properly disassembled on `singleFileDisasm` - Enforce UTF-8 encoding on generated asm files (PR #111) - - Thanks to @1superchip + - Thanks to @1superchip - Emit a comment saying if a reloc is a global one when the emit relocs flag is passed - Fix incorrect addends on non static symbols from elf files. - - Fixes issue #110 -- Fix a regression where some `%lo` symbols weren't being properly paired because of the `%got` being reused on PIC code + - Fixes issue #110 +- Fix a regression where some `%lo` symbols weren't being properly paired + because of the `%got` being reused on PIC code - Fix sizes for inferred types - Properly detect `-mips1` `double` literals - - Fixes issue #57 + - Fixes issue #57 ## [1.12.1] - 2023-03-28 @@ -294,7 +325,8 @@ ### Uncategorized - Add flag to emit inline relocs -- Do not report extra padding in functions if user declared size matches the size of the function +- Do not report extra padding in functions if user declared size matches the + size of the function - Rename `ContextSymbol.size` to `ContextSymbol.userDeclaredSize` - Add `--function-info` flag - `FuncRodataEntry`: Fix migrate parameter if function has no rodata to be migrated @@ -313,13 +345,17 @@ - CLI changes: - Install CLI tools as actual terminal programs - Allow invoking the CLI tools from spimdisasm as subparsers - - The old way of invoking the CLI tools (`python3 -m spimdisasm.clitool`) is now deprecated, but still works + - The old way of invoking the CLI tools (`python3 -m spimdisasm.clitool`) is + now deprecated, but 
still works - `disasmdis`: Fix crash if the input isn't a multiple of a word -- Report with a comment which instruction made spimdisasm detected as a handwritten instruction +- Report with a comment which instruction made spimdisasm detected as a + handwritten instruction - New in the API: `FunctionRodataEntry` - Cleaner interface for rodata migration and similar functions - - Provides method for intermixing functions and non-migrated rodata symbols in a way the correct order is still preserved - - Old functions from `FileHandlers` which provided rodata migration functionalities are now deprecated + - Provides method for intermixing functions and non-migrated rodata symbols in + a way the correct order is still preserved + - Old functions from `FileHandlers` which provided rodata migration + functionalities are now deprecated ## [1.10.6] - 2023-01-28 @@ -330,7 +366,8 @@ - Check for bss symbol size to match user declared size - Warn if the globalsegment vrom start and end is the same - Identify 32bitsmode elf flag -- Avoid reporting leading zeroes as padding in rodata symbols if the size of the symbol matches the user declared one +- Avoid reporting leading zeroes as padding in rodata symbols if the size of the + symbol matches the user declared one ## [1.10.5] - 2023-01-28 @@ -382,7 +419,8 @@ - Fix emitting `.align` directives on unnaligned jumptables - Fix rodata split detection not properly considering special jumptable alignment -- Add `nameEnd` member to `ContextSymbol` to allow emitting a closing user-declared label +- Add `nameEnd` member to `ContextSymbol` to allow emitting a closing + user-declared label ## [1.9.1] - 2022-12-29 @@ -394,12 +432,13 @@ ### Uncategorized -- Reloc system re-worked. Users can now provide their own relocs to improve the automatic disassembly +- Reloc system re-worked. 
Users can now provide their own relocs to improve the + automatic disassembly - loPatch system has been removed and superseded by the global reloc system -- `GlobalConfig` variables can now be set via environment variables. +- `GlobalConfig` variables can now be set via environment variables. - Parameters passed by cli take priority over environment variables. - - Options configured via code (when using this as a library) take priority over environment variables. - + - Options configured via code (when using this as a library) take priority + over environment variables. ## [1.8.2] - 2022-12-19 @@ -411,7 +450,8 @@ ### Uncategorized -- New interface for allowing banning ranges of symbols, instead of having to add them one by one +- New interface for allowing banning ranges of symbols, instead of having to add + them one by one ## [1.8.0] - 2022-12-16 @@ -420,10 +460,12 @@ - Require [`rabbitizer` 1.4.0](https://github.com/Decompollaborate/rabbitizer/releases/tag/1.4.0) - Allow to type-hint strings with `asciz` - Allow disassembling `.data` symbols as strings - - This won't be automatically guessed as with `.rodata`, this only will happen with type-hints + - This won't be automatically guessed as with `.rodata`, this only will happen + with type-hints - `disasmdis` now accepts spaces and input from `stdin` Meta: + - `setup.cfg` was removed and all its info was moved to `pyproject.toml` ## [1.7.12] - 2022-12-05 @@ -451,9 +493,8 @@ Check size of floats and doubles before migrating them - Elf fixes: - Reference `NOTYPE` symbols - Various GOT fixes - - Show isAutogeneratedPad in the context file #79 - - [Show the first %lo reference for each symbol in the context #80 - + - Show isAutogeneratedPad in the context file #79 + - Show the first %lo reference for each symbol in the context #80 ## [1.7.9] - 2022-11-09 @@ -461,7 +502,8 @@ Check size of floats and doubles before migrating them - Fix an OoB issue when trying to post-process the GOT analyzis on non-PIC mode - Emit a 
comment for automatically generated bss pads. - - Those pads are created mainly to properly adjust the `.space` of a bss symbol if said symbol had an user-declared size + - Those pads are created mainly to properly adjust the `.space` of a bss + symbol if said symbol had an user-declared size ## [1.7.8] - 2022-11-04 @@ -481,18 +523,19 @@ Check size of floats and doubles before migrating them - Improve detection of the end of jumptables - Refactor REL handling. It has been simplified - This should improve disassembling `.o` files -- Symbols from elfs are checked to be in the correct vram range before adding them to the context. +- Symbols from elfs are checked to be in the correct vram range before adding + them to the context. - Allow disassembling data symbols as floats and doubles - ## [1.7.6] - 2022-10-31 ### Uncategorized - Refactor GOT handling - Should fix IDO 5.3 disassembly -- Use glabels for jumptable labels when the functions are not being migrated -- Support `MIPS_GOT_HI16`, `MIPS_GOT_LO16`, `MIPS_CALL_HI16` and `MIPS_CALL_LO16` reloc types from `.rel` elf sections +- Use glabels for jumptable labels when the functions are not being migrated +- Support `MIPS_GOT_HI16`, `MIPS_GOT_LO16`, `MIPS_CALL_HI16` and + `MIPS_CALL_LO16` reloc types from `.rel` elf sections - `disasmdis` now ignores non hex characters - Negative addresses are considered as GOT accesses in PIC mode - Add special handling for the GOT lazy resolver @@ -513,6 +556,7 @@ Check size of floats and doubles before migrating them ### Uncategorized GOT fixes: + - Fix using GOT local addresses as functions - Fix `.data` symbols incorrectly referencing GOT local addresses - Fix `elfObjDisasm` using N64 specific symbols by default @@ -520,29 +564,37 @@ GOT fixes: - This change was made to accommodate GOT global/local references New features: -- The autodetected size of functions can now be queried from a `ContextSymbol` with `getSize()` + +- The autodetected size of functions can now be queried 
from a `ContextSymbol` + with `getSize()` - The arch level can now be specified when disassembling - It is detected automatically when parsing an elf file - `.set gp=64` is disabled on MIPS1 and MIPS2 arch levels -- `elfObjDisasm` will now produce a list of functions and non migrated rodata when requesting to migrate functions. - - This can be useful to bootstrap newly generated C files so the rodata order is easier to preserve +- `elfObjDisasm` will now produce a list of functions and non migrated rodata + when requesting to migrate functions. + - This can be useful to bootstrap newly generated C files so the rodata order + is easier to preserve Misc changes: + - `elfObjDisasm` now display progress to stdout when disassembling - This behavior can be disabled with `-q` -- Rodata migration has been slightly tweaked: migration will be performed if only one function references the symbol, contrary to the old "only one reference in the whole codebase must reference the symbol" +- Rodata migration has been slightly tweaked: migration will be performed if + only one function references the symbol, contrary to the old "only one + reference in the whole codebase must reference the symbol" - Seems to be a common pattern on PIC programs compiled with IDO - ## [1.7.3] - 2022-10-24 1.7.3: `--aggressive-string-guesser` ### Uncategorized -- Do not infer the type of a variable if the access types are heterogeneous, which may imply a struct +- Do not infer the type of a variable if the access types are heterogeneous, + which may imply a struct - New `--aggressive-string-guesser` flag - - Tries to decode string even if the string is empty, the symbol may have type information or it is referenced more than once + - Tries to decode string even if the string is empty, the symbol may have type + information or it is referenced more than once - Add `PSYQ` compiler option - Currently it enables the same options as `SN64` @@ -561,17 +613,20 @@ Misc changes: ### Uncategorized - New flags 
in `elfObjDisasm`: - - Flags which try to mimic `readelf`: `--file-header`, `--syms`, `--relocs` and `-display-got` + - Flags which try to mimic `readelf`: `--file-header`, `--syms`, `--relocs` + and `-display-got` - `--split-functions`, has the same behavior as `singleFileDisasm` - `elfObjDisasm` changes: - - Fix undefined symbols handling in + - Fix undefined symbols handling in - Fix addends of got global symbols - - Use rel types types from the elf file if they are available instead of trying to infer them - - Warn when trying to disassemble an `abi2` (N32) elf - - Warn for negative GOT accesses instead of crashing + - Use rel types from the elf file if they are available instead of + trying to infer them + - Warn when trying to disassemble an `abi2` (N32) elf + - Warn for negative GOT accesses instead of crashing - Warn if unhandled flags are found in an elf file - Fix gp value on N32 abi - - `$gp` access are no longer symbolized if the address is not found in the got table + - `$gp` accesses are no longer symbolized if the address is not found in the got + table - Fix rodata pointer detection in data on elf files (fixes #63) ## [1.7.0] - 2022-10-18 @@ -589,11 +644,13 @@ Misc changes: - Fix boundaries detection reporting in `elfObjDisasm` - The output of this report is csv-friendly. Thanks @EllipticEllipsis (#65) - Tweak string disassembly - - If a symbol is in the middle of a string then the string is aborted and disassembled as `.word`s - - Check the next bytes (until a word boundary) after the nul terminator of a string are zero. Thanks @Xeeynamo (#64) + - If a symbol is in the middle of a string then the string is aborted and + disassembled as `.word`s + - Check the next bytes (until a word boundary) after the nul terminator of a + string are zero. Thanks @Xeeynamo (#64) - Strings with '\a' are no longer treated as real strings - Fix labels not being emitted in rodata if they were not being word-aligned.
- - Fixes #59 + - Fixes #59 - Remove `GlobalConfig.ADD_NEW_SYMBOLS` - Avoid pairing `%gp_got` symbols on non PIC code - General cleanups @@ -604,7 +661,8 @@ Misc changes: ### Uncategorized -- Avoid crashing if a `%got` access' address is not in the global table. Prints a warning instead. +- Avoid crashing if a `%got` access' address is not in the global table. Prints + a warning instead. - Fix symbol usage on relocatable files, kinda - Ignore `.rel` sections if the file is not of REL type - Avoid emitting `%got` rels for non PIC disassemblies @@ -615,15 +673,16 @@ Misc changes: ### Uncategorized - - ## [1.6.3] - 2022-10-04 1.6.3: Speedup for overlay disassembly ### Uncategorized -- Moves the `globalSegment` check above all the other checks in `getSymbol`, providing a faster lookup since most of the time overlays usually reference a symbol from the `globalSegment` over a symbol from another overlay segment from a different category +- Moves the `globalSegment` check above all the other checks in `getSymbol`, + providing a faster lookup since most of the time overlays usually reference a + symbol from the `globalSegment` over a symbol from another overlay segment + from a different category ## [1.6.2] - 2022-10-03 @@ -631,7 +690,7 @@ Misc changes: ### Uncategorized -Should address issue #55 +Should address issue #55 ## [1.6.1] - 2022-10-03 @@ -650,9 +709,12 @@ Should address issue #55 ### Uncategorized -- Deleted `singleFileDisasm.py`, `simpleFileDisasm.py`, `disasmdis.py`, `rspDisasm.py`, `elfObjDisasm.py` -- The frontend scripts were converted to submodules, now those can be executed with the `python3 -m spimdisasm.submodulename` syntax, allowing to execute them even in the pip installations of spimdisasm. -- Refactored raw path handling to use `pathlib`. 
+- Deleted `singleFileDisasm.py`, `simpleFileDisasm.py`, `disasmdis.py`, + `rspDisasm.py`, `elfObjDisasm.py` +- The frontend scripts were converted to submodules, now those can be executed + with the `python3 -m spimdisasm.submodulename` syntax, allowing to execute + them even in the pip installations of spimdisasm. +- Refactored raw path handling to use `pathlib`. - Add installation instructions to readme. - SN64 tweaks: - Use `.align` directive for string disassembly on SN64 @@ -665,9 +727,8 @@ Should address issue #55 ### Uncategorized -- Fixes an issue where 0 size bss variables where being outputted if the last bss variable of a file had a size which filled its size up until the file boundary - - +- Fixes an issue where 0 size bss variables were being output if the last + bss variable of a file had a size which filled its size up until the file boundary ## [1.5.6] - 2022-09-27 @@ -679,7 +740,8 @@ Should address issue #55 - Use `.rdata` on rodata migration for SN64 - Fix `ASM_DATA_SYM_AS_LABEL` on bss generation - Fix rodata symbols searching during migration -- Add option to disassemble data/rodata with different endianess than the global one +- Add option to disassemble data/rodata with different endianness than the + global one - Allow changing the string encoding per rodata segment - Return the created segment by `addOverlaySegment` @@ -698,7 +760,8 @@ Should address issue #55 ### Uncategorized - `ASM_DATA_SYM_AS_LABEL`: Allow adding a data symbol as a simple label.
-- `late_rodata` logic has been tweaked to only be applied when compiler is set to IDO +- `late_rodata` logic has been tweaked to only be applied when compiler is set + to IDO - Allow range checks on symbols referenced by data symbols - Add option to allow all addends referenced by data symbols - Fix use of user-declared sizes on functions and bss symbols @@ -726,7 +789,8 @@ Should address issue #55 ### Uncategorized -- Check for data pointers in data itself and do a recheck in case the pointer is in the same section but behind the current symbol +- Check for data pointers in data itself and do a recheck in case the pointer is + in the same section but behind the current symbol - Properly update original symbol type when creating symbols for sub-4 sizes ## [1.5.0] - 2022-09-17 @@ -756,7 +820,8 @@ Fix 0 and negative addresses being treated as real symbols ### Uncategorized -Fixes a bug in which a function start was wrongly detected on an overlay because the symbol existed for said address on the global segment +Fixes a bug in which a function start was wrongly detected on an overlay because +the symbol existed for said address on the global segment ## [1.4.0] - 2022-08-27 @@ -765,7 +830,8 @@ Fixes a bug in which a function start was wrongly detected on an overlay because ### Uncategorized - Adds proper support for N64's RSP -- Allows passing any `rabbitizer.InstrCategory` to SectionText to allow using any instruction set supported by rabbitizer +- Allows passing any `rabbitizer.InstrCategory` to SectionText to allow using + any instruction set supported by rabbitizer ## [1.3.0] - 2022-07-08 @@ -780,10 +846,10 @@ Fixes a bug in which a function start was wrongly detected on an overlay because ### Uncategorized -- Explicitly require a `rabbitizer` version minor than the next major version to avoid possible compatibility issues. +- Explicitly require a `rabbitizer` version minor than the next major version to + avoid possible compatibility issues. 
- Set up automatic discovery on setup.cfg - ## [1.2.3] - 2022-06-11 1.2.3: Fix Python 3.7 compatibility (again) @@ -814,7 +880,8 @@ Hopefully fixes a packaging problem on PyPi ### Uncategorized -- Use [rabbitizer](https://pypi.org/project/rabbitizer/) as the instruction decoder to speed up instruction analysis and disassembly +- Use [rabbitizer](https://pypi.org/project/rabbitizer/) as the instruction + decoder to speed up instruction analysis and disassembly - Other minor changes to try to be compatible with Python 3.7 ## [1.1.7] - 2022-06-08 @@ -830,8 +897,6 @@ Hopefully fixes a packaging problem on PyPi ### Uncategorized - - ## [1.1.5] - 2022-06-03 1.1.5: More overlay fixes @@ -841,7 +906,8 @@ Hopefully fixes a packaging problem on PyPi - Move most of the instruction analyzing code to a new independent class - Try to track instructions which are likely to not be a %lo and avoid pairing it. - Extra checks to avoid using labels from other overlay segments -- Only stop a look ahead search until we find either an unconditional branch or a `jr` instruction +- Only stop a look ahead search until we find either an unconditional branch or + a `jr` instruction ## [1.1.4] - 2022-06-02 @@ -850,7 +916,8 @@ Hopefully fixes a packaging problem on PyPi ### Uncategorized - Overall adds more vrom checks for handling shared-vram overlays. -- The autogenerated symbol size now adjusts itself considering the symbol type and the address alignment +- The autogenerated symbol size now adjusts itself considering the symbol type + and the address alignment ## [1.1.3] - 2022-06-01 @@ -867,7 +934,8 @@ Adds a way to register a name getter callback for symbols ### Uncategorized - General cleanups and fixes related to symbol finding. 
-- New `COMPILER` option in `GlobalConfig`, used to enable some compiler specific tweaks +- New `COMPILER` option in `GlobalConfig`, used to enable some compiler + specific tweaks ## [1.1.1] - 2022-05-30 @@ -876,6 +944,7 @@ Adds a way to register a name getter callback for symbols ### Uncategorized Various fixes: + - Multiples workarounds for pairing multiples %hi to the same %lo - Fix `J` target calculation for the look ahead symbol finder - Special cases for `LUI`s on delay slots @@ -890,14 +959,20 @@ Various fixes: ### Uncategorized -- Add support for overlays which share VRAMs and overlays which may communicate to overlays from other categories. +- Add support for overlays which share VRAMs and overlays which may communicate + to overlays from other categories. - Each new category and overlay segment must be registered in `Context` - `ElementBase#vram` is now just an `int` as opposed to old `int|None`. -- The `vrom` of elements is tracked and stored. It is mainly used to autogenerate overlay's names. -- The differentiation in symbol categories (labels, functions, symbols, etc) is removed. Everything now is stored in only one big dictionary to avoid duplication and lower memory usage. +- The `vrom` of elements is tracked and stored. It is mainly used to + autogenerate overlay's names. +- The differentiation in symbol categories (labels, functions, symbols, etc) is + removed. Everything now is stored in only one big dictionary to avoid + duplication and lower memory usage. - Remove the concept of "fake functions" -- `addSymbol`, `getSymbol` and similar methods were moved to `ElementBase` because of the need for the respective element overlay info. -- Symbol name generation is delayed as much as possible and autogenerated on the fly. +- `addSymbol`, `getSymbol` and similar methods were moved to `ElementBase` + because of the need for the respective element overlay info. +- Symbol name generation is delayed as much as possible and autogenerated on + the fly. 
- Some minor symbol finder fixes - Bugfix symbols disappearing for no reason - General cleanups @@ -910,7 +985,8 @@ Various fixes: - Add more checks for J as unconditional branch - Allow disabling %hi/%lo syntax for constants -- Add extra global label for other symbols (usually labels) in functions when `GlobalConfig.ASM_TEXT_FUNC_AS_LABEL` is enabled. +- Add extra global label for other symbols (usually labels) in functions when + `GlobalConfig.ASM_TEXT_FUNC_AS_LABEL` is enabled. ## [1.0.5] - 2022-05-26 @@ -922,7 +998,6 @@ Various fixes: - Only apply the SN64 DIV fix for non handwritten functions - Finer control over pseudo instructions - ## [1.0.4] - 2022-05-26 ### Uncategorized @@ -939,7 +1014,8 @@ Add CI which builds a wheel and publishes it to PyPI ### Uncategorized -- Fix missing `f` on float registers when `GlobalConfig.NAMED_REGISTERS` was set to `False` +- Fix missing `f` on float registers when `GlobalConfig.NAMED_REGISTERS` was set + to `False` - Allow changing the line ends to anything via `GlobalConfig.LINE_ENDS` ## [1.0.1] - 2022-05-26 @@ -947,14 +1023,14 @@ Add CI which builds a wheel and publishes it to PyPI ### Uncategorized Changes: -- Improve RAM usage by removing an almost unused dictionary from the Instruction classes + +- Improve RAM usage by removing an almost unused dictionary from the Instruction + classes ## [1.0.0] - 2022-05-26 ### Uncategorized - - [unreleased]: https://github.com/Decompollaborate/spimdisasm/compare/master...develop [1.17.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.2...1.17.3 [1.17.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.1...1.17.2 diff --git a/pyproject.toml b/pyproject.toml index 7c8bf83f..912383a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "spimdisasm" # Version should be synced with spimdisasm/__init__.py -version = "1.17.3" +version = "1.17.4.dev0" description = "MIPS disassembler" # license = "MIT" readme = "README.md" diff --git 
a/spimdisasm/__init__.py b/spimdisasm/__init__.py index ad46233b..b834f895 100644 --- a/spimdisasm/__init__.py +++ b/spimdisasm/__init__.py @@ -5,8 +5,8 @@ from __future__ import annotations -__version_info__: tuple[int, int, int] = (1, 17, 3) -__version__ = ".".join(map(str, __version_info__)) +__version_info__: tuple[int, int, int] = (1, 17, 4) +__version__ = ".".join(map(str, __version_info__)) + ".dev0" __author__ = "Decompollaborate" from . import common as common From 235c7a68f146baa6f4ece91450653fa002e4c594 Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 15:50:41 -0300 Subject: [PATCH 03/14] organize half of the changelog --- CHANGELOG.md | 413 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 281 insertions(+), 132 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0eb049..8f0d04ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add `CHANGELOG.md` + ## [1.17.3] - 2023-09-18 -### Uncategorized +### Changed + +- Hardcodes a check to avoid disassembling `.vutext`. This will be changed in a + future release. 
+ - Fixes `.vutext` sections from PS2 elfs messing with symbol analysis + +### Fixed + -- Hardcode a check to avoid disassembling `.vutext` - Fix `.double` disassembly for little endian ## [1.17.2] - 2023-09-18 @@ -22,103 +31,133 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.17.1] - 2023-09-15 -### Uncategorized +### Added + +- Option to set the filtering addresses for the symbol finder + +### Changed - Tweak endlabel to be emitted in the same place as the size directive does - Emit global labels for symbols with no type in the middle of functions -- Add checks for rs and rt registers instead of assuming they are used to avoid crashes -- Option to set the filtering addresses for the symbol finder - Consider the `j` instruction as a function end if rabbitizer's `--j-branch` option is turned off - Always migrate mips1 doubles when migrating rodata +### Fixed + +- Add checks for `$rs` and `$rt` registers instead of assuming they are used to + avoid crashing during runtime. + ## [1.17.0] - 2023-08-27 -### Uncategorized +### Changed - Allow using `MIPS_NONE` reloc type as a way to avoid symbolizing a reference and use the raw value instead. - Allow using a different label for symbols in the middle of functions. - Useful for setting alternative entry points for handwritten functions. - It can be used by setting the `ASM_TEXT_ALT_LABEL`. + +### Fixed + - Fix `elfObjDisasm` crashing if a reloc section references an unhandled section like `.pdr`. ## [1.16.5] - 2023-08-22 -### Uncategorized +### Changed -- Do not use iQue symbols by default if user asked for libultra symbols +- Do not use iQue symbols by default if user asked only for libultra symbols ## [1.16.4] - 2023-08-19 -### Uncategorized +### Added - Try to gather the `$gp` register value from non-PIP elfs too +- Detect ABI and cpu flags from elf header.
+ +### Changed + +- Do not use mips1 double detection heuristic on non o32 abis + +### Fixed + - Various bugfixes related to `j` instructions being used as a way to call another function. - Fix size directive not being properly emitted for functions with user-declared size that has dangling nops. -- Detect ABI and cpu flags from elf header. -- Do not use mips1 double detection heuristic on non o32 abis -- Avoid warning about LOCAL NOTYPE symbols in elf files +- Avoid warning about `LOCAL` `NOTYPE` symbols in elf files ## [1.16.3] - 2023-08-15 -### Uncategorized +### Fixed - Fix hex comment crashing because of doubles when parsing little endian binaries ## [1.16.2] - 2023-08-14 -### Uncategorized +### Added -- Fix size directive not using the right label when symbols are smaller than a word -- Fix size directive not being properly emitted for symbols with a size smaller - than a word - Generate pad symbols to honor user declared sizes - Symbols will be automatically splitted if the user-declared size is smaller than the symbol size (usually due to size not being a multiple of 4, file splits, other symbols not being referenced, etc) - Add the character `0x1A` to set of special cases for string decoding. + +### Changed + +- Workaround for big addends when building with modern GAS + +### Fixed + +- Fix size directive not using the right label when symbols are smaller than a word +- Fix size directive not being properly emitted for symbols with a size smaller + than a word - Fix bug which produced reporting incorrect file splits on strings which their last word was a zero. -- Workaround for big addends when building with modern GAS ## [1.16.0] - 2023-07-23 -### Uncategorized +### Added - Add a detector for the redundant function end produced by IDO with some specific flag combinations. - It is turned off by default, but it can be turned on globally with `--detect-redundant-function-end`, or globally and per file via the API. 
+ +### Fixed + - Fix BSS sections not emitting a first symbol if it isn't referenced anywhere. ## [1.15.4] - 2023-07-14 ### Uncategorized +### Changed + - Avoid taking into account invalid instructions when trying to find function boundaries. + +### Fixed + - Properly honor size of user-declared symbols for elf static symbols. ## [1.15.3] - 2023-07-10 -### Uncategorized +### Changed -- Don't use append the section name if it is known when disassembling elfs +- Don't append the section name if it is known when disassembling elfs - This special cases the sections `.text`, `.data`, `.rodata` and `.bss`. - Avoids the redundant `filename_.text/` naming scheme ## [1.15.2] - 2023-07-04 -### Uncategorized +### Fixed - Fix hardcoded shift value in alignment directive ## [1.15.1] - 2023-07-04 -### Uncategorized +### Changed - Emit string alignment directives even when the section isn't aligned to a multiple of 8. @@ -127,7 +166,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.15.0] - 2023-07-03 -### Uncategorized +### Added + +- Implement string guesser for the data section. + - Controlled by the API `GlobalConfig.DATA_STRING_GUESSER_LEVEL` or via the + CLI `--data-string-guesser level`. + - Decodes strings with the `ASCII` encoding by default. + - The meaning of each level are the same as the rodata string guesser. + - The level defaults to 2. +- Add experimental Pascal string guesser. + - Works for both rodata and data sections. + - Follows the same level logic as the C string guesser. + - It is disabled by default. + +### Changed - Change the string guesser to work with multiple levels instead of plainly enabled/disabled and the aggressive toggle. @@ -155,69 +207,74 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - level 4: Symbols with autodetected type information but no user type information can still be guessed as strings. - The level defaults to 1. 
-- Implement string guesser for the data section. - - Controlled by the API `GlobalConfig.DATA_STRING_GUESSER_LEVEL` or via the - CLI `--data-string-guesser level`. - - Decodes strings with the `ASCII` encoding by default. - - The meaning of each level are the same as the rodata string guesser. - - The level defaults to 2. -- Add experimental Pascal string guesser. - - Works for both rodata and data sections. - - Follows the same level logic as the C string guesser. - - It is disabled by default. - Start emitting `.size` directives by default. - Emit `jlabel` instead of `dlabel` for jumptable labels by default - Emit `dlabel` instead of `dlabel` for data, rodata and bss symbols by default ## [1.14.3] - 2023-06-19 -### Uncategorized +### Added - Failcheck for non aligned doubles -- (Hopefully) Fix same-vram overlays using symbols from other overlays + +### Changed + - `elfObjDisasm`: Can now disassemble sections with arbitrary names - `disasmdis`: Disable pseudo instructions by default +### Fixed + +- (Hopefully) Fix same-vram overlays using symbols from other overlays + ## [1.14.2] - 2023-06-10 -### Uncategorized +### Changed - Actually add `py.typed` to `pyproject.toml` - Use `bytearray` as little as possible + +### Deprecated + - `writeBytearrayToFile` is now deprecated, use `writeBytesToFile` instead ## [1.14.1] - 2023-06-10 -### Uncategorized +### Added - Emit a previous alignment directive for strings. - Ensures strings are always word aligned -- Purge `.balign` directive in favor of `.align` directive - Add `py.typed` file. Whoops +### Changed + +- Purge `.balign` directive in favor of `.align` directive + ## [1.14.0] - 2023-05-10 -### Uncategorized +### Added + +- Implement `--dyn-syms` on readelf-like mode. + +### Changed -- Try to support better N32 PIC programs. +- Improve a bit support for N32 PIC programs. - The current issue was spimdisasm was not able to properly generate symbol references for `$gp` accesses. 
- GOT table now gets its own address from the reginfo instead of the dynamic table. - Accesses pointing outside the GOT table are tried to be redirected to `sdata`, `srdata` and `sbss` sections. -- Implement `--dyn-syms` on readelf-like mode. - Minor improvements to readelf output format ## [1.13.3] - 2023-05-05 -### Uncategorized +### Fixed - Fix not writing to subfolders properly when a csv filesplit entry has a slash on its name. ## [1.13.2] - 2023-05-01 -### Uncategorized +### Added - Add support for `.dummy` section in csv file split format - Add readelf's `--section-headers` flag to elfObjDisasm @@ -226,319 +283,411 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Uncategorized -- Fix a possible `None` case in `getInstrCategoryFromStr` +### Added + - Add note about R3000GTE and R5900 instruction set support in the README +### Fixed + +- Fix a possible `None` case in `getInstrCategoryFromStr` + ## [1.13.0] - 2023-04-30 -### Uncategorized +### Added - Add support for R3000GTE ## [1.12.5] - 2023-04-28 -### Uncategorized +### Added -- Fix jumptable end detection algorithm on vram ranges different than `0x80XXXXXX` - Add `--function-info` flag to `elfObjDisasm` -- Option for emitting size directives in the generated assembly -- Add `--asm-emit-size-directive` flag to emit size directives on generated assembly +- Option for emitting `.size` directives in the generated assembly + - Add `--asm-emit-size-directive` flag to emit size directives on generated assembly + +### Fixed + +- Fix jumptable end detection algorithm on vram ranges different than `0x80XXXXXX` ## [1.12.4] - 2023-04-19 -### Uncategorized +### Fixed - Fix user-declared relocs having an incorrect addend ## [1.12.3] - 2023-04-18 -### Uncategorized +### Fixed - Fix conflicting `-V` flag ## [1.12.2] - 2023-04-18 -### Uncategorized +### Added + +- Add `--version` flag to every cli tool +- Properly detect `-mips1` `double` literals + - Fixes issue #57 + +### Changed +
+- Emit a comment saying if a reloc is a global one when the emit relocs flag is passed + +### Fixed - Fix data symbols not using local reloc overrides - Fix `.word`s not being updated after clearing pointers - Some pointer clearing fixes -- Add `--version` flag to every cli tool - Fix data not being properly disassembled on `singleFileDisasm` - Enforce UTF-8 encoding on generated asm files (PR #111) - Thanks to @1superchip -- Emit a comment saying if a reloc is a global one when the emit relocs flag is passed - Fix incorrect addends on non static symbols from elf files. - Fixes issue #110 - Fix a regression where some `%lo` symbols weren't being properly paired because of the `%got` being reused on PIC code - Fix sizes for inferred types -- Properly detect `-mips1` `double` literals - - Fixes issue #57 ## [1.12.1] - 2023-03-28 -### Uncategorized +### Fixed - Fix addends bigger than `0x7FFF` and smaller than `0x10000` ## [1.12.0] - 2023-03-21 -### Uncategorized +### Added + +- Expose known types to spimdisasm via `common.gKnownTypes` + +### Changed + +- Improve handling `static` (local) symbols for non relocated elf object files +- Fake/non used symbols are not longer emitted when disassembling elf `.o` files + +### Fixed -- Now exposes known types to spimdisasm via `common.gKnownTypes` - Prevents referencing labels and jumptable labels with addends - Prevents referencing labels and jumptable labels in non jumptable symbols -- `static` (local) symbol handling of non relocated elf object files were improved -- Fake/non used symbols are not longer being emitted when disassembling elf .o files ## [1.11.6] - 2023-03-10 -### Uncategorized +### Added - Add flag to specify instruction category in `elfObjDisasm` and `singleFileDisasm` -- Remove `ContextSymbol.type` and add `ContextSymbol.userDeclaredType` and `ContextSymbol.autodetectedType` - - A property named `.type` is available to provide backwards compatibility +- Add `ContextSymbol.userDeclaredType` and 
`ContextSymbol.autodetectedType` + +### Deprecated + +- Deprecate `ContextSymbol.type` + - `.type` is kept as a property to provide backwards compatibility ## [1.11.5] - 2023-03-07 -### Uncategorized +### Changed - Sort detected file boundaries and remove duplicates ## [1.11.4] - 2023-02-20 -### Uncategorized +### Changed -- Fix `--data-start` not processing hex correctly - Add function vrom to `--function-info` and tweak its input a bit +### Fixed + +- Fix `--data-start` not processing hex correctly + ## [1.11.3] - 2023-02-15 -### Uncategorized +### Added - Allow specifying a custom suffix to every autogenerated symbol with `--custom-suffix` - Add "referenced functions" information to the `--function-info` flag ## [1.11.2] - 2023-02-13 -### Uncategorized +### Added - Add flag to emit inline relocs +- Add `ContextSymbol.userDeclaredSize` +- Add `--function-info` flag + +### Changed + - Do not report extra padding in functions if user declared size matches the size of the function -- Rename `ContextSymbol.size` to `ContextSymbol.userDeclaredSize` -- Add `--function-info` flag - `FuncRodataEntry`: Fix migrate parameter if function has no rodata to be migrated +### Deprecated + +- Deprecate `ContextSymbol.size` + - It is kept as a property that wraps `ContextSymbol.userDeclaredSize` + ## [1.11.1] - 2023-01-30 -### Uncategorized +### Changed - Allow `None` in `FunctionRodataEntry` methods - `FuncRodataEntry`: Do not write `.section .text` if the function is `None` ## [1.11.0] - 2023-01-30 -### Uncategorized +### Added -- CLI changes: - - Install CLI tools as actual terminal programs - - Allow invoking the CLI tools from spimdisasm as subparsers - - The old way of invoking the CLI tools (`python3 -m spimdisasm.clitool`) is - now deprecated, but still works -- `disasmdis`: Fix crash if the input isn't a multiple of a word -- Report with a comment which instruction made spimdisasm detected as a - handwritten instruction +- Install CLI tools as actual terminal programs +- 
Allow invoking the CLI tools from spimdisasm as subparsers +- Report, with a comment, which instruction made spimdisasm detect a function as + handwritten function - New in the API: `FunctionRodataEntry` - Cleaner interface for rodata migration and similar functions - Provides method for intermixing functions and non-migrated rodata symbols in a way the correct order is still preserved - - Old functions from `FileHandlers` which provided rodata migration - functionalities are now deprecated + +### Deprecated + +- The old way of invoking the CLI tools (`python3 -m spimdisasm.clitool`) is now + deprecated, but still works +- Old functions from `FileHandlers` which provided rodata migration + functionalities are now deprecated in favour of the new `FunctionRodataEntry`. + +### Fixed + +- `disasmdis`: Fix crash if the input isn't a multiple of a word ## [1.10.6] - 2023-01-28 -### Uncategorized +### Added -- Fix some `.text` boundaries not being properly detected. - Add hardware registers as constants so they are used by `lui`/`ori` pairs - Check for bss symbol size to match user declared size -- Warn if the globalsegment vrom start and end is the same + - If the size doesn't match then a warning is printed to `stderr` +- Warn if the globalsegment's vrom start and end is the same - Identify 32bitsmode elf flag + +### Changed + - Avoid reporting leading zeroes as padding in rodata symbols if the size of the symbol matches the user declared one +### Fixed + +- Fix some `.text` boundaries not being properly detected. + ## [1.10.5] - 2023-01-28 -### Uncategorized +### Added - Emit a comment on invalid instructions disassembled as words + +### Removed + - Remove redundant `.noreorder` + +### Fixed + - Fix `disasmdis` ignoring endian parameter ## [1.10.4] - 2023-01-20 -### Uncategorized +### Added -- Avoid trashing function analysis for `j` jumps outside of the function - Add `EGCS` compiler -- `nop`s at the beginning of the files are now skipped. 
-- Fix `disasmdis` not properly accepting spaces - Add iQue-specific libultra syms and hardware regs - Add `--data-start` and `--data-end` flags to `singleFileDisasm` +### Changed + +- `nop`s at the beginning of the files are now skipped. + +### Fixed + +- Avoid trashing function analysis for `j` jumps outside of the function +- Fix `disasmdis` not properly accepting spaces + ## [1.10.3] - 2023-01-08 -### Uncategorized +### Fixed - Fix OoB for automatic type-based naming ## [1.10.2] - 2023-01-08 -### Uncategorized +### Fixed - Fix a small typo on `osAppNMIBuffer` ## [1.10.1] - 2023-01-05 -### Uncategorized +### Added -- Adds a workaround for addends which does not fit on a 16 bits value +- Add a workaround for addends which do not fit in a 16-bit value ## [1.10.0] - 2023-01-05 -### Uncategorized +### Added -- Rework system to allow/disallow addend references on data - Add support for splat's symbol_addrs format for standalone invocations +### Changed + +- Rework system to allow/disallow addend references on data + ## [1.9.2] - 2023-01-02 -### Uncategorized +### Added -- Fix emitting `.align` directives on unnaligned jumptables -- Fix rodata split detection not properly considering special jumptable alignment - Add `nameEnd` member to `ContextSymbol` to allow emitting a closing user-declared label +### Fixed + +- Fix emitting `.align` directives on unaligned jumptables +- Fix rodata split detection not properly considering special jumptable alignment + ## [1.9.1] - 2022-12-29 -### Uncategorized +### Changed - Emit a `.align 3` directive for every jumptable on non-IDO compilers ## [1.9.0] - 2022-12-28 -### Uncategorized +### Added -- Reloc system re-worked. Users can now provide their own relocs to improve the - automatic disassembly -- loPatch system has been removed and superseded by the global reloc system - `GlobalConfig` variables can now be set via environment variables. - Parameters passed by cli take priority over environment variables.
- Options configured via code (when using this as a library) take priority over environment variables. +### Changed + +- Reloc system re-worked. Users can now provide their own relocs to improve the + automatic disassembly + +### Removed + +- loPatch system has been removed and superseded by the global reloc system + ## [1.8.2] - 2022-12-19 -### Uncategorized +### Added - Check for banned symbols on addends references ## [1.8.1] - 2022-12-19 -### Uncategorized +### Added - New interface for allowing banning ranges of symbols, instead of having to add them one by one ## [1.8.0] - 2022-12-16 -### Uncategorized +### Added -- Require [`rabbitizer` 1.4.0](https://github.com/Decompollaborate/rabbitizer/releases/tag/1.4.0) - Allow to type-hint strings with `asciz` - Allow disassembling `.data` symbols as strings - This won't be automatically guessed as with `.rodata`, this only will happen with type-hints + +### Changed + +- Require [`rabbitizer` 1.4.0](https://github.com/Decompollaborate/rabbitizer/releases/tag/1.4.0) - `disasmdis` now accepts spaces and input from `stdin` -Meta: +### Removed - `setup.cfg` was removed and all its info was moved to `pyproject.toml` ## [1.7.12] - 2022-12-05 -### Uncategorized +### Added -- Allow symbol references on rodata (for non jump-tables) - Output version on disassembled files - Add option to show which symbols reference the disassembled symbol - Add `--file-splits` option to `elfObjDisasm` +### Changed + +- Allow symbol references on rodata (for non jump-tables) + ## [1.7.11] - 2022-11-29 -### Uncategorized +### Changed -Check size of floats and doubles before migrating them +- Check size of floats and doubles before migrating them ## [1.7.10] - 2022-11-26 -### Uncategorized +### Added - Allow changing the label used for jumptables labels with `GlobalConfig.ASM_JTBL_LABEL` -- Allow forcing (and forcing not to) migrate a symbol on rodata migration +- Allow forcing (and forcing not to) migrate a symbol on rodata migration +- Show 
`isAutogeneratedPad` in the context file (#79) + - Thanks @simonlindholm +- Show the first `%lo` reference for each symbol in the context (#80) + - Thanks @simonlindholm -- Elf fixes: - - Reference `NOTYPE` symbols - - Various GOT fixes - - Show isAutogeneratedPad in the context file #79 - - Show the first %lo reference for each symbol in the context #80 +### Fixed + +- Reference `NOTYPE` symbols +- Various GOT fixes ## [1.7.9] - 2022-11-09 -### Uncategorized +### Added -- Fix an OoB issue when trying to post-process the GOT analyzis on non-PIC mode - Emit a comment for automatically generated bss pads. - Those pads are created mainly to properly adjust the `.space` of a bss symbol if said symbol had an user-declared size +### Fixed + +- Fix an OoB issue when trying to post-process the GOT analyzis on non-PIC mode + ## [1.7.8] - 2022-11-04 -### Uncategorized +### Changed + +- Improve logic to disassemble `.byte`s and `.short`s +- Improve logic to find the jumptable ends (again) + +### Fixed - Fix function pointers being incorrectly tagged as `%call16` instead of `%got` - Avoid crashing when trying to migrate functions when there's no rodata section -- Improve logic to disassemble `.byte`s and `.short`s - Fix wrong migrated rodata on PIC programs - Avoid using addends on function references -- Improve logic to find the jumptable ends (again) ## [1.7.7] - 2022-11-02 -### Uncategorized +### Added + +- Allow disassembling data symbols as floats and doubles + +### Changed - Improve detection of the end of jumptables - Refactor REL handling. It has been simplified - This should improve disassembling `.o` files - Symbols from elfs are checked to be in the correct vram range before adding them to the context. 
-- Allow disassembling data symbols as floats and doubles ## [1.7.6] - 2022-10-31 -### Uncategorized +### Added + +- Add special handling for the GOT lazy resolver + +### Changed - Refactor GOT handling - Should fix IDO 5.3 disassembly -- Use glabels for jumptable labels when the functions are not being migrated +- Use `glabel`s for jumptable labels when the functions are not being migrated - Support `MIPS_GOT_HI16`, `MIPS_GOT_LO16`, `MIPS_CALL_HI16` and `MIPS_CALL_LO16` reloc types from `.rel` elf sections - `disasmdis` now ignores non hex characters - Negative addresses are considered as GOT accesses in PIC mode -- Add special handling for the GOT lazy resolver ## [1.7.5] - 2022-10-30 From 3309f938ae916fa211feff5c919965aff46161a6 Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 16:37:06 -0300 Subject: [PATCH 04/14] Finish categorizing changes on the CHANGELOG --- CHANGELOG.md | 318 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 193 insertions(+), 125 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f0d04ca..8cf0d2ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -691,28 +691,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.7.5] - 2022-10-30 -### Uncategorized +### Changed - Use `.gpword` on PIC jumptables -- Fix showing the got table in a few niche cases - Use `glabel` on migrated rodata again until we figure out why it messes matching - Do not use `glabel` on jumptable labels -## [1.7.4] - 2022-10-28 - -1.7.4: GOT fixes +### Fixed -### Uncategorized +- Fix showing the got table in a few niche cases -GOT fixes: +## [1.7.4] - 2022-10-28 -- Fix using GOT local addresses as functions -- Fix `.data` symbols incorrectly referencing GOT local addresses -- Fix `elfObjDisasm` using N64 specific symbols by default -- Migrated rodata will no longer use glabels - - This change was made to accommodate GOT global/local references +1.7.4: GOT fixes -New features: +### Added - The 
autodetected size of functions can now be queried from a `ContextSymbol` with `getSize()` @@ -724,7 +717,7 @@ New features: - This can be useful to bootstrap newly generated C files so the rodata order is easier to preserve -Misc changes: +### Changed - `elfObjDisasm` now display progress to stdout when disassembling - This behavior can be disabled with `-q` @@ -733,25 +726,39 @@ Misc changes: reference in the whole codebase must reference the symbol" - Seems to be a common pattern on PIC programs compiled with IDO +### Fixed + +- Fix using GOT local addresses as functions +- Fix `.data` symbols incorrectly referencing GOT local addresses +- Fix `elfObjDisasm` using N64 specific symbols by default +- Migrated rodata will no longer use glabels + - This change was made to accommodate GOT global/local references + ## [1.7.3] - 2022-10-24 1.7.3: `--aggressive-string-guesser` -### Uncategorized +### Added -- Do not infer the type of a variable if the access types are heterogeneous, - which may imply a struct - New `--aggressive-string-guesser` flag - Tries to decode string even if the string is empty, the symbol may have type information or it is referenced more than once - Add `PSYQ` compiler option - Currently it enables the same options as `SN64` +### Changed + +- Do not try to infer the type of a variable if the access types are heterogeneous, + which may imply a struct + ## [1.7.2] - 2022-10-24 -### Uncategorized +### Added - Allow passing context flags to `elfObjDisasm` + +### Fixed + - Fix type inference if the user declared a type for the variable - Fix some strings not being properly detected on elf files (again) @@ -759,56 +766,67 @@ Misc changes: 1.7.1: elf fixing: got and dynamic programs -### Uncategorized +### Added + +- `elfObjDisasm`: Flags which try to mimic `readelf`: `--file-header`, `--syms`, + `--relocs` and `-display-got` +- `elfObjDisasm`: Add `--split-functions`. 
It has the same behavior as `singleFileDisasm` +- `elfObjDisasm`: Warn when trying to disassemble an `abi2` (N32) elf +- `elfObjDisasm`: Warn for negative GOT accesses instead of crashing +- `elfObjDisasm`: Warn if unhandled flags are found in an elf file -- New flags in `elfObjDisasm`: - - Flags which try to mimic `readelf`: `--file-header`, `--syms`, `--relocs` - and `-display-got` - - `--split-functions`, has the same behavior as `singleFileDisasm` -- `elfObjDisasm` changes: - - Fix undefined symbols handling in - - Fix addends of got global symbols - - Use rel types types from the elf file if they are available instead of - trying to infer them - - Warn when trying to disassemble an `abi2` (N32) elf - - Warn for negative GOT accesses instead of crashing - - Warn if unhandled flags are found in an elf file - - Fix gp value on N32 abi - - `$gp` access are no longer symbolized if the address is not found in the got - table +### Fixed + +- `elfObjDisasm`: Fix undefined symbols handling +- `elfObjDisasm`: Fix addends of got global symbols +- `elfObjDisasm`: Use rel types from the elf file if they are available + instead of trying to infer them +- `elfObjDisasm`: Fix gp value detection on N32 abi +- `elfObjDisasm`: `$gp` accesses are no longer symbolized if the address is not + found in the got table - Fix rodata pointer detection in data on elf files (fixes #63) ## [1.7.0] - 2022-10-18 -### Uncategorized +### Added - Add `leoBootID` to libultra syms -- Fix `--help` screen. whoops - `--no-emit-cpload` flag to disable emitting the `.cpload` directive in PIC programs - `_gp_disp` is emitted instead of the raw immediate values - The `_gp_disp` value is emitted as a comment + +### Changed + - Updates `rabbitizer` requirement to 1.3.1 -- Fix `.byte` and `.short` in little endian - - Thanks @Xeeynamo (#62) -- Fix boundaries detection reporting in `elfObjDisasm` - - The output of this report is csv-friendly. 
Thanks @EllipticEllipsis (#65) - Tweak string disassembly - If a symbol is in the middle of a string then the string is aborted and disassembled as `.word`s - Check the next bytes (until a word boundary) after the nul terminator of a string are zero. Thanks @Xeeynamo (#64) - Strings with '\a' are no longer treated as real strings -- Fix labels not being emitted in rodata if they were not being word-aligned. - - Fixes #59 -- Remove `GlobalConfig.ADD_NEW_SYMBOLS` - Avoid pairing `%gp_got` symbols on non PIC code - General cleanups +### Removed + +- Remove `GlobalConfig.ADD_NEW_SYMBOLS` + +### Fixed + +- Fix `--help` screen. whoops +- Fix `.byte` and `.short` in little endian (#62) + - Thanks @Xeeynamo +- Fix boundaries detection reporting in `elfObjDisasm` (#65) + - The output of this report is csv-friendly. + - Thanks @EllipticEllipsis +- Fix labels not being emitted in rodata if they were not being word-aligned. + - Fixes #59 + ## [1.6.5] - 2022-10-07 1.6.5: elf fixes -### Uncategorized +### Fixed - Avoid crashing if a `%got` access' address is not in the global table. Prints a warning instead. 
@@ -818,15 +836,17 @@ Misc changes: ## [1.6.4] - 2022-10-06 - 1.6.4: Fix symtab parsing from relocated elfs +1.6.4: Fix symtab parsing from relocated elfs -### Uncategorized +### Fixed + +- Fix symtab parsing from relocated elfs ## [1.6.3] - 2022-10-04 1.6.3: Speedup for overlay disassembly -### Uncategorized +### Changed - Moves the `globalSegment` check above all the other checks in `getSymbol`, providing a faster lookup since most of the time overlays usually reference a @@ -837,29 +857,31 @@ Misc changes: 1.6.2: hotfix -### Uncategorized +### Fixed -Should address issue #55 +- Fix unhashable type crash + - Fixes #55 ## [1.6.1] - 2022-10-03 1.6.1: SN64 strikes again -### Uncategorized +### Added - Add align directive for doubles for SN64 - Track which functions references each symbol - Used to improve rodata migration on non-IDO compilers + +### Changed + - Use `g` format specifier when formatting disassembled floats and doubles ## [1.6.0] - 2022-10-01 1.6.0: Refactor front-end scripts -### Uncategorized +### Changed -- Deleted `singleFileDisasm.py`, `simpleFileDisasm.py`, `disasmdis.py`, - `rspDisasm.py`, `elfObjDisasm.py` - The frontend scripts were converted to submodules, now those can be executed with the `python3 -m spimdisasm.submodulename` syntax, allowing to execute them even in the pip installations of spimdisasm. 
@@ -870,11 +892,16 @@ Should address issue #55 - Migrate const variables to functions on SN64 - Remove `.rdata` check on migrated rodata for SN64 +### Removed + +- Deleted `singleFileDisasm.py`, `simpleFileDisasm.py`, `disasmdis.py`, + `rspDisasm.py`, `elfObjDisasm.py` + ## [1.5.7] - 2022-09-30 1.5.7: Fix 0 size bss variables -### Uncategorized +### Fixed - Fixes an issue where 0 size bss variables where being outputted if the last bss variable of a file had a size which filled its size up until the file boundary @@ -883,22 +910,29 @@ Should address issue #55 1.5.6: More SN64 tweaks and data/rodata endianess -### Uncategorized +### Added -- Fixes data analyzis. It was ignoring banned symbols -- Use `.rdata` on rodata migration for SN64 -- Fix `ASM_DATA_SYM_AS_LABEL` on bss generation -- Fix rodata symbols searching during migration - Add option to disassemble data/rodata with different endianess than the global one - Allow changing the string encoding per rodata segment + +### Changed + +- Use `.rdata` on rodata migration for SN64 + +### Fixed + +- Fixes data analyzis. + - It was ignoring banned symbols +- Fix `ASM_DATA_SYM_AS_LABEL` on bss generation +- Fix rodata symbols searching during migration - Return the created segment by `addOverlaySegment` ## [1.5.5] - 2022-09-24 1.5.5: Fix `ASM_DATA_SYM_AS_LABEL` -### Uncategorized +### Fixed - Fix `ASM_DATA_SYM_AS_LABEL`: It was outputing the data label @@ -906,20 +940,26 @@ Should address issue #55 1.5.4: More SN64 tweaks -### Uncategorized +### Added - `ASM_DATA_SYM_AS_LABEL`: Allow adding a data symbol as a simple label. 
-- `late_rodata` logic has been tweaked to only be applied when compiler is set - to IDO - Allow range checks on symbols referenced by data symbols - Add option to allow all addends referenced by data symbols + +### Changed + +- `late_rodata` logic has been tweaked to only be applied when compiler is set + to IDO + +### Fixed + - Fix use of user-declared sizes on functions and bss symbols ## [1.5.3] - 2022-09-23 1.5.3: More default banned symbols -### Uncategorized +### Changed - Add `0x7FFFFFFF` to the list of default banned symbols @@ -927,7 +967,7 @@ Should address issue #55 1.5.2: elf endianess fixes and rodata migration fixes -### Uncategorized +### Changed - Read endianess from elf file - Use `.section` directive on migrated rodata @@ -936,7 +976,7 @@ Should address issue #55 1.5.1: symbol detection fixes -### Uncategorized +### Changed - Check for data pointers in data itself and do a recheck in case the pointer is in the same section but behind the current symbol @@ -946,88 +986,94 @@ Should address issue #55 1.5.0: dynamic elfs -### Uncategorized +### Added - Add compatibility for dynamic elf files - `.dynsym`, `.dynstr` and `.dynamic` section parsing - `.got` table parsing - Use `%got` and `%call16` syntax for `$gp` relative symbols. 
- `.cpload REG` detection + +### Changed + - Minor changes on string splitting functions to improve its use as an api ## [1.4.2] - 2022-09-09 1.4.2: PS2 addresses hotfix -### Uncategorized +### Fixed -Fix 0 and negative addresses being treated as real symbols +- Fix 0 and negative addresses being treated as real symbols ## [1.4.1] - 2022-09-03 1.4.1: Overlay function start bugfix -### Uncategorized +### Fixed -Fixes a bug in which a function start was wrongly detected on an overlay because -the symbol existed for said address on the global segment +- Fixes a bug in which a function start was wrongly detected on an overlay because + the symbol existed for said address on the global segment ## [1.4.0] - 2022-08-27 1.4.0: RSP and R5900 support -### Uncategorized +### Added - Adds proper support for N64's RSP - Allows passing any `rabbitizer.InstrCategory` to SectionText to allow using - any instruction set supported by rabbitizer + any instruction set supported by `rabbitizer` ## [1.3.0] - 2022-07-08 1.3.0: rabbitizer 1.0.0 -### Uncategorized +### Added -- Updated to use rabbitizer 1.0.0 - Rodata boundaries detection +### Changed + +- Updated to use rabbitizer 1.0.0 + ## [1.2.4] - 2022-07-05 -### Uncategorized +### Changed -- Explicitly require a `rabbitizer` version minor than the next major version to +- Explicitly require a `rabbitizer` version smaller than the next major version to avoid possible compatibility issues. 
-- Set up automatic discovery on setup.cfg +- Set up automatic discovery on `setup.cfg` ## [1.2.3] - 2022-06-11 1.2.3: Fix Python 3.7 compatibility (again) -### Uncategorized +### Fixed -This time really fixes 3.7 compat, hopefully +- This time really fixes 3.7 compat, hopefully ## [1.2.2] - 2022-06-11 1.2.2: %hi/%lo symbol filtering fix -### Uncategorized +### Fixed -Prevents filtering out LUI/ADDIU combos from being real symbols +- Prevents filtering out `lui`/`addiu` combos from being real symbols ## [1.2.1] - 2022-06-10 1.2.1: Packaging fix -### Uncategorized +### Fixed -Hopefully fixes a packaging problem on PyPi +- Hopefully fixes a packaging problem on PyPi ## [1.2.0] - 2022-06-10 1.2.0: Faster disassembly from rabbitizer -### Uncategorized +### Changed - Use [rabbitizer](https://pypi.org/project/rabbitizer/) as the instruction decoder to speed up instruction analysis and disassembly @@ -1035,7 +1081,7 @@ Hopefully fixes a packaging problem on PyPi ## [1.1.7] - 2022-06-08 -### Uncategorized +### Changed - Allow changing the file offset width comment - `SortedDict`: New class which abstracts away the logic for keeping a sorted dictionary @@ -1044,16 +1090,19 @@ Hopefully fixes a packaging problem on PyPi 1.1.6: %hi reuse fix -### Uncategorized +### Fixed + +- Fix `%hi` reuse with wrong values ## [1.1.5] - 2022-06-03 1.1.5: More overlay fixes -### Uncategorized +### Changed - Move most of the instruction analyzing code to a new independent class -- Try to track instructions which are likely to not be a %lo and avoid pairing it. +- Try to track instructions which are likely to not be a `%lo` and avoid pairing + it. 
- Extra checks to avoid using labels from other overlay segments - Only stop a look ahead search until we find either an unconditional branch or a `jr` instruction @@ -1062,7 +1111,7 @@ Hopefully fixes a packaging problem on PyPi 1.1.4: Overlay handling fixes -### Uncategorized +### Changed - Overall adds more vrom checks for handling shared-vram overlays. - The autogenerated symbol size now adjusts itself considering the symbol type @@ -1070,115 +1119,134 @@ Hopefully fixes a packaging problem on PyPi ## [1.1.3] - 2022-06-01 - 1.1.3: Minor patch +1.1.3: Minor patch -### Uncategorized +### Added -Adds a way to register a name getter callback for symbols +- Adds a way to register a name getter callback for symbols ## [1.1.2] - 2022-06-01 1.1.2: More symbol finding cleanups -### Uncategorized +### Added -- General cleanups and fixes related to symbol finding. - New `COMPILER` option in `GlobalConfig`, used to enable some compiler specific tweaks +### Fixed + +- General cleanups and fixes related to symbol finding. + ## [1.1.1] - 2022-05-30 1.1.1: Symbol finding fixes -### Uncategorized - -Various fixes: +### Fixed - Multiples workarounds for pairing multiples %hi to the same %lo -- Fix `J` target calculation for the look ahead symbol finder -- Special cases for `LUI`s on delay slots -- Track moving registers by using `MOVE`, `OR` and `ADDU` +- Fix `j` target calculation for the look ahead symbol finder +- Special cases for `lui`s on delay slots +- Track moving registers by using `move`, `or` and `addu` - Invalidate some registers after function calls - Check negative branches - Fix jump table detector for SN64 ## [1.1.0] - 2022-05-29 - 1.1.0: Overlay support (hopefully) +1.1.0: Overlay support (hopefully) -### Uncategorized +### Added - Add support for overlays which share VRAMs and overlays which may communicate to overlays from other categories. 
- Each new category and overlay segment must be registered in `Context` -- `ElementBase#vram` is now just an `int` as opposed to old `int|None`. - The `vrom` of elements is tracked and stored. It is mainly used to autogenerate overlay's names. -- The differentiation in symbol categories (labels, functions, symbols, etc) is +- The categorization in symbol categories (labels, functions, symbols, etc) is removed. Everything now is stored in only one big dictionary to avoid duplication and lower memory usage. -- Remove the concept of "fake functions" + +### Changed + +- `ElementBase#vram` is now just an `int` as opposed to old `int|None`. - `addSymbol`, `getSymbol` and similar methods were moved to `ElementBase` because of the need for the respective element overlay info. - Symbol name generation is delayed as much as possible and autogenerated on the fly. -- Some minor symbol finder fixes -- Bugfix symbols disappearing for no reason - General cleanups +### Removed + +- Remove the concept of "fake functions" + +### Fixed + +- Some minor symbol finder fixes +- Fix symbols disappearing for no reason + ## [1.0.6] - 2022-05-26 1.0.6: Even more SN64 fixes -### Uncategorized +### Added -- Add more checks for J as unconditional branch -- Allow disabling %hi/%lo syntax for constants +- Add more checks for `j` as unconditional branch +- Allow disabling `%hi`/`%lo` syntax for constants - Add extra global label for other symbols (usually labels) in functions when `GlobalConfig.ASM_TEXT_FUNC_AS_LABEL` is enabled. 
## [1.0.5] - 2022-05-26 -### Uncategorized +### Added -- Allow setting a .ent and the function name as a label +- Allow setting a `.ent` and the function name as a label - Float register `$31` fix for `NAMED_REGISTERS=False` -- Fix CI builds (2) + +### Changed + - Only apply the SN64 DIV fix for non handwritten functions - Finer control over pseudo instructions +### Fixed + +- Fix CI builds (2) + ## [1.0.4] - 2022-05-26 -### Uncategorized +### Fixed -Fix the CI wheel +- Fix the CI wheel ## [1.0.3] - 2022-05-26 -### Uncategorized +### Changed -Add CI which builds a wheel and publishes it to PyPI +- Add CI which builds a wheel and publishes it to PyPI ## [1.0.2] - 2022-05-26 -### Uncategorized +### Added + +- Allow changing the line ends to anything via `GlobalConfig.LINE_ENDS` + +### Fixed - Fix missing `f` on float registers when `GlobalConfig.NAMED_REGISTERS` was set to `False` -- Allow changing the line ends to anything via `GlobalConfig.LINE_ENDS` ## [1.0.1] - 2022-05-26 -### Uncategorized - -Changes: +### Changed - Improve RAM usage by removing an almost unused dictionary from the Instruction classes ## [1.0.0] - 2022-05-26 -### Uncategorized +### Added + +- Version 1.0.0 [unreleased]: https://github.com/Decompollaborate/spimdisasm/compare/master...develop [1.17.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.2...1.17.3 From 08c4ea9e913dcfb34f7428a8cb328e9615ca6872 Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 16:41:09 -0300 Subject: [PATCH 05/14] Add yanked versions to changelog --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cf0d2ea..5f777bb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -862,7 +862,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix unhashable type crash - Fixes #55 -## [1.6.1] - 2022-10-03 +## [1.6.1] - 2022-10-03 [YANKED] 1.6.1: SN64 strikes again @@ -1069,7 +1069,7 @@ and this project 
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Hopefully fixes a packaging problem on PyPi -## [1.2.0] - 2022-06-10 +## [1.2.0] - 2022-06-10 [YANKED] 1.2.0: Faster disassembly from rabbitizer @@ -1212,13 +1212,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix CI builds (2) -## [1.0.4] - 2022-05-26 +## [1.0.4] - 2022-05-26 [YANKED] ### Fixed - Fix the CI wheel -## [1.0.3] - 2022-05-26 +## [1.0.3] - 2022-05-26 [YANKED] ### Changed From a818535f4ee46d2a862445dfb4332c5fa75f5f0d Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 16:45:41 -0300 Subject: [PATCH 06/14] Add markdown linter to GHA --- .github/workflows/md_lint.yml | 17 ++++++++++++++++ README.md | 38 ++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/md_lint.yml diff --git a/.github/workflows/md_lint.yml b/.github/workflows/md_lint.yml new file mode 100644 index 00000000..489008e3 --- /dev/null +++ b/.github/workflows/md_lint.yml @@ -0,0 +1,17 @@ +name: Lint markdown files + +# Build on every branch push, tag push, and pull request change: +on: [push, pull_request] + +jobs: + checks: + runs-on: ubuntu-latest + name: Lint md files + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Lint markdown files + uses: articulate/actions-markdownlint@v1.1.0 + with: + config: .markdownlint.jsonc diff --git a/README.md b/README.md index 7ae8ad87..776f477b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ A matching MIPS disassembler API and front-ends with built-in instruction analys Currently supports all the CPU instructions for MIPS I, II, III and IV. -Mainly focused on supporting Nintendo 64 binaries, but it should work with other MIPS platforms too. +Mainly focused on supporting Nintendo 64 binaries, but it should work with other +MIPS platforms too. 
## Installing @@ -20,21 +21,25 @@ The recommended way to install is using from the PyPi release, via `pip`: pip install spimdisasm ``` -In case you want to mess with the latest development version without wanting to clone the repository, then you could use the following command: +In case you want to mess with the latest development version without wanting to +clone the repository, then you could use the following command: ```bash pip uninstall spimdisasm pip install git+https://github.com/Decompollaborate/spimdisasm.git@develop ``` -NOTE: Installing the development version is not recommended. Proceed at your own risk. +NOTE: Installing the development version is not recommended. Proceed at your own +risk. ## Features - Produces matching assembly. - Supports `.text`, `.data`, `.rodata` and `.bss` disassembly. - - The reloc section from Zelda 64 and some other games is supported too, but no front-end script uses it yet. -- Generates separated files for each section of a file (`.text`, `.data`, `.rodata` and `.bss`). + - The reloc section from Zelda 64 and some other games is supported too, but + no front-end script uses it yet. +- Generates separated files for each section of a file (`.text`, `.data`, + `.rodata` and `.bss`). - Supports multiple files spliting from a single input binary. - Automatic function detection. - Can detect if a function is handwritten too. @@ -45,7 +50,9 @@ NOTE: Installing the development version is not recommended. Proceed at your own - String detection with medium to high success rate. - Allows to set user-defined function and symbol names. - Big, little and middle endian support. -- Autogenerated symbols can be named after the section they come from (`RO_` and `B_` for `.rodata` and `.bss` sections) or its type (`STR_`, `FLT_` and `DBL_` for string, floats and doubles respectively). 
+- Autogenerated symbols can be named after the section they come from (`RO_` and + `B_` for `.rodata` and `.bss` sections) or its type (`STR_`, `FLT_` and `DBL_` + for string, floats and doubles respectively). - Simple file boundary detection. - Detects boundaries on .text and .rodata sections - Lots of features can be turned on and off. @@ -59,28 +66,35 @@ NOTE: Installing the development version is not recommended. Proceed at your own - `div`/`divu` fix: tweaks a bit the produced `div`, `divu` and `break` instructions. - Support for specific MIPS instruction sets: - N64's RSP instruction disassembly support. - - RSP decoding has been tested to build back to matching assemblies with [armips](https://github.com/Kingcom/armips/). + - RSP decoding has been tested to build back to matching assemblies with + [armips](https://github.com/Kingcom/armips/). - PS1's R3000 GTE instruction set support. - PS2's R5900 EE instruction set support. - (Experimental) Same VRAM overlay support. - - Overlays which are able to reference symbols from other overlays in other categories/types is supported too. + - Overlays which are able to reference symbols from other overlays in other + categories/types is supported too. - NOTE: This feature lacks lots of testing and probably has many bugs. ## How to use -This repo can be used either by using the existing front-end scripts or by creating new programs on top of the back-end API. +This repo can be used either by using the existing front-end scripts or by +creating new programs on top of the back-end API. ### Front-end Every front-end CLI tool has its own `--help` screen. 
-The included tool can be executed with either `spimdisasm modulename` (for example `spimdisasm disasmdis --help`) or directly `modulename` (for example `spimdisasm --help`) +The included tool can be executed with either `spimdisasm modulename` (for +example `spimdisasm disasmdis --help`) or directly `modulename` (for example +`spimdisasm --help`) -- `singleFileDisasm`: Allows to disassemble a single binary file, producing matching assembly files. +- `singleFileDisasm`: Allows to disassemble a single binary file, producing + matching assembly files. - `disasmdis`: Disassembles raw hex passed to the CLI as a MIPS instruction. -- `elfObjDisasm`: \[EXPERIMENTAL\] Allows to disassemble elf files. Generated assembly files are not guaranteed to match or even be assemblable. +- `elfObjDisasm`: \[EXPERIMENTAL\] Allows to disassemble elf files. Generated + assembly files are not guaranteed to match or even be assemblable. - `rspDisasm`: Disassemblies RSP binaries. From 96adcd2ed123e62ee3224e1722298dcae023f39d Mon Sep 17 00:00:00 2001 From: angie Date: Thu, 21 Sep 2023 16:53:56 -0300 Subject: [PATCH 07/14] Update readme --- README.md | 68 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 776f477b..06b07b01 100644 --- a/README.md +++ b/README.md @@ -13,25 +13,6 @@ Currently supports all the CPU instructions for MIPS I, II, III and IV. Mainly focused on supporting Nintendo 64 binaries, but it should work with other MIPS platforms too. 
-## Installing - -The recommended way to install is using from the PyPi release, via `pip`: - -```bash -pip install spimdisasm -``` - -In case you want to mess with the latest development version without wanting to -clone the repository, then you could use the following command: - -```bash -pip uninstall spimdisasm -pip install git+https://github.com/Decompollaborate/spimdisasm.git@develop -``` - -NOTE: Installing the development version is not recommended. Proceed at your own -risk. - ## Features - Produces matching assembly. @@ -75,6 +56,55 @@ risk. categories/types is supported too. - NOTE: This feature lacks lots of testing and probably has many bugs. +## Installing + +The recommended way to install is using from the PyPi release, via `pip`: + +```bash +python3 -m pip install -U spimdisasm +``` + +If you use a `requirements.txt` file in your repository, then you can add +this library with the following line: + +```txt +ipl3checksum>=1.17.4,<2.0.0 +`````` + +### Development version + +The unstable development version is located at the [develop](https://github.com/Decompollaborate/spimdisasm/tree/develop) +branch. PRs should be made into that branch instead of the main one. + +The recommended way to install a locally cloned repo is by passing the `-e` +(editable) flag to `pip`. + +```bash +python3 -m pip install -e . +``` + +In case you want to mess with the latest development version without wanting to +clone the repository, then you could use the following command: + +```bash +pip uninstall spimdisasm +pip install git+https://github.com/Decompollaborate/spimdisasm.git@develop +``` + +NOTE: Installing the development version is not recommended unless you know what +you are doing. Proceed at your own risk. + +## Versioning and changelog + +This library follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +We try to always keep backwards compatibility, so no breaking changes should +happen until a major release (i.e. jumping from 1.X.X to 2.0.0). 
+ +To see what changed on each release check either the [CHANGELOG.md](CHANGELOG.md) +file or check the [releases page on Github](https://github.com/Decompollaborate/spimdisasm/releases). +You can also use [this link](https://github.com/Decompollaborate/spimdisasm/releases/latest) +to check the latest release. + ## How to use This repo can be used either by using the existing front-end scripts or by From 8aece8a3b001a8d8394588a8c1c1c7e44f6af77f Mon Sep 17 00:00:00 2001 From: angie Date: Wed, 27 Sep 2023 10:08:01 -0300 Subject: [PATCH 08/14] Fix not decoding some valid Japense strings on `decodeBytesToStrings` --- CHANGELOG.md | 8 ++++---- pyproject.toml | 2 +- spimdisasm/__init__.py | 2 +- spimdisasm/common/Utils.py | 25 ++++++++++++++++++++----- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f777bb7..d20ee442 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `CHANGELOG.md` +### Fixed + +- Fix not decoding some valid Japense strings on `decodeBytesToStrings` + ## [1.17.3] - 2023-09-18 ### Changed @@ -131,8 +135,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.15.4] - 2023-07-14 -### Uncategorized - ### Changed - Avoid taking into account invalid instructions when trying to find function boundaries. 
@@ -281,8 +283,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.13.1] - 2023-04-30 -### Uncategorized - ### Added - Add note about R3000GTE and R5900 instruction set support in the README diff --git a/pyproject.toml b/pyproject.toml index 912383a5..4aa963e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "spimdisasm" # Version should be synced with spimdisasm/__init__.py -version = "1.17.4.dev0" +version = "1.17.4.dev1" description = "MIPS disassembler" # license = "MIT" readme = "README.md" diff --git a/spimdisasm/__init__.py b/spimdisasm/__init__.py index b834f895..2548f2c5 100644 --- a/spimdisasm/__init__.py +++ b/spimdisasm/__init__.py @@ -6,7 +6,7 @@ from __future__ import annotations __version_info__: tuple[int, int, int] = (1, 17, 4) -__version__ = ".".join(map(str, __version_info__)) + ".dev0" +__version__ = ".".join(map(str, __version_info__)) + ".dev1" __author__ = "Decompollaborate" from . import common as common diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 617217c8..3bbf1a19 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -254,6 +254,11 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None: 0x8D, } +escapeCharactersMaybeReal = { + 0x8C, + 0x8D, +} + def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0) -> tuple[list[str], int]: result = [] @@ -263,15 +268,25 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato char = buf[offset + i] if char in bannedEscapeCharacters: return [], -1 - elif char in escapeCharactersSpecialCases: + + if char in escapeCharactersSpecialCases: + usedChar = False if dst: try: - decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) + decoded = dst.decode(stringEncoding) except UnicodeDecodeError: - return [], -1 - result.append(decoded) + if char not in escapeCharactersMaybeReal: + return [], -1 + try: + 
dst.append(char) + usedChar = True + decoded = dst.decode(stringEncoding) + except UnicodeDecodeError: + return [], -1 + result.append(rabbitizer.Utils.escapeString(decoded)) dst.clear() - result.append(f"\\x{char:02X}") + if not usedChar: + result.append(f"\\x{char:02X}") else: dst.append(char) i += 1 From 6ba2f0f905822a8db696fc5567cbe884205eed63 Mon Sep 17 00:00:00 2001 From: angie Date: Wed, 27 Sep 2023 10:10:35 -0300 Subject: [PATCH 09/14] Fix typo in readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 06b07b01..0e137637 100644 --- a/README.md +++ b/README.md @@ -68,8 +68,8 @@ If you use a `requirements.txt` file in your repository, then you can add this library with the following line: ```txt -ipl3checksum>=1.17.4,<2.0.0 -`````` +spimdisasm>=1.17.4,<2.0.0 +``` ### Development version From 4a13ef1d0886f3c4bdb1e91d34580fa32f3f228e Mon Sep 17 00:00:00 2001 From: angie Date: Wed, 27 Sep 2023 10:42:13 -0300 Subject: [PATCH 10/14] Avoid breaking down some valid Japenese strings --- CHANGELOG.md | 1 + spimdisasm/common/Utils.py | 7 +++++-- spimdisasm/mips/symbols/MipsSymbolBase.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d20ee442..95e9fc88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fix not decoding some valid Japense strings on `decodeBytesToStrings` + - Specifically SHIFT-JIS ones with raw characters `0x8C` and `0x8D` ## [1.17.3] - 2023-09-18 diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 3bbf1a19..27d39e74 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -274,7 +274,9 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato if dst: try: decoded = dst.decode(stringEncoding) + correctFirstdecode = True except UnicodeDecodeError: + correctFirstdecode 
= False if char not in escapeCharactersMaybeReal: return [], -1 try: @@ -283,8 +285,9 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato decoded = dst.decode(stringEncoding) except UnicodeDecodeError: return [], -1 - result.append(rabbitizer.Utils.escapeString(decoded)) - dst.clear() + if correctFirstdecode: + result.append(rabbitizer.Utils.escapeString(decoded)) + dst.clear() if not usedChar: result.append(f"\\x{char:02X}") else: diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index fa1714da..98cc376a 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -464,7 +464,7 @@ def getNthWordAsString(self, i: int) -> tuple[str, int]: comment = self.generateAsmLineComment(localOffset) result = f"{comment} " - commentPaddingNum = 22 + commentPaddingNum = 16 + common.GlobalConfig.ASM_COMMENT_OFFSET_WIDTH if not common.GlobalConfig.ASM_COMMENT: commentPaddingNum = 1 From a6f9291cf28db6c937cdb65347abfe3a5d21da0c Mon Sep 17 00:00:00 2001 From: angie Date: Wed, 27 Sep 2023 11:00:35 -0300 Subject: [PATCH 11/14] redo the logic on decodeBytesToStrings a bit --- spimdisasm/common/Utils.py | 32 +++++++++++------------ spimdisasm/mips/symbols/MipsSymbolBase.py | 2 +- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 27d39e74..c76291cb 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -269,27 +269,25 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato if char in bannedEscapeCharacters: return [], -1 - if char in escapeCharactersSpecialCases: - usedChar = False + theEscapeCharacterWasARealChar = False + if char in escapeCharactersMaybeReal: + dst.append(char) + try: + decoded = dst.decode(stringEncoding) + theEscapeCharacterWasARealChar = True + except UnicodeDecodeError: + pass + dst.pop() + + if not 
theEscapeCharacterWasARealChar and char in escapeCharactersSpecialCases: if dst: try: decoded = dst.decode(stringEncoding) - correctFirstdecode = True except UnicodeDecodeError: - correctFirstdecode = False - if char not in escapeCharactersMaybeReal: - return [], -1 - try: - dst.append(char) - usedChar = True - decoded = dst.decode(stringEncoding) - except UnicodeDecodeError: - return [], -1 - if correctFirstdecode: - result.append(rabbitizer.Utils.escapeString(decoded)) - dst.clear() - if not usedChar: - result.append(f"\\x{char:02X}") + return [], -1 + result.append(rabbitizer.Utils.escapeString(decoded)) + dst.clear() + result.append(f"\\x{char:02X}") else: dst.append(char) i += 1 diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index 98cc376a..fa1714da 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -464,7 +464,7 @@ def getNthWordAsString(self, i: int) -> tuple[str, int]: comment = self.generateAsmLineComment(localOffset) result = f"{comment} " - commentPaddingNum = 16 + common.GlobalConfig.ASM_COMMENT_OFFSET_WIDTH + commentPaddingNum = 22 if not common.GlobalConfig.ASM_COMMENT: commentPaddingNum = 1 From 32a134a8bdecbf63bd78ab83bcfe1c6a2cc5aff0 Mon Sep 17 00:00:00 2001 From: angie Date: Wed, 27 Sep 2023 11:31:53 -0300 Subject: [PATCH 12/14] further improve japanese string decoding --- spimdisasm/common/Utils.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index c76291cb..75aca378 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -254,7 +254,7 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None: 0x8D, } -escapeCharactersMaybeReal = { +escapeCharactersMaybeRealLookAhead = { 0x8C, 0x8D, } @@ -270,13 +270,21 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato return [], -1 
theEscapeCharacterWasARealChar = False - if char in escapeCharactersMaybeReal: + if char in escapeCharactersMaybeRealLookAhead: dst.append(char) try: decoded = dst.decode(stringEncoding) theEscapeCharacterWasARealChar = True except UnicodeDecodeError: - pass + if offset + i + 1 < len(buf): + nextChar = buf[offset + i + 1] + dst.append(nextChar) + try: + decoded = dst.decode(stringEncoding) + theEscapeCharacterWasARealChar = True + except UnicodeDecodeError: + pass + dst.pop() dst.pop() if not theEscapeCharacterWasARealChar and char in escapeCharactersSpecialCases: @@ -284,7 +292,7 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato try: decoded = dst.decode(stringEncoding) except UnicodeDecodeError: - return [], -1 + return [], -2 result.append(rabbitizer.Utils.escapeString(decoded)) dst.clear() result.append(f"\\x{char:02X}") @@ -294,21 +302,21 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato if offset + i >= len(buf): # Reached the end of the buffer without finding an 0 - return [], -1 + return [], -3 if dst: try: - decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) + decoded = dst.decode(stringEncoding) except UnicodeDecodeError: - return [], -1 - result.append(decoded) + return [], -4 + result.append(rabbitizer.Utils.escapeString(decoded)) # To be a valid aligned string, the next word-aligned bytes needs to be zero checkStartOffset = offset + i checkEndOffset = min((checkStartOffset & ~3) + 4, len(buf)) while checkStartOffset < checkEndOffset: if buf[checkStartOffset] != terminator: - return [], -1 + return [], -5 checkStartOffset += 1 return result, i From 8f293cb52175edfcb5bcbc9ba3eec154e4ff3734 Mon Sep 17 00:00:00 2001 From: angie Date: Fri, 29 Sep 2023 12:15:46 -0300 Subject: [PATCH 13/14] Add a special case for japanese characters that include a backslash --- spimdisasm/common/Utils.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) 
diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 75aca378..74b1acd1 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -267,8 +267,9 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato while offset + i < len(buf) and buf[offset + i] != terminator: char = buf[offset + i] if char in bannedEscapeCharacters: - return [], -1 + return [], -10 + # Some of the escape characters are real Japanese characters, so we need to properly check them theEscapeCharacterWasARealChar = False if char in escapeCharactersMaybeRealLookAhead: dst.append(char) @@ -287,12 +288,34 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato dst.pop() dst.pop() + if char > 0x7F and offset + i + 1 < len(buf): + nextChar = buf[offset + i + 1] + if nextChar == 0x5C: # '\\' + # If the second part of a Japanese character is the 0x5C value ('\\') then we need to + # special handle it. Otherwise when it gets iconv'd then the compiler will get confused + # and think it should try to escape the next character instead. 
+ # So we break down the string here, add these two characters as individual characters and + # we skip them + + if dst: + try: + decoded = dst.decode(stringEncoding) + except UnicodeDecodeError: + return [], -60 + result.append(rabbitizer.Utils.escapeString(decoded)) + dst.clear() + result.append(f"\\x{char:02X}") + result.append(f"\\x{nextChar:02X}") + + i += 2 + continue + if not theEscapeCharacterWasARealChar and char in escapeCharactersSpecialCases: if dst: try: decoded = dst.decode(stringEncoding) except UnicodeDecodeError: - return [], -2 + return [], -70 result.append(rabbitizer.Utils.escapeString(decoded)) dst.clear() result.append(f"\\x{char:02X}") @@ -302,13 +325,13 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato if offset + i >= len(buf): # Reached the end of the buffer without finding an 0 - return [], -3 + return [], -80 if dst: try: decoded = dst.decode(stringEncoding) except UnicodeDecodeError: - return [], -4 + return [], -90 result.append(rabbitizer.Utils.escapeString(decoded)) # To be a valid aligned string, the next word-aligned bytes needs to be zero @@ -316,7 +339,7 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato checkEndOffset = min((checkStartOffset & ~3) + 4, len(buf)) while checkStartOffset < checkEndOffset: if buf[checkStartOffset] != terminator: - return [], -5 + return [], -100 checkStartOffset += 1 return result, i From c8f488c3be131a214b5a34e756e940debf0a34fd Mon Sep 17 00:00:00 2001 From: angie Date: Sat, 7 Oct 2023 11:15:28 -0300 Subject: [PATCH 14/14] version bump --- CHANGELOG.md | 3 +++ pyproject.toml | 2 +- spimdisasm/__init__.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95e9fc88..007c48c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.17.4] - 2023-10-07 + ### Added - Add 
`CHANGELOG.md` @@ -1250,6 +1252,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Version 1.0.0 [unreleased]: https://github.com/Decompollaborate/spimdisasm/compare/master...develop +[1.17.4]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.3...1.17.4 [1.17.3]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.2...1.17.3 [1.17.2]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.1...1.17.2 [1.17.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.17.0...1.17.1 diff --git a/pyproject.toml b/pyproject.toml index 4aa963e7..ba890da1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "spimdisasm" # Version should be synced with spimdisasm/__init__.py -version = "1.17.4.dev1" +version = "1.17.4" description = "MIPS disassembler" # license = "MIT" readme = "README.md" diff --git a/spimdisasm/__init__.py b/spimdisasm/__init__.py index 2548f2c5..bf2d9ca1 100644 --- a/spimdisasm/__init__.py +++ b/spimdisasm/__init__.py @@ -6,7 +6,7 @@ from __future__ import annotations __version_info__: tuple[int, int, int] = (1, 17, 4) -__version__ = ".".join(map(str, __version_info__)) + ".dev1" +__version__ = ".".join(map(str, __version_info__)) __author__ = "Decompollaborate" from . import common as common