Skip to content

Commit

Permalink
Merge pull request #174 from Decompollaborate/develop
Browse files Browse the repository at this point in the history
1.30.0
  • Loading branch information
AngheloAlf authored Sep 10, 2024
2 parents e1f0948 + 7458428 commit 9593eb7
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 29 deletions.
29 changes: 29 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [1.30.0] - 2024-09-10

### Changed

- Avoid migrating rodata symbols to functions if they are referenced by other
data or rodata symbols.
- Disallow data and rodata symbols from referencing jumptables.
- BREAKING: Change the rodata migration algorithm.
- This allows for the algorithm to migrate unreferenced symbols that are
between other symbols that do get migrated to the given function.
- The algorithm will now stop migrating symbols as soon as it finds a symbol
that should not be migrated to the current function (i.e. it should be
migrated to another function, it is referenced by a data symbol, etc).
- This could be an abrupt change for projects that were relying on the old
migration scheme, because some symbols may suddenly disappear, preventing a
correct build.
- This change should also reduce (and hopefully remove) the gaps generated
between symbols during rodata migration.

### Deprecated

- Deprecate `SymbolBase.isRdata`.

### Fixed

- Fix pointer tracking: fix garbage state of registers after function jumping
outside of the current function.

## [1.29.0] - 2024-09-09

### Added
Expand Down Expand Up @@ -1627,6 +1655,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Version 1.0.0

[unreleased]: https://github.com/Decompollaborate/spimdisasm/compare/master...develop
[1.30.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.29.0...1.30.0
[1.29.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.28.1...1.29.0
[1.28.1]: https://github.com/Decompollaborate/spimdisasm/compare/1.28.0...1.28.1
[1.28.0]: https://github.com/Decompollaborate/spimdisasm/compare/1.27.0...1.28.0
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ If you use a `requirements.txt` file in your repository, then you can add
this library with the following line:

```txt
spimdisasm>=1.29.0,<2.0.0
spimdisasm>=1.30.0,<2.0.0
```

### Development version
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[project]
name = "spimdisasm"
# Version should be synced with spimdisasm/__init__.py
version = "1.29.0"
version = "1.30.0"
description = "MIPS disassembler"
readme = "README.md"
license = {file = "LICENSE"}
Expand Down
2 changes: 1 addition & 1 deletion spimdisasm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from __future__ import annotations

__version_info__: tuple[int, int, int] = (1, 29, 0)
__version_info__: tuple[int, int, int] = (1, 30, 0)
__version__ = ".".join(map(str, __version_info__))# + "-dev0"
__author__ = "Decompollaborate"

Expand Down
6 changes: 6 additions & 0 deletions spimdisasm/common/GlobalConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None:
if self.RODATA_STRING_GUESSER_LEVEL >= 0:
self.RODATA_STRING_GUESSER_LEVEL = 1

ALLOW_MIGRATING_CONST_VARIABLES: bool = False

AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE: bool = True
"""Name autogenerated symbols after the section those are come from
Expand Down Expand Up @@ -336,6 +338,7 @@ def addParametersToArgParse(self, parser: argparse.ArgumentParser) -> None:
backendConfig.add_argument("--string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Toggles the string guesser feature. Defaults to {self.STRING_GUESSER}", action=Utils.BooleanOptionalAction)
backendConfig.add_argument("--aggressive-string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {self.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction)

backendConfig.add_argument("--allow-migrating-const-variables", help=f"Allow migrating const variables. They must be referenced by a single function at a time. Defaults to {self.ALLOW_MIGRATING_CONST_VARIABLES}", action=Utils.BooleanOptionalAction)

backendConfig.add_argument("--name-vars-by-section", help=f"Toggles the naming-after-section feature for autogenerated names. This means autogenerated symbols get a RO_ or B_ prefix if the symbol is from a rodata or bss section. Defaults to {self.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE}", action=Utils.BooleanOptionalAction)
backendConfig.add_argument("--name-vars-by-type", help=f"Toggles the naming-after-type feature for autogenerated names. This means autogenerated symbols can get a STR_, FLT_ or DBL_ prefix if the symbol is a string, float or double. Defaults to {self.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE}", action=Utils.BooleanOptionalAction)
Expand Down Expand Up @@ -500,6 +503,9 @@ def parseArgs(self, args: argparse.Namespace) -> None:
if args.aggressive_string_guesser is not None:
self.AGGRESSIVE_STRING_GUESSER = args.aggressive_string_guesser

if args.allow_migrating_const_variables is not None:
self.ALLOW_MIGRATING_CONST_VARIABLES = args.allow_migrating_const_variables

if args.name_vars_by_section is not None:
self.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE = args.name_vars_by_section
if args.name_vars_by_type is not None:
Expand Down
23 changes: 22 additions & 1 deletion spimdisasm/frontendCommon/FrontendUtilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,14 @@ def progressCallback_writeProcessedFiles(i: int, filePath: str, processedFilesCo
def migrateFunctions(processedFiles: dict[common.FileSectionType, list[mips.sections.SectionBase]], functionMigrationPath: Path, progressCallback: ProgressCallbackType|None=None) -> None:
funcTotal = sum(len(x.symbolList) for x in processedFiles.get(common.FileSectionType.Text, []))
rodataFileList = processedFiles.get(common.FileSectionType.Rodata, [])

remainingRodataSyms: list[mips.symbols.SymbolBase] = []
rodataSectionNamesMapping: dict[int, str] = dict()
for x in rodataFileList:
remainingRodataSyms.extend(x.symbolList)
for y in x.symbolList:
rodataSectionNamesMapping[y.vram] = x.getName()

i = 0
for textFile in processedFiles.get(common.FileSectionType.Text, []):
filePath = functionMigrationPath / textFile.getName()
Expand All @@ -180,13 +188,26 @@ def migrateFunctions(processedFiles: dict[common.FileSectionType, list[mips.sect
assert isinstance(func, mips.symbols.SymbolFunction)
entry = mips.FunctionRodataEntry.getEntryForFuncFromPossibleRodataSections(func, rodataFileList)

for sym in entry.iterRodataSyms():
if sym in remainingRodataSyms:
remainingRodataSyms.remove(sym)

funcPath = filePath / (func.getName()+ ".s")
common.Utils.printVerbose(f"Writing function {funcPath}")
with funcPath.open("w") as f:
entry.writeToFile(f, writeFunction=True)

i += 1
mips.FilesHandlers.writeOtherRodata(functionMigrationPath, rodataFileList)

for rodataSym in remainingRodataSyms:
rodataPath = functionMigrationPath / rodataSectionNamesMapping[rodataSym.vram]
rodataPath.mkdir(parents=True, exist_ok=True)
rodataSymbolPath = rodataPath / f"{rodataSym.getName()}.s"
common.Utils.printVerbose(f"Writing unmigrated rodata {rodataSymbolPath}")
with rodataSymbolPath.open("w") as f:
f.write(".section .rodata" + common.GlobalConfig.LINE_ENDS)
f.write(rodataSym.disassemble(migrate=True))


def progressCallback_migrateFunctions(i: int, funcName: str, funcTotal: int) -> None:
global _sLenLastLine
Expand Down
124 changes: 101 additions & 23 deletions spimdisasm/mips/FuncRodataEntry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

from __future__ import annotations

from typing import TextIO
from typing import Generator, TextIO

from collections import deque
import dataclasses

from .. import common
Expand Down Expand Up @@ -45,6 +46,12 @@ class FunctionRodataEntry:
def hasRodataSyms(self) -> bool:
return len(self.rodataSyms) > 0 or len(self.lateRodataSyms) > 0

def iterRodataSyms(self) -> Generator[symbols.SymbolBase, None, None]:
    """Yield every rodata symbol held by this entry.

    All the symbols from `rodataSyms` are produced first, followed by the
    ones from `lateRodataSyms`, each group in its stored order.
    """
    yield from self.rodataSyms
    yield from self.lateRodataSyms

def writeToFile(self, f: TextIO, writeFunction: bool=True) -> None:
if len(self.rodataSyms) > 0:
# Write the rdata
Expand Down Expand Up @@ -95,6 +102,55 @@ def getName(self) -> str:
assert lateRodataSyms == 1, lateRodataSyms
return self.lateRodataSyms[0].getName()



@staticmethod
def _shouldMigrateRodataSymbolToFunction(rodataSym: symbols.SymbolBase, intersection: set[int], funcName: str) -> bool:
functionOwner = rodataSym.contextSym.functionOwnerForMigration
if functionOwner is not None:
# If a function owner was specified for this symbol then it is only
# allowed to be migrated to that function and none other
if functionOwner == funcName:
return True
return False

if rodataSym.vram not in intersection:
return False

if not rodataSym.shouldMigrate():
return False

return True

@staticmethod
def _updateMigrableSymbolsSets(rodataSym: symbols.SymbolBase, intersection: set[int], funcName: str, migrableRodataSyms: set[int], maybeMigrableRodataSyms: set[int], rodataMigratedSomewhereElse: bool) -> bool:
# We try to decide which symbols should be migrated by checking from left
# to right.
# Completely unreferenced symbols may get migrated to the current
# function if they are between two symbols that do get migrated to this
# function.
# This is acomplished by keeping a second set of tentative symbols to
# migrate (`maybeMigrableRodataSyms`) which gets added to the main set
# when we see the next migrable symbol.

if rodataMigratedSomewhereElse:
return rodataMigratedSomewhereElse

if FunctionRodataEntry._shouldMigrateRodataSymbolToFunction(rodataSym, intersection, funcName):
migrableRodataSyms.add(rodataSym.vram)

migrableRodataSyms.update(maybeMigrableRodataSyms)
maybeMigrableRodataSyms.clear()
elif len(migrableRodataSyms) > 0:
if len(rodataSym.contextSym.referenceSymbols) > 0 or len(rodataSym.contextSym.referenceFunctions) > 0:
rodataMigratedSomewhereElse = True
elif rodataSym.shouldMigrate():
maybeMigrableRodataSyms.add(rodataSym.vram)
else:
rodataMigratedSomewhereElse = True

return rodataMigratedSomewhereElse

@staticmethod
def getEntryForFuncFromSection(func: symbols.SymbolFunction, rodataSection: sections.SectionRodata|None) -> FunctionRodataEntry:
"""
Expand All @@ -113,21 +169,31 @@ def getEntryForFuncFromSection(func: symbols.SymbolFunction, rodataSection: sect
lateRodataList: list[symbols.SymbolBase] = []

intersection = func.instrAnalyzer.referencedVrams & rodataSection.symbolsVRams
if len(intersection) == 0:
return FunctionRodataEntry(func)

funcName = func.getName()

migrableRodataSyms: set[int] = set()
migrableLateRodataSyms: set[int] = set()
maybeMigrableRodataSyms: set[int] = set()
maybeMigrableLateRodataSyms: set[int] = set()
rodataMigratedSomewhereElse: bool = False
lateRodataMigratedSomewhereElse: bool = False
for rodataSym in rodataSection.symbolList:
if rodataSym.vram not in intersection and rodataSym.contextSym.functionOwnerForMigration != funcName:
continue

if not rodataSym.shouldMigrate():
continue
if rodataMigratedSomewhereElse:
if not common.GlobalConfig.COMPILER.value.hasLateRodata:
break
if lateRodataMigratedSomewhereElse:
break

if rodataSym.contextSym.isLateRodata():
lateRodataList.append(rodataSym)
lateRodataMigratedSomewhereElse = FunctionRodataEntry._updateMigrableSymbolsSets(rodataSym, intersection, funcName, migrableLateRodataSyms, maybeMigrableLateRodataSyms, lateRodataMigratedSomewhereElse)
else:
rodataMigratedSomewhereElse = FunctionRodataEntry._updateMigrableSymbolsSets(rodataSym, intersection, funcName, migrableRodataSyms, maybeMigrableRodataSyms, rodataMigratedSomewhereElse)

for rodataSym in rodataSection.symbolList:
if rodataSym.vram in migrableLateRodataSyms:
lateRodataList.append(rodataSym)
elif rodataSym.vram in migrableRodataSyms:
rodataList.append(rodataSym)

return FunctionRodataEntry(func, rodataList, lateRodataList)
Expand Down Expand Up @@ -166,13 +232,15 @@ def getAllEntriesFromSections(textSection: sections.SectionText|None, rodataSect
sections.
"""

allUnmigratedRodataSymbols: list[symbols.SymbolBase] = []

rodataSymbols = rodataSection.symbolList if rodataSection is not None else []
for rodataSym in rodataSymbols:
if not rodataSym.shouldMigrate():
# We only care for the symbols which will not be migrated
allUnmigratedRodataSymbols.append(rodataSym)
# The simplest way to know which symbols have not been migrated yet and
# preserve order at the same time seems to be just keeping a list of the
# symbols and removing the ones that have been handled somehow (either by
# migrating to a function or adding a no-function entry for the given
# symbol).
# We use deque instead of a plain list because we want fast removal of
# the first symbol.
remainingRodataSymbols = deque(rodataSection.symbolList if rodataSection is not None else [])
handledSymbols: set[int] = set()

allEntries: list[FunctionRodataEntry] = []

Expand All @@ -182,24 +250,34 @@ def getAllEntriesFromSections(textSection: sections.SectionText|None, rodataSect

entry = FunctionRodataEntry.getEntryForFuncFromSection(func, rodataSection)

# Preserve the order of rodata symbols
for sym in entry.iterRodataSyms():
handledSymbols.add(sym.vram)

# Preserve the order of rodata symbols by looking for symbols that have not been migrated yet
if len(entry.rodataSyms) > 0:
firstFuncRodataSym = entry.rodataSyms[0]

while len(allUnmigratedRodataSymbols) > 0:
rodataSym = allUnmigratedRodataSymbols[0]
while len(remainingRodataSymbols) > 0:
rodataSym = remainingRodataSymbols[0]

if rodataSym.vram in handledSymbols:
# Drop migrated symbols
remainingRodataSymbols.popleft()
continue

if rodataSym.vram >= firstFuncRodataSym.vram:
# Take all the symbols up to the first rodata sym referenced by the current function
# Take all the symbols up to symbols referenced by the current function
break

allEntries.append(FunctionRodataEntry(rodataSyms=[rodataSym]))
del allUnmigratedRodataSymbols[0]
handledSymbols.add(rodataSym.vram)
remainingRodataSymbols.popleft()

allEntries.append(entry)

# Check if there's any rodata symbol remaining and add it to the list
for rodataSym in allUnmigratedRodataSymbols:
allEntries.append(FunctionRodataEntry(rodataSyms=[rodataSym]))
for rodataSym in remainingRodataSymbols:
if rodataSym.vram not in handledSymbols:
allEntries.append(FunctionRodataEntry(rodataSyms=[rodataSym]))

return allEntries
7 changes: 6 additions & 1 deletion spimdisasm/mips/symbols/MipsSymbolBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def isJumpTable(self) -> bool:
return False


#! @deprecated
def isRdata(self) -> bool:
"Checks if the current symbol is .rdata"
return False
Expand Down Expand Up @@ -223,7 +224,8 @@ def analyze(self) -> None:
word = self.words[i]
referencedSym = self.getSymbol(word, tryPlusOffset=False)
if referencedSym is not None:
referencedSym.referenceSymbols.add(self.contextSym)
if not referencedSym.isJumpTable():
referencedSym.referenceSymbols.add(self.contextSym)


def getEndOfLineComment(self, wordIndex: int) -> str:
Expand Down Expand Up @@ -340,6 +342,9 @@ def _allowWordSymbolReference(self, symbolRef: common.ContextSymbol, word: int)
if symType == common.SymbolSpecialType.branchlabel:
return False

if symType == common.SymbolSpecialType.jumptable:
return False

if symType.isTargetLabel():
if word != symbolRef.vram:
# Avoid using addends on labels
Expand Down
10 changes: 10 additions & 0 deletions spimdisasm/mips/symbols/MipsSymbolFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ def _runInstructionAnalyzer(self) -> None:
# look-ahead symbol finder
self._lookAheadSymbolFinder(instr, prevInstr, instructionOffset, regsTracker)

if prevInstr.isJumpWithAddress() and not prevInstr.doesLink():
targetVram = prevInstr.getBranchVramGeneric()
if targetVram < self.vram or targetVram >= self.vramEnd:
# Function is jumping outside the current function, so
# the state of the registers is garbage to the rest of the
# function, so just reset everything.
# Jumping without linking outside of functions like this is
# usually caused by tail call optimizations.
regsTracker = rabbitizer.RegistersTracker()

self.instrAnalyzer.processPrevFuncCall(regsTracker, instr, prevInstr, currentVram)

instructionOffset += 4
Expand Down
10 changes: 9 additions & 1 deletion spimdisasm/mips/symbols/MipsSymbolRodata.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def isMaybeConstVariable(self) -> bool:
return False
return True

#! @deprecated
def isRdata(self) -> bool:
"Checks if the current symbol is .rdata"
if self.isMaybeConstVariable():
Expand All @@ -69,7 +70,14 @@ def shouldMigrate(self) -> bool:
if self.contextSym.isMips1Double:
return True

if self.isRdata():
if len(self.contextSym.referenceSymbols) > 0:
return False
if len(self.contextSym.referenceFunctions) > 1:
return False

if self.isMaybeConstVariable():
if common.GlobalConfig.ALLOW_MIGRATING_CONST_VARIABLES:
return True
if not common.GlobalConfig.COMPILER.value.allowRdataMigration:
return False

Expand Down

0 comments on commit 9593eb7

Please sign in to comment.