From 04df6def8862ab5fa47e889516c34435f7413cac Mon Sep 17 00:00:00 2001 From: Peter Matula Date: Wed, 25 Sep 2024 11:29:53 +0200 Subject: [PATCH] fileformat/elf: Prevent creation of duplicate imports while parsing (#1210) * fileformat/elf: Prevent creation of duplicate imports while parsing symbols * fileformat/elf: use ptrs in import cache instead of refs --- .../fileformat/types/import_table/import.h | 2 +- .../types/import_table/import_table.h | 2 +- src/fileformat/file_format/elf/elf_format.cpp | 43 +++++++++++++++++-- src/fileformat/types/import_table/import.cpp | 2 +- .../types/import_table/import_table.cpp | 3 +- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/retdec/fileformat/types/import_table/import.h b/include/retdec/fileformat/types/import_table/import.h index fcc70cfd1..99936f01f 100644 --- a/include/retdec/fileformat/types/import_table/import.h +++ b/include/retdec/fileformat/types/import_table/import.h @@ -39,7 +39,7 @@ class Import /// @name Getters /// @{ - std::string getName() const; + const std::string& getName() const; std::uint64_t getLibraryIndex() const; std::uint64_t getAddress() const; bool getOrdinalNumber(std::uint64_t &importOrdinalNumber) const; diff --git a/include/retdec/fileformat/types/import_table/import_table.h b/include/retdec/fileformat/types/import_table/import_table.h index 46d5c58ab..2ac915f22 100644 --- a/include/retdec/fileformat/types/import_table/import_table.h +++ b/include/retdec/fileformat/types/import_table/import_table.h @@ -60,7 +60,7 @@ class ImportTable virtual void computeHashes(); void clear(); void addLibrary(std::string name, bool missingDependency = false); - void addImport(std::unique_ptr&& import); + const Import* addImport(std::unique_ptr&& import); bool hasLibraries() const; bool hasLibrary(const std::string &name) const; bool hasLibraryCaseInsensitive(const std::string &name) const; diff --git a/src/fileformat/file_format/elf/elf_format.cpp b/src/fileformat/file_format/elf/elf_format.cpp index 7c1319d8b..3c895f70f 100644 --- a/src/fileformat/file_format/elf/elf_format.cpp +++ b/src/fileformat/file_format/elf/elf_format.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include @@ -1744,10 +1745,30 @@ void ElfFormat::loadSymbols(const ELFIO::elfio *file, const ELFIO::symbol_sectio telfhashSymbols = {}; } + // Cache keeping track of created imports (i.e. (name, address) pairs) + // to prevent repeated creation of the same imports. + // To save space, this uses string references and relies on imports not + // being moved -- they should not be, as ImportTable stores vector of unique + // pointers, but if that ever changes, this will end badly. + auto createdImportsComparator = []( + const std::pair& lhs, + const std::pair& rhs + ) { + if (*(lhs.first) != *(rhs.first)) { + return *(lhs.first) < *(rhs.first); + } + return lhs.second < rhs.second; + }; + std::set< + std::pair, + decltype(createdImportsComparator) + > createdImports(createdImportsComparator); + for(std::size_t i = 0, e = elfSymbolTable->get_loaded_symbols_num(); i < e; ++i) { auto symbol = std::make_shared(); elfSymbolTable->get_symbol(i, name, value, size, bind, type, link, other); + size ? symbol->setSize(size) : symbol->invalidateSize(); symbol->setType(getSymbolType(bind, type, link)); symbol->setUsageType(getSymbolUsageType(type)); @@ -1781,15 +1802,27 @@ void ElfFormat::loadSymbols(const ELFIO::elfio *file, const ELFIO::symbol_sectio importTable = new ElfImportTable(); } auto keyIter = importNameAddressMap.equal_range(name); + // we create std::set from std::multimap values in order to ensure determinism - std::set> addresses(keyIter.first, keyIter.second); + std::set> addresses; + for (auto it = keyIter.first; it != keyIter.second; ++it) + { + if (!createdImports.count({&it->first, it->second})) { + addresses.insert(*it); + } + } + for(const auto &address : addresses) { auto import = std::make_unique(); import->setName(name); import->setAddress(address.second); import->setUsageType(symbolToImportUsage(symbol->getUsageType())); - importTable->addImport(std::move(import)); + auto* inserted = importTable->addImport(std::move(import)); + + if (inserted) { + createdImports.emplace(&inserted->getName(), inserted->getAddress()); + } } if(keyIter.first == keyIter.second && getSectionFromAddress(value)) { @@ -1797,7 +1830,11 @@ void ElfFormat::loadSymbols(const ELFIO::elfio *file, const ELFIO::symbol_sectio import->setName(name); import->setAddress(value); import->setUsageType(symbolToImportUsage(symbol->getUsageType())); - importTable->addImport(std::move(import)); + auto* inserted = importTable->addImport(std::move(import)); + + if (inserted) { + createdImports.emplace(&inserted->getName(), inserted->getAddress()); + } } } } diff --git a/src/fileformat/types/import_table/import.cpp b/src/fileformat/types/import_table/import.cpp index 9ae78233e..b2e79d272 100644 --- a/src/fileformat/types/import_table/import.cpp +++ b/src/fileformat/types/import_table/import.cpp @@ -13,7 +13,7 @@ namespace fileformat { * Get import name * @return Import name */ -std::string Import::getName() const +const std::string& Import::getName() const { return name; } diff --git a/src/fileformat/types/import_table/import_table.cpp b/src/fileformat/types/import_table/import_table.cpp index df44d5ea6..81077c44c 100644 --- a/src/fileformat/types/import_table/import_table.cpp +++ b/src/fileformat/types/import_table/import_table.cpp @@ -322,9 +322,10 @@ void ImportTable::addLibrary(std::string name, bool isMissingDependency) * Add import * @param import Import which will be added */ -void ImportTable::addImport(std::unique_ptr&& import) +const Import* ImportTable::addImport(std::unique_ptr&& import) { imports.push_back(std::move(import)); + return imports.back().get(); } /**