From bed74b023d8eee277f6a93b2a02358cdaf0eddd9 Mon Sep 17 00:00:00 2001 From: Sonal Santan Date: Mon, 10 Nov 2025 10:25:40 -0800 Subject: [PATCH 1/3] Added code to skip copying over segment data from the stream. Instead, when a client requests segment data via get_data(), ELFIO creates a copy of the data by reading it from the matching sections. Signed-off-by: Sonal Santan --- elfio/elfio.hpp | 34 ++++++++++++++++----------- elfio/elfio_segment.hpp | 51 ++++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/elfio/elfio.hpp b/elfio/elfio.hpp index e228776e4..acd29d1d0 100644 --- a/elfio/elfio.hpp +++ b/elfio/elfio.hpp @@ -508,12 +508,12 @@ class elfio if ( auto file_class = header->get_class(); file_class == ELFCLASS64 ) { segments_.emplace_back( new ( std::nothrow ) - segment_impl( convertor, addr_translator ) ); + segment_impl( convertor, addr_translator, sections_ ) ); } else if ( file_class == ELFCLASS32 ) { segments_.emplace_back( new ( std::nothrow ) - segment_impl( convertor, addr_translator ) ); + segment_impl( convertor, addr_translator, sections_ ) ); } else { segments_.pop_back(); @@ -636,12 +636,12 @@ class elfio if ( file_class == ELFCLASS64 ) { segments_.emplace_back( new ( std::nothrow ) segment_impl( - convertor, addr_translator ) ); + convertor, addr_translator, sections_ ) ); } else if ( file_class == ELFCLASS32 ) { segments_.emplace_back( new ( std::nothrow ) segment_impl( - convertor, addr_translator ) ); + convertor, addr_translator, sections_ ) ); } else { segments_.pop_back(); @@ -649,16 +649,6 @@ class elfio } segment* seg = segments_.back().get(); - - if ( !seg->load( stream, - static_cast( offset ) + - static_cast( i ) * entry_size, - is_lazy ) || - stream.fail() ) { - segments_.pop_back(); - return false; - } - seg->set_index( i ); // Add sections to the segments (similar to readelfs algorithm) @@ -688,6 +678,22 @@ class elfio seg->add_section_index( psec->get_index(), 0 ); } } + 
// Sonal: If data is already stored in the sections we do not need to + // re-read and make a copy in the segment + if ( seg->get_sections_num() ) + return true; + + // Sonal: If the segment does not point to any section (when can this happen?) + // then follow the classic approach to read in the data + if ( !seg->load( stream, + static_cast( offset ) + + static_cast( i ) * entry_size, + is_lazy ) || + stream.fail() ) { + segments_.pop_back(); + return false; + } } return true; diff --git a/elfio/elfio_segment.hpp b/elfio/elfio_segment.hpp index 2c7b08cea..ca3378d4d 100644 --- a/elfio/elfio_segment.hpp +++ b/elfio/elfio_segment.hpp @@ -30,6 +30,7 @@ THE SOFTWARE. namespace ELFIO { +class elfio; //------------------------------------------------------------------------------ //! \class segment //! \brief Class for accessing segment data @@ -157,8 +158,9 @@ template class segment_impl : public segment //! \param convertor Pointer to the endianness convertor //! \param translator Pointer to the address translator segment_impl( std::shared_ptr convertor, - std::shared_ptr translator ) - : convertor( convertor ), translator( translator ) + std::shared_ptr translator, + const std::vector> &all_sections) + : convertor( convertor ), translator( translator ), all_sections( all_sections ) { } @@ -186,6 +188,11 @@ template class segment_impl : public segment if ( !is_loaded ) { load_data(); } + // Sonal: If data is in sections create a contiguous representation of data + // by copying over from sections. + if ( get_sections_num() ) { + load_section_data(); + } return data.get(); } @@ -340,22 +347,38 @@ template class segment_impl : public segment return false; } - data.reset( new ( std::nothrow ) char[(size_t)size + 1] ); - - pstream->seekg( p_offset ); - if ( nullptr != data.get() && pstream->read( data.get(), size ) ) { - data.get()[size] = 0; - } - else { - data = nullptr; - return false; + // Sonal: data is already stored in the sections. 
Only load data if + // there is no section for this segment + if (!get_sections_num()) { + data.reset( new ( std::nothrow ) char[(size_t)size + 1] ); + pstream->seekg( p_offset ); + if ( nullptr != data.get() && pstream->read( data.get(), size ) ) { + data.get()[size] = 0; + } + else { + data = nullptr; + return false; + } + is_loaded = true; } - is_loaded = true; - return true; } + bool load_section_data() const + { + Elf_Xword size = get_file_size(); + data.reset( new ( std::nothrow ) char[(size_t)size + 1] ); + char *curr = data.get(); + for ( auto sec_idx : sections ) { + const auto &sec = all_sections[sec_idx]; + // Sonal: Add code for alignment of curr + std::memcpy(curr, sec->get_data(), sec->get_size()); + curr += sec->get_size(); + } + is_loaded = true; + return true; + } //------------------------------------------------------------------------------ //! \brief Save the segment to a stream //! \param stream Output stream @@ -398,6 +421,8 @@ template class segment_impl : public segment false; //!< Flag indicating if the segment is loaded lazily mutable bool is_loaded = false; //!< Flag indicating if the segment is loaded + const std::vector> + &all_sections; //!< Vector of all sections present in this ELF }; } // namespace ELFIO From 87f93add19ce06d68e7ebff31e89c3bc3f3296ad Mon Sep 17 00:00:00 2001 From: Sonal Santan Date: Tue, 11 Nov 2025 07:59:39 -0800 Subject: [PATCH 2/3] Code refactoring to split segment header loading from segment data loading. 
This allows matching sections to segments and skipping the loading of segment data if the data is already present in the matched sections. Signed-off-by: Sonal Santan --- elfio/elfio.hpp | 88 +++++++++++++++++++++++------------------ elfio/elfio_segment.hpp | 70 +++++++++++++++++++++++--------- 2 files changed, 101 insertions(+), 57 deletions(-) diff --git a/elfio/elfio.hpp b/elfio/elfio.hpp index acd29d1d0..9d722f9a3 100644 --- a/elfio/elfio.hpp +++ b/elfio/elfio.hpp @@ -613,6 +613,40 @@ class elfio // sect_begin=12, sect_size=0 -> shall return false! } + + //------------------------------------------------------------------------------ + //! \brief Add sections to the segment (similar to readelfs algorithm) + void match_sections_to_segment(segment* seg) const + { + Elf64_Off segBaseOffset = seg->get_offset(); + Elf64_Off segEndOffset = segBaseOffset + seg->get_file_size(); + Elf64_Off segVBaseAddr = seg->get_virtual_address(); + Elf64_Off segVEndAddr = segVBaseAddr + seg->get_memory_size(); + for ( const auto& psec : sections ) { + // SHF_ALLOC sections are matched based on the virtual address + // otherwise the file offset is matched + if ( ( ( psec->get_flags() & SHF_ALLOC ) == SHF_ALLOC ) + ? is_sect_in_seg( psec->get_address(), + psec->get_size(), segVBaseAddr, + segVEndAddr ) + : is_sect_in_seg( psec->get_offset(), psec->get_size(), + segBaseOffset, segEndOffset ) ) { + + // If it is a TLS segment, add TLS sections only and vice versa + if ( ( ( seg->get_type() == PT_TLS ) && + ( ( psec->get_flags() & SHF_TLS ) != SHF_TLS ) ) || + ( ( ( psec->get_flags() & SHF_TLS ) == SHF_TLS ) && + ( seg->get_type() != PT_TLS ) ) ) + continue; + + // Alignment of segment shall not be updated, to preserve original value + // It will be re-calculated on saving. + seg->add_section_index( psec->get_index(), 0 ); + } + } + } + + //------------------------------------------------------------------------------ //! \brief Load segments from a stream //! 
\param stream The input stream to load from @@ -649,47 +683,25 @@ class elfio } segment* seg = segments_.back().get(); - seg->set_index( i ); - // Add sections to the segments (similar to readelfs algorithm) - Elf64_Off segBaseOffset = seg->get_offset(); - Elf64_Off segEndOffset = segBaseOffset + seg->get_file_size(); - Elf64_Off segVBaseAddr = seg->get_virtual_address(); - Elf64_Off segVEndAddr = segVBaseAddr + seg->get_memory_size(); - for ( const auto& psec : sections ) { - // SHF_ALLOC sections are matched based on the virtual address - // otherwise the file offset is matched - if ( ( ( psec->get_flags() & SHF_ALLOC ) == SHF_ALLOC ) - ? is_sect_in_seg( psec->get_address(), - psec->get_size(), segVBaseAddr, - segVEndAddr ) - : is_sect_in_seg( psec->get_offset(), psec->get_size(), - segBaseOffset, segEndOffset ) ) { - - // If it is a TLS segment, add TLS sections only and vice versa - if ( ( ( seg->get_type() == PT_TLS ) && - ( ( psec->get_flags() & SHF_TLS ) != SHF_TLS ) ) || - ( ( ( psec->get_flags() & SHF_TLS ) == SHF_TLS ) && - ( seg->get_type() != PT_TLS ) ) ) - continue; - - // Alignment of segment shall not be updated, to preserve original value - // It will be re-calculated on saving. - seg->add_section_index( psec->get_index(), 0 ); - } + // Load the segment header + if ( !seg->load_header( stream, + static_cast( offset ) + + static_cast( i ) * entry_size, + is_lazy ) || + stream.fail() ) { + segments_.pop_back(); + return false; } - // Sonal: If data is already stored in the sections we do not need to - // re-read and make a copy in the segment - if ( seg->get_sections_num() ) - return true; - - // Sonal: If the segment does not point to any section (when can this happen?) 
- // then follow the classic approach to read in the data - if ( !seg->load( stream, - static_cast( offset ) + - static_cast( i ) * entry_size, - is_lazy ) || + seg->set_index( i ); + + // Map the sections to the segments + match_sections_to_segment(seg); + + // Now that the sections for this segment have been identified, load the + // segment data without duplication + if ( !seg->load_rest(is_lazy ) || stream.fail() ) { segments_.pop_back(); return false; diff --git a/elfio/elfio_segment.hpp b/elfio/elfio_segment.hpp index ca3378d4d..c6b5ea2e9 100644 --- a/elfio/elfio_segment.hpp +++ b/elfio/elfio_segment.hpp @@ -129,14 +129,21 @@ class segment virtual const std::vector& get_sections() const = 0; //------------------------------------------------------------------------------ - //! \brief Load the segment from a stream + //! \brief Load the segment header from a stream //! \param stream Input stream //! \param header_offset Offset of the segment header //! \param is_lazy Whether to load the segment lazily //! \return True if successful, false otherwise - virtual bool load( std::istream& stream, - std::streampos header_offset, - bool is_lazy ) = 0; + virtual bool load_header( std::istream& stream, + std::streampos header_offset, + bool is_lazy ) = 0; + + //------------------------------------------------------------------------------ + //! \brief Load the segment data only if necessary + //! \param is_lazy Whether to load the segment lazily + //! \return True if successful, false otherwise + virtual bool load_rest( bool is_lazy ) = 0; + //------------------------------------------------------------------------------ //! \brief Save the segment to a stream //! \param stream Output stream @@ -188,8 +195,8 @@ template class segment_impl : public segment if ( !is_loaded ) { load_data(); } - // Sonal: If data is in sections create a contiguous representation of data - // by copying over from sections. 
+ // If data is in the matched sections, create a contiguous representation + // of data by copying over from sections. if ( get_sections_num() ) { load_section_data(); } @@ -284,14 +291,14 @@ template class segment_impl : public segment void set_index( const Elf_Half& value ) override { index = value; } //------------------------------------------------------------------------------ - //! \brief Load the segment from a stream + //! \brief Load the segment header from a stream //! \param stream Input stream //! \param header_offset Offset of the segment header //! \param is_lazy_ Whether to load the segment lazily //! \return True if successful, false otherwise - bool load( std::istream& stream, - std::streampos header_offset, - bool is_lazy_ ) override + bool load_header( std::istream& stream, + std::streampos header_offset, + bool is_lazy_ ) override { pstream = &stream; is_lazy = is_lazy_; @@ -309,15 +316,23 @@ template class segment_impl : public segment is_offset_set = true; + return true; + } + + //------------------------------------------------------------------------------ + //! \brief Load the segment data only if necessary + //! \param is_lazy Whether to load the segment lazily + //! \return True if successful, false otherwise + bool load_rest( bool is_lazy ) override + { if ( !( is_lazy || is_loaded ) ) { return load_data(); } - return true; } //------------------------------------------------------------------------------ - //! \brief Load the data of the segment + //! \brief Load the data of the segment only if necessary //! \return True if successful, false otherwise bool load_data() const { @@ -347,8 +362,8 @@ template class segment_impl : public segment return false; } - // Sonal: data is already stored in the sections. Only load data if - // there is no section for this segment + // If this segment points to sections then the data is already stored in + // the sections. 
Load data only if there is no section for this segment if (!get_sections_num()) { data.reset( new ( std::nothrow ) char[(size_t)size + 1] ); pstream->seekg( p_offset ); @@ -365,16 +380,33 @@ template class segment_impl : public segment return true; } + + //------------------------------------------------------------------------------ + //! \brief Load the segment data by assembling it from the matched sections + //! \return True if successful, false otherwise bool load_section_data() const { Elf_Xword size = get_file_size(); + Elf_Xword offset = 0; + data.reset( new ( std::nothrow ) char[(size_t)size + 1] ); - char *curr = data.get(); + for ( auto sec_idx : sections ) { - const auto &sec = all_sections[sec_idx]; - // Sonal: Add code for alignment of curr + const auto& sec = all_sections[sec_idx]; + if ( SHT_NOBITS == sec->get_type() ) { + // Needs no storage like .bss + continue; + } + + Elf_Xword section_align = sec->get_addr_align(); + if ( section_align > 1 && offset % section_align != 0 ) { + // Add any holes to meet the alignment requirement + offset += section_align - offset % section_align; + } + char* curr = data.get(); + curr += offset; std::memcpy(curr, sec->get_data(), sec->get_size()); - curr += sec->get_size(); + offset += sec->get_size(); } is_loaded = true; return true; @@ -422,7 +454,7 @@ template class segment_impl : public segment mutable bool is_loaded = false; //!< Flag indicating if the segment is loaded const std::vector> - &all_sections; //!< Vector of all sections present in this ELF + &all_sections; //!< Reference to the vector of all sections in this ELF }; } // namespace ELFIO From ebe3164d4eb5a18369adca8cc48ffb3544f8b6b6 Mon Sep 17 00:00:00 2001 From: Sonal Santan Date: Tue, 11 Nov 2025 18:32:07 -0800 Subject: [PATCH 3/3] Fix the segment::get_data() logic when data is already loaded Signed-off-by: Sonal Santan --- elfio/elfio_segment.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/elfio/elfio_segment.hpp 
b/elfio/elfio_segment.hpp index c6b5ea2e9..a21ea4f24 100644 --- a/elfio/elfio_segment.hpp +++ b/elfio/elfio_segment.hpp @@ -194,11 +194,11 @@ template class segment_impl : public segment { if ( !is_loaded ) { load_data(); - } - // If data is in the matched sections, create a contiguous representation - // of data by copying over from sections. - if ( get_sections_num() ) { - load_section_data(); + // If data is in the matched sections, create a contiguous + // representation of data by copying over from sections. + if ( get_sections_num() ) { + load_section_data(); + } } return data.get(); }