From 6efb4207dd11d5dff8b93792612e1216d01081cc Mon Sep 17 00:00:00 2001 From: donoghuc Date: Wed, 22 Oct 2025 17:09:35 -0700 Subject: [PATCH 1/2] Remove duplicate gems when producing logstash artifacts Bundler is used to manage a gem environment that is shipped with logstash artifacts. By default, bundler will install newer/duplicate gems than shipped with ruby distributions (in logstash's case jruby). Duplicate gems in the shipped environment can cause issues with code loading with ambiguous gem specs or gem activation issues. This commit adds a step to compute the duplicate gems managed with bundler (and therefore direct/transitive dependencies of logstash/plugins) and *removes* copies shipped with jruby. Note that there are two locations where the deduplication is done: the stdlib gems and what jruby refers to as "bundled" gems. The existing pattern for excluding files from artifacts is used to implement the deduplication. --- rakelib/artifacts.rake | 65 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/rakelib/artifacts.rake b/rakelib/artifacts.rake index 2739e1fc6a7..b31ee660633 100644 --- a/rakelib/artifacts.rake +++ b/rakelib/artifacts.rake @@ -82,6 +82,56 @@ namespace "artifact" do res end + def duplicated_gems_exclude_paths + shared_gems_path = 'vendor/jruby/lib/ruby/gems/shared/gems' + default_gemspecs_path = 'vendor/jruby/lib/ruby/gems/shared/specifications/default' + bundle_gems_path = 'vendor/bundle/jruby/*/gems' + + exclusions = [] + + # "bundled" gems in jruby + # https://github.com/jruby/jruby/blob/024123c29d73b672d50730117494f3e4336a0edb/lib/pom.rb#L108-L152 + shared_gem_names = Dir.glob(File.join(shared_gems_path, '*')).map do |path| + match = File.basename(path).match(/^(.+?)-\d+/) + match ? 
match[1] : nil + end.compact + + # "default" gems in jruby/ruby + # https://github.com/jruby/jruby/blob/024123c29d73b672d50730117494f3e4336a0edb/lib/pom.rb#L21-L106 + default_gem_names = Dir.glob(File.join(default_gemspecs_path, '*.gemspec')).map do |path| + match = File.basename(path).match(/^(.+?)-\d+/) + match ? match[1] : nil + end.compact + + # gems we explicitly manage with bundler (we always want these to take precedence) + bundle_gem_names = Dir.glob(File.join(bundle_gems_path, '*')).map do |path| + match = File.basename(path).match(/^(.+?)-\d+/) + match ? match[1] : nil + end.compact + + shared_duplicates = shared_gem_names & bundle_gem_names + default_duplicates = default_gem_names & bundle_gem_names + all_duplicates = (shared_duplicates + default_duplicates).uniq + puts "Adding duplicate gems to exclude path: #{all_duplicates.sort.join(', ')}" + + # Exclude shared/bundled gems duplicates + shared_duplicates.each do |gem_name| + exclusions << "vendor/jruby/lib/ruby/gems/shared/gems/#{gem_name}-*/**/*" + exclusions << "vendor/jruby/lib/ruby/gems/shared/gems/#{gem_name}-*" + exclusions << "vendor/jruby/lib/ruby/gems/shared/specifications/#{gem_name}-*.gemspec" + end + + # Exclude default gems duplicates + default_duplicates.each do |gem_name| + exclusions << "vendor/jruby/lib/ruby/gems/shared/specifications/default/#{gem_name}-*.gemspec" + exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}.rb" + exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}/**/*" + exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}" + end + + exclusions + end + def default_exclude_paths return @exclude_paths if @exclude_paths @@ -101,18 +151,9 @@ namespace "artifact" do @exclude_paths << 'vendor/**/gems/**/Gemfile.lock' @exclude_paths << 'vendor/**/gems/**/Gemfile' - @exclude_paths << 'vendor/jruby/lib/ruby/gems/shared/gems/rake-*' - # exclude ruby-maven-libs 3.3.9 jars until JRuby ships with >= 3.8.9 - @exclude_paths << 
'vendor/bundle/jruby/**/gems/ruby-maven-libs-3.3.9/**/*' - - # remove this after JRuby includes rexml 3.3.x - @exclude_paths << 'vendor/jruby/lib/ruby/gems/shared/gems/rexml-3.2.5/**/*' - @exclude_paths << 'vendor/jruby/lib/ruby/gems/shared/specifications/rexml-3.2.5.gemspec' - - # remove this after JRuby includes net-imap-0.2.4+ - @exclude_paths << 'vendor/jruby/lib/ruby/gems/shared/specifications/net-imap-0.2.3.gemspec' - @exclude_paths << 'vendor/jruby/lib/ruby/gems/shared/gems/net-imap-0.2.3/**/*' - + @exclude_paths.concat(duplicated_gems_exclude_paths) + puts "Full exclude_paths list:" + @exclude_paths.each { |path| puts " - #{path}" } @exclude_paths.freeze end From e638bd294c07b5cde730a4235d841e822a4722ff Mon Sep 17 00:00:00 2001 From: donoghuc Date: Mon, 27 Oct 2025 14:58:51 -0700 Subject: [PATCH 2/2] only remove gemspecs for duplicated stdlib gems --- rakelib/artifacts.rake | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rakelib/artifacts.rake b/rakelib/artifacts.rake index b31ee660633..1856b39e6ae 100644 --- a/rakelib/artifacts.rake +++ b/rakelib/artifacts.rake @@ -123,10 +123,12 @@ namespace "artifact" do # Exclude default gems duplicates default_duplicates.each do |gem_name| + # CODEREVIEW: removing the code itself causes issues with gem loading. Remove only the gemspecs + # for duplicated gems. This should help the code scanning case. exclusions << "vendor/jruby/lib/ruby/gems/shared/specifications/default/#{gem_name}-*.gemspec" - exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}.rb" - exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}/**/*" - exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}" + # exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}.rb" + # exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}/**/*" + # exclusions << "vendor/jruby/lib/ruby/stdlib/#{gem_name}" end exclusions