From 84905eaa367b2a605e522b1c0f5a8ada2059c8f6 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Thu, 15 Jan 2026 11:52:36 -0800 Subject: [PATCH 1/4] get_tree_size: add tree count limit to prevent DoS For pathological repositories like git bombs with deeply nested tree structures, the previous implementation could hang indefinitely even with a file limit, because it had to visit every tree object to discover the blobs. This change adds a separate tree count that also triggers the limit, ensuring the method terminates promptly regardless of repository structure. The return value is still the blob count for normal repos, preserving existing behavior. --- lib/linguist/source/rugged.rb | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/lib/linguist/source/rugged.rb b/lib/linguist/source/rugged.rb index b246009b16..90981ed269 100644 --- a/lib/linguist/source/rugged.rb +++ b/lib/linguist/source/rugged.rb @@ -44,7 +44,31 @@ def initialize(rugged) end def get_tree_size(commit_id, limit) - get_tree(commit_id).count_recursive(limit) + tree = get_tree(commit_id) + tree_count = 0 + count = 0 + stack = [tree.each] + + while !stack.empty? 
+ begin + entry = stack.last.next + rescue StopIteration + stack.pop + next + end + + if entry[:type] == :tree + tree_count += 1 + return limit if tree_count >= limit + subtree = @rugged.lookup(entry[:oid]) + stack.push(subtree.each) + else + count += 1 + return limit if count >= limit + end + end + + count end def set_attribute_source(commit_id) From 8a6f0850bd3ddcd1ea5446fe573466a626b1da00 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Thu, 15 Jan 2026 12:40:29 -0800 Subject: [PATCH 2/4] Add tests --- test/test_repository.rb | 61 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/test_repository.rb b/test/test_repository.rb index 81940566d9..ef247d6f7e 100644 --- a/test/test_repository.rb +++ b/test/test_repository.rb @@ -230,3 +230,64 @@ def diff(old_commit, new_commit) Diff.new end end + +################################################################################ + +class TestGetTreeSize < Minitest::Test + def test_get_tree_size_normal_repo + rugged = Rugged::Repository.new(File.expand_path("../../.git", __FILE__)) + source = Linguist::Repository.new(rugged, rugged.head.target_id) + + # With a high limit, should return the actual blob count + size = source.repository.get_tree_size(rugged.head.target_id, 100_000) + assert size > 0 + assert size < 100_000 + + # With a low limit, should return the limit + assert_equal 10, source.repository.get_tree_size(rugged.head.target_id, 10) + end + + def test_get_tree_size_pathological_repo + # Create a minimal git bomb in a temp directory + Dir.mktmpdir("git-bomb-test") do |dir| + # Initialize repo + system("git", "-C", dir, "init", "-q", out: File::NULL, err: File::NULL) + system("git", "-C", dir, "config", "user.email", "test@test.com") + system("git", "-C", dir, "config", "user.name", "Test") + + # Create a blob + blob_sha = IO.popen(["git", "-C", dir, "hash-object", "-w", "--stdin"], "r+") do |io| + io.write("content") + io.close_write + io.read.strip + end + + # 
Create deeply nested trees, 1000 levels deep + current_sha = blob_sha + current_mode = "100644" # blob mode + 1000.times do |i| + # Tree entry format: " \0<20-byte SHA>" + entry = "#{current_mode} entry\0#{[current_sha].pack('H*')}" + current_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "tree", "-w", "--stdin"], "r+b") do |io| + io.write(entry) + io.close_write + io.read.strip + end + current_mode = "40000" # tree mode + end + + # Create commit + commit_content = "tree #{current_sha}\nauthor Test 0 +0000\ncommitter Test 0 +0000\n\ntest" + commit_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "commit", "-w", "--stdin"], "r+") do |io| + io.write(commit_content) + io.close_write + io.read.strip + end + + # With limit of 100, should hit tree limit quickly (1000 trees > 100) + rugged = Rugged::Repository.new(dir) + source = Linguist::Repository.new(rugged, commit_sha) + assert_equal 100, source.repository.get_tree_size(commit_sha, 100) + end + end +end From 2f96168709ffcb522849f4762b7dc81fb333ca67 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Thu, 22 Jan 2026 08:24:18 -0600 Subject: [PATCH 3/4] Improve test (and make it run much faster) The previous test created 1000 git trees, taking 11s. This creates 32 in <0.5s. This now creates an actual git bomb, if you cloned the repo you'd get 2^32-1 directories. 
--- test/test_repository.rb | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/test/test_repository.rb b/test/test_repository.rb index ef247d6f7e..e8140831c9 100644 --- a/test/test_repository.rb +++ b/test/test_repository.rb @@ -255,25 +255,21 @@ def test_get_tree_size_pathological_repo system("git", "-C", dir, "config", "user.email", "test@test.com") system("git", "-C", dir, "config", "user.name", "Test") - # Create a blob - blob_sha = IO.popen(["git", "-C", dir, "hash-object", "-w", "--stdin"], "r+") do |io| - io.write("content") - io.close_write - io.read.strip - end - - # Create deeply nested trees, 1000 levels deep - current_sha = blob_sha - current_mode = "100644" # blob mode - 1000.times do |i| - # Tree entry format: " \0<20-byte SHA>" - entry = "#{current_mode} entry\0#{[current_sha].pack('H*')}" + # Create git bomb, 2^32-1 directories, no files + current_sha = nil + mode = "40000" # tree mode + 32.times do |i| current_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "tree", "-w", "--stdin"], "r+b") do |io| - io.write(entry) + if current_sha then + # Tree entry format: "<mode> <name>\0<20-byte SHA>" + sha = [current_sha].pack('H*') + entry0 = "#{mode} entry0\0#{sha}" + entry1 = "#{mode} entry1\0#{sha}" + io.write(entry0 + entry1) + end io.close_write io.read.strip end - current_mode = "40000" # tree mode end # Create commit @@ -284,10 +280,10 @@ def test_get_tree_size_pathological_repo io.read.strip end - # With limit of 100, should hit tree limit quickly (1000 trees > 100) + # Should hit tree limit quickly (2^32 trees > 500) rugged = Rugged::Repository.new(dir) source = Linguist::Repository.new(rugged, commit_sha) - assert_equal 100, source.repository.get_tree_size(commit_sha, 100) + assert_equal 500, source.repository.get_tree_size(commit_sha, 500) end end end From 7aa2ec90e7beabbdfccd40eeae42069454dd4ce1 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Thu, 22 Jan 2026 08:25:19 -0600 Subject: [PATCH 4/4] 
Simplify get_tree_size --- lib/linguist/source/rugged.rb | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/lib/linguist/source/rugged.rb b/lib/linguist/source/rugged.rb index 90981ed269..d6cbab1f67 100644 --- a/lib/linguist/source/rugged.rb +++ b/lib/linguist/source/rugged.rb @@ -44,28 +44,19 @@ def initialize(rugged) end def get_tree_size(commit_id, limit) - tree = get_tree(commit_id) tree_count = 0 count = 0 - stack = [tree.each] - - while !stack.empty? - begin - entry = stack.last.next - rescue StopIteration - stack.pop - next - end - if entry[:type] == :tree - tree_count += 1 - return limit if tree_count >= limit - subtree = @rugged.lookup(entry[:oid]) - stack.push(subtree.each) - else + get_tree(commit_id).walk(:preorder) do |root, entry| + case entry[:type] + when :blob count += 1 return limit if count >= limit + when :tree + tree_count += 1 + return limit if tree_count >= limit end + true end count