diff --git a/lib/linguist/source/rugged.rb b/lib/linguist/source/rugged.rb
index b246009b16..d6cbab1f67 100644
--- a/lib/linguist/source/rugged.rb
+++ b/lib/linguist/source/rugged.rb
@@ -44,7 +44,30 @@ def initialize(rugged)
       end
 
+      # Counts the blobs reachable from the tree of +commit_id+, stopping early
+      # once either the blob count or the subtree count reaches +limit+.
+      #
+      # Subtrees are capped separately so that a repository made of a huge number
+      # of directories and few or no files cannot keep the walk running.
+      #
+      # Returns the blob count, or +limit+ if either cap was reached.
       def get_tree_size(commit_id, limit)
-        get_tree(commit_id).count_recursive(limit)
+        tree_count = 0
+        blob_count = 0
+
+        get_tree(commit_id).walk(:preorder) do |_root, entry|
+          case entry[:type]
+          when :blob
+            blob_count += 1
+            return limit if blob_count >= limit
+          when :tree
+            tree_count += 1
+            return limit if tree_count >= limit
+          end
+          # Keep the block result truthy so the walk descends into every subtree.
+          true
+        end
+
+        blob_count
       end
 
       def set_attribute_source(commit_id)
diff --git a/test/test_repository.rb b/test/test_repository.rb
index 81940566d9..e8140831c9 100644
--- a/test/test_repository.rb
+++ b/test/test_repository.rb
@@ -230,3 +230,60 @@ def diff(old_commit, new_commit)
     Diff.new
   end
 end
+
+################################################################################
+
+class TestGetTreeSize < Minitest::Test
+  def test_get_tree_size_normal_repo
+    rugged = Rugged::Repository.new(File.expand_path("../../.git", __FILE__))
+    source = Linguist::Repository.new(rugged, rugged.head.target_id)
+
+    # With a high limit, should return the actual blob count
+    size = source.repository.get_tree_size(rugged.head.target_id, 100_000)
+    assert size > 0
+    assert size < 100_000
+
+    # With a low limit, should return the limit
+    assert_equal 10, source.repository.get_tree_size(rugged.head.target_id, 10)
+  end
+
+  def test_get_tree_size_pathological_repo
+    # Create a minimal git bomb in a temp directory
+    Dir.mktmpdir("git-bomb-test") do |dir|
+      # Initialize repo; fail fast if git cannot set it up
+      assert system("git", "-C", dir, "init", "-q", out: File::NULL, err: File::NULL), "git init failed"
+      assert system("git", "-C", dir, "config", "user.email", "test@test.com"), "git config user.email failed"
+      assert system("git", "-C", dir, "config", "user.name", "Test"), "git config user.name failed"
+
+      # Create git bomb, 2^32-1 directories, no files
+      current_sha = nil
+      mode = "40000" # tree mode
+      32.times do
+        current_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "tree", "-w", "--stdin"], "r+b") do |io|
+          if current_sha
+            # Tree entry format: "<mode> <name>\0<20-byte SHA>"
+            sha = [current_sha].pack('H*')
+            entry0 = "#{mode} entry0\0#{sha}"
+            entry1 = "#{mode} entry1\0#{sha}"
+            io.write(entry0 + entry1)
+          end
+          io.close_write
+          io.read.strip
+        end
+      end
+
+      # Create commit; identity lines must be "Name <email> timestamp timezone"
+      commit_content = "tree #{current_sha}\nauthor Test <test@test.com> 0 +0000\ncommitter Test <test@test.com> 0 +0000\n\ntest"
+      commit_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "commit", "-w", "--stdin"], "r+") do |io|
+        io.write(commit_content)
+        io.close_write
+        io.read.strip
+      end
+
+      # Should hit tree limit quickly (2^32 trees > 500)
+      rugged = Rugged::Repository.new(dir)
+      source = Linguist::Repository.new(rugged, commit_sha)
+      assert_equal 500, source.repository.get_tree_size(commit_sha, 500)
+    end
+  end
+end