diff --git a/lib/branch_base/database.rb b/lib/branch_base/database.rb
index 517edd9..3ed50a3 100644
--- a/lib/branch_base/database.rb
+++ b/lib/branch_base/database.rb
@@ -43,21 +43,20 @@ def setup_schema
           FOREIGN KEY (repo_id) REFERENCES repositories (repo_id)
         );
 
-        CREATE INDEX IF NOT EXISTS idx_commits_repo_id ON commits (repo_id);
-        CREATE INDEX IF NOT EXISTS idx_commits_author ON commits (author);
-        CREATE INDEX IF NOT EXISTS idx_commits_committer ON commits (committer);
-
         CREATE TABLE IF NOT EXISTS branches (
           branch_id INTEGER PRIMARY KEY AUTOINCREMENT,
           repo_id INTEGER NOT NULL,
           name TEXT NOT NULL,
-          head_commit TEXT NOT NULL,
-          FOREIGN KEY (repo_id) REFERENCES repositories (repo_id),
-          FOREIGN KEY (head_commit) REFERENCES commits (commit_hash)
+          FOREIGN KEY (repo_id) REFERENCES repositories (repo_id)
         );
 
-        CREATE INDEX IF NOT EXISTS idx_branches_repo_id ON branches (repo_id);
-        CREATE INDEX IF NOT EXISTS idx_branches_head_commit ON branches (head_commit);
+        CREATE TABLE IF NOT EXISTS branch_commits (
+          branch_id INTEGER NOT NULL,
+          commit_hash TEXT NOT NULL,
+          PRIMARY KEY (branch_id, commit_hash),
+          FOREIGN KEY (branch_id) REFERENCES branches (branch_id),
+          FOREIGN KEY (commit_hash) REFERENCES commits (commit_hash)
+        );
 
         CREATE TABLE IF NOT EXISTS files (
           file_id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -68,9 +67,6 @@ def setup_schema
           FOREIGN KEY (latest_commit) REFERENCES commits (commit_hash)
         );
 
-        CREATE INDEX IF NOT EXISTS idx_files_repo_id ON files (repo_id);
-        CREATE INDEX IF NOT EXISTS idx_files_file_path ON files (file_path);
-
         CREATE TABLE IF NOT EXISTS commit_files (
           commit_hash TEXT NOT NULL,
           file_id INTEGER NOT NULL,
@@ -88,6 +84,12 @@ def setup_schema
           FOREIGN KEY (parent_hash) REFERENCES commits (commit_hash)
         );
 
+        CREATE INDEX IF NOT EXISTS idx_commits_repo_id ON commits (repo_id);
+        CREATE INDEX IF NOT EXISTS idx_commits_author ON commits (author);
+        CREATE INDEX IF NOT EXISTS idx_commits_committer ON commits (committer);
+        CREATE INDEX IF NOT EXISTS idx_branches_repo_id ON branches (repo_id);
+        CREATE INDEX IF NOT EXISTS idx_files_repo_id ON files (repo_id);
+        CREATE INDEX IF NOT EXISTS idx_files_file_path ON files (file_path);
         CREATE INDEX IF NOT EXISTS idx_commit_parents_commit_hash ON commit_parents (commit_hash);
         CREATE INDEX IF NOT EXISTS idx_commit_parents_parent_hash ON commit_parents (parent_hash);
       SQL
diff --git a/lib/branch_base/repository.rb b/lib/branch_base/repository.rb
index 2f789de..e39c7d4 100644
--- a/lib/branch_base/repository.rb
+++ b/lib/branch_base/repository.rb
@@ -4,12 +4,40 @@
 
 module BranchBase
   class Repository
+    attr_reader :repo
+
     def initialize(repo_path)
       @repo = Rugged::Repository.new(repo_path)
     end
 
-    def walk(&block)
-      @repo.walk(@repo.head.target.oid, Rugged::SORT_TOPO, &block)
+    # Walks commits in topological order, starting from a branch tip.
+    #
+    # @param branch_name [String, nil] branch whose tip starts the walk;
+    #   when nil, the walk starts from the repository's HEAD commit
+    # @raise [ArgumentError] if branch_name is given but no such branch exists
+    def walk(branch_name = nil, &block)
+      oid =
+        if branch_name
+          branch = @repo.branches[branch_name]
+          raise ArgumentError, "Branch not found: #{branch_name}" unless branch
+          branch.target.oid
+        else
+          @repo.head.target.oid
+        end
+
+      @repo.walk(oid, Rugged::SORT_TOPO, &block)
+    end
+
+    # Returns HEAD's reference name with the "refs/heads/" prefix removed,
+    # or nil when looking up HEAD raises Rugged::ReferenceError.
+    #
+    # @return [String, nil]
+    def default_branch_name
+      head_ref = @repo.head.name
+      # \A anchors at the start of the string; ^ would match after any newline.
+      head_ref.sub(%r{\Arefs/heads/}, "")
+    rescue Rugged::ReferenceError
+      nil
     end
 
     def path
diff --git a/lib/branch_base/sync.rb b/lib/branch_base/sync.rb
index 4e581cd..7d638d4 100644
--- a/lib/branch_base/sync.rb
+++ b/lib/branch_base/sync.rb
@@ -39,32 +39,59 @@ def sync_repository
     end
 
     def sync_branches(repo_id)
-      BranchBase.logger.debug(
-        "Syncing branches for repository ID: #{@repo.path}",
-      )
+      BranchBase.logger.debug("Syncing branches for repository ID: #{repo_id}")
 
-      batched_branches = []
+      default_branch_name = @repo.default_branch_name
+      return unless default_branch_name
 
       @repo.branches.each do |branch|
         next if branch.name.nil? || branch.target.nil?
 
-        commit_oid =
-          (
-            if branch.target.respond_to?(:oid)
-              branch.target.oid
-            else
-              branch.target.target.oid
-            end
-          )
-        batched_branches << [repo_id, branch.name, commit_oid]
+        branch_id = insert_branch(repo_id, branch.name)
 
-        if batched_branches.size >= BATCH_SIZE
-          insert_branches(batched_branches)
-          batched_branches.clear
+        # Only the default branch gets its commits recorded in branch_commits.
+        if branch.name == default_branch_name
+          insert_branch_commits(branch_id, branch)
         end
       end
+    end
+
+    # Finds or creates the branches row for (repo_id, branch_name).
+    #
+    # @return the branch_id of the existing or newly inserted row
+    def insert_branch(repo_id, branch_name)
+      existing_branch_id =
+        @db.execute(
+          "SELECT branch_id FROM branches WHERE name = ? AND repo_id = ?",
+          [branch_name, repo_id],
+        ).first
+      return existing_branch_id[0] if existing_branch_id
 
-      insert_branches(batched_branches) unless batched_branches.empty?
+      @db.execute(
+        "INSERT INTO branches (repo_id, name) VALUES (?, ?)",
+        [repo_id, branch_name],
+      )
+      @db.last_insert_row_id
+    end
+
+    # Records every commit reachable from the branch tip in branch_commits.
+    #
+    # There is no commit_exists? pre-check: skipping a commit here would leave
+    # it unassociated with this branch, and INSERT OR IGNORE on the composite
+    # primary key (branch_id, commit_hash) already makes re-runs idempotent.
+    def insert_branch_commits(branch_id, branch)
+      BranchBase.logger.debug("Syncing branch commits for: #{branch.name}")
+
+      head_commit = branch.target
+      walker = Rugged::Walker.new(@repo.repo)
+      walker.push(head_commit)
+
+      walker.each do |commit|
+        @db.execute(
+          "INSERT OR IGNORE INTO branch_commits (branch_id, commit_hash) VALUES (?, ?)",
+          [branch_id, commit.oid],
+        )
+      end
     end
 
     def sync_commits(repo_id)
diff --git a/spec/branch_base/database_spec.rb b/spec/branch_base/database_spec.rb
index c2733b5..91accbc 100644
--- a/spec/branch_base/database_spec.rb
+++ b/spec/branch_base/database_spec.rb
@@ -16,6 +16,7 @@
         commit_files
         commit_parents
         sqlite_sequence
+        branch_commits
       ]
       expect(tables.flatten).to match_array(expected_tables)
     end
diff --git a/spec/branch_base/sync_spec.rb b/spec/branch_base/sync_spec.rb
index 49cc123..b29a5f9 100644
--- a/spec/branch_base/sync_spec.rb
+++ b/spec/branch_base/sync_spec.rb
@@ -105,4 +105,50 @@
       end
     end
   end
+
+  describe "#sync_branch_commits" do
+    it "associates commits only with the default branch" do
+      sync.sync_branches(@repo_id)
+      sync.sync_commits(@repo_id)
+
+      default_branch_name = repo.default_branch_name
+      default_branch_id =
+        db
+          .execute(
+            "SELECT branch_id FROM branches WHERE repo_id = ? AND name = ?",
+            [@repo_id, default_branch_name],
+          )
+          .first
+          &.first
+
+      git_commits = []
+      repo.walk(default_branch_name) { |commit| git_commits << commit.oid }
+
+      db_commit_hashes =
+        db.execute(
+          "SELECT commit_hash FROM branch_commits WHERE branch_id = ?",
+          default_branch_id,
+        ).flatten
+
+      expect(git_commits.size).to eq(db_commit_hashes.size)
+      git_commits.each do |commit_oid|
+        expect(db_commit_hashes).to include(commit_oid)
+      end
+
+      other_branch_ids =
+        db.execute(
+          "SELECT branch_id FROM branches WHERE repo_id = ? AND name != ?",
+          [@repo_id, default_branch_name],
+        ).flatten
+
+      other_branch_ids.each do |branch_id|
+        other_branch_commit_hashes =
+          db.execute(
+            "SELECT commit_hash FROM branch_commits WHERE branch_id = ?",
+            branch_id,
+          ).flatten
+        expect(other_branch_commit_hashes).to be_empty
+      end
+    end
+  end
 end