Skip to content

Commit

Permalink
Capture a relationship between branch and branch commits (#6)
Browse files Browse the repository at this point in the history
One commit can also be related to multiple branches.

This slows down the sync process, so branch commits are only synced for the default branch. It is still slow
after that, so introducing this can wait.
  • Loading branch information
shayonj authored Dec 3, 2023
1 parent 14d442d commit d5c3a86
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 31 deletions.
26 changes: 14 additions & 12 deletions lib/branch_base/database.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,20 @@ def setup_schema
FOREIGN KEY (repo_id) REFERENCES repositories (repo_id)
);
CREATE INDEX IF NOT EXISTS idx_commits_repo_id ON commits (repo_id);
CREATE INDEX IF NOT EXISTS idx_commits_author ON commits (author);
CREATE INDEX IF NOT EXISTS idx_commits_committer ON commits (committer);
CREATE TABLE IF NOT EXISTS branches (
branch_id INTEGER PRIMARY KEY AUTOINCREMENT,
repo_id INTEGER NOT NULL,
name TEXT NOT NULL,
head_commit TEXT NOT NULL,
FOREIGN KEY (repo_id) REFERENCES repositories (repo_id),
FOREIGN KEY (head_commit) REFERENCES commits (commit_hash)
FOREIGN KEY (repo_id) REFERENCES repositories (repo_id)
);
CREATE INDEX IF NOT EXISTS idx_branches_repo_id ON branches (repo_id);
CREATE INDEX IF NOT EXISTS idx_branches_head_commit ON branches (head_commit);
CREATE TABLE IF NOT EXISTS branch_commits (
branch_id INTEGER NOT NULL,
commit_hash TEXT NOT NULL,
PRIMARY KEY (branch_id, commit_hash),
FOREIGN KEY (branch_id) REFERENCES branches (branch_id),
FOREIGN KEY (commit_hash) REFERENCES commits (commit_hash)
);
CREATE TABLE IF NOT EXISTS files (
file_id INTEGER PRIMARY KEY AUTOINCREMENT,
Expand All @@ -68,9 +67,6 @@ def setup_schema
FOREIGN KEY (latest_commit) REFERENCES commits (commit_hash)
);
CREATE INDEX IF NOT EXISTS idx_files_repo_id ON files (repo_id);
CREATE INDEX IF NOT EXISTS idx_files_file_path ON files (file_path);
CREATE TABLE IF NOT EXISTS commit_files (
commit_hash TEXT NOT NULL,
file_id INTEGER NOT NULL,
Expand All @@ -88,6 +84,12 @@ def setup_schema
FOREIGN KEY (parent_hash) REFERENCES commits (commit_hash)
);
CREATE INDEX IF NOT EXISTS idx_commits_repo_id ON commits (repo_id);
CREATE INDEX IF NOT EXISTS idx_commits_author ON commits (author);
CREATE INDEX IF NOT EXISTS idx_commits_committer ON commits (committer);
CREATE INDEX IF NOT EXISTS idx_branches_repo_id ON branches (repo_id);
CREATE INDEX IF NOT EXISTS idx_files_repo_id ON files (repo_id);
CREATE INDEX IF NOT EXISTS idx_files_file_path ON files (file_path);
CREATE INDEX IF NOT EXISTS idx_commit_parents_commit_hash ON commit_parents (commit_hash);
CREATE INDEX IF NOT EXISTS idx_commit_parents_parent_hash ON commit_parents (parent_hash);
SQL
Expand Down
24 changes: 22 additions & 2 deletions lib/branch_base/repository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,32 @@

module BranchBase
class Repository
attr_reader :repo

# Opens the Git repository at +repo_path+ through Rugged and keeps the
# resulting Rugged::Repository in @repo for the other methods of this class.
#
# @param repo_path [String] path handed straight to Rugged::Repository.new
def initialize(repo_path)
@repo = Rugged::Repository.new(repo_path)
end

def walk(&block)
@repo.walk(@repo.head.target.oid, Rugged::SORT_TOPO, &block)
# Walks commits in topological order, yielding each one to the given block.
#
# When +branch_name+ is given, the walk starts at that branch's target;
# otherwise it starts at the target of the repository's HEAD.
#
# @param branch_name [String, nil] branch to start from (nil means HEAD)
# @raise [ArgumentError] when no branch with that name is found
def walk(branch_name = nil, &block)
  # No branch requested: start from whatever HEAD currently points at.
  return @repo.walk(@repo.head.target.oid, Rugged::SORT_TOPO, &block) unless branch_name

  found = @repo.branches[branch_name] ||
          raise(ArgumentError, "Branch not found: #{branch_name}")

  @repo.walk(found.target.oid, Rugged::SORT_TOPO, &block)
end

# Name of the reference HEAD points at, with a leading "refs/heads/" removed.
#
# @return [String, nil] the stripped reference name, or nil when reading
#   HEAD raises Rugged::ReferenceError
def default_branch_name
  @repo.head.name.sub(%r{^refs/heads/}, "")
rescue Rugged::ReferenceError
  nil
end

def path
Expand Down
54 changes: 37 additions & 17 deletions lib/branch_base/sync.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,32 +39,52 @@ def sync_repository
end

# Records the repository's branches and, for the default branch only, links
# its commits through insert_branch_commits.
#
# Returns early when @repo.default_branch_name is nil. Branches whose name or
# target is nil are skipped.
#
# NOTE(review): this span appears to interleave removed and added lines of a
# diff (the batched_branches / BATCH_SIZE statements sit next to the
# insert_branch calls, and the if/end nesting does not balance as shown) —
# confirm which lines belong to the current version before relying on it.
def sync_branches(repo_id)
BranchBase.logger.debug(
"Syncing branches for repository ID: #{@repo.path}",
)
BranchBase.logger.debug("Syncing branches for repository ID: #{repo_id}")

batched_branches = []
default_branch_name = @repo.default_branch_name
return unless default_branch_name

@repo.branches.each do |branch|
next if branch.name.nil? || branch.target.nil?

commit_oid =
(
if branch.target.respond_to?(:oid)
branch.target.oid
else
branch.target.target.oid
end
)
batched_branches << [repo_id, branch.name, commit_oid]
# Look up or create the branch row; its id keys the branch_commits rows.
branch_id = insert_branch(repo_id, branch.name)

if batched_branches.size >= BATCH_SIZE
insert_branches(batched_branches)
batched_branches.clear
# Commit links are only recorded for the default branch.
if branch.name == default_branch_name
insert_branch_commits(branch_id, branch)
end
end
end

# Returns the branch_id for (repo_id, branch_name), inserting a new row into
# branches when no matching row exists yet.
#
# @param repo_id [Integer] value stored in branches.repo_id
# @param branch_name [String] value stored in branches.name
# @return [Integer] the existing branch_id, or @db.last_insert_row_id after the insert
def insert_branch(repo_id, branch_name)
existing_branch_id =
@db.execute(
"SELECT branch_id FROM branches WHERE name = ? AND repo_id = ?",
[branch_name, repo_id],
).first
# A row came back: reuse its first column (branch_id) instead of inserting.
return existing_branch_id[0] if existing_branch_id

# NOTE(review): batched_branches is not defined in this method; this line
# looks like residue from the removed batching code — confirm and drop.
insert_branches(batched_branches) unless batched_branches.empty?
@db.execute(
"INSERT INTO branches (repo_id, name) VALUES (?, ?)",
[repo_id, branch_name],
)
@db.last_insert_row_id
end

# Links the commits reachable from +branch+'s target to +branch_id+ in the
# branch_commits table.
#
# History is walked with a Rugged::Walker seeded at branch.target. A commit
# for which commit_exists? returns true is skipped; every other commit is
# written with INSERT OR IGNORE, so an already-present pair is left alone.
#
# @param branch_id [Integer] branch_commits.branch_id to write
# @param branch [#name, #target] branch whose history is walked
def insert_branch_commits(branch_id, branch)
  BranchBase.logger.debug("Syncing branch commits for: #{branch.name}")

  start = branch.target
  history = Rugged::Walker.new(@repo.repo)
  history.push(start)

  insert_sql =
    "INSERT OR IGNORE INTO branch_commits (branch_id, commit_hash) VALUES (?, ?)"

  history.each do |commit|
    oid = commit.oid
    @db.execute(insert_sql, [branch_id, oid]) unless commit_exists?(oid)
  end
end

def sync_commits(repo_id)
Expand Down
1 change: 1 addition & 0 deletions spec/branch_base/database_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
commit_files
commit_parents
sqlite_sequence
branch_commits
]
expect(tables.flatten).to match_array(expected_tables)
end
Expand Down
46 changes: 46 additions & 0 deletions spec/branch_base/sync_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,50 @@
end
end
end

# Covers insert_branch_commits, which is reached through sync_branches: only
# the default branch is expected to get rows in branch_commits.
describe "#insert_branch_commits" do
  it "associates commits only with the default branch" do
    sync.sync_branches(@repo_id)
    sync.sync_commits(@repo_id)

    default_branch_name = repo.default_branch_name
    default_branch_id =
      db
        .execute(
          "SELECT branch_id FROM branches WHERE repo_id = ? AND name = ?",
          [@repo_id, default_branch_name],
        )
        .first
        &.first
    # Fail here with a clear message rather than querying branch_commits
    # with a nil id and comparing against an empty result.
    expect(default_branch_id).not_to be_nil

    git_commits = []
    repo.walk(default_branch_name) { |commit| git_commits << commit.oid }

    # Bind values are passed as an array, like the other queries in this spec.
    db_commit_hashes =
      db.execute(
        "SELECT commit_hash FROM branch_commits WHERE branch_id = ?",
        [default_branch_id],
      ).flatten

    # Same contents regardless of order: every walked commit is linked and
    # nothing extra is stored.
    expect(db_commit_hashes).to match_array(git_commits)

    other_branch_ids =
      db.execute(
        "SELECT branch_id FROM branches WHERE repo_id = ? AND name != ?",
        [@repo_id, default_branch_name],
      ).flatten

    other_branch_ids.each do |branch_id|
      other_branch_commit_hashes =
        db.execute(
          "SELECT commit_hash FROM branch_commits WHERE branch_id = ?",
          [branch_id],
        ).flatten
      expect(other_branch_commit_hashes).to be_empty
    end
  end
end
end

0 comments on commit d5c3a86

Please sign in to comment.