Skip to content

Commit

Permalink
Fixes, optimizations and pretty up
Browse files Browse the repository at this point in the history
  • Loading branch information
shayonj committed Dec 3, 2023
1 parent ae79034 commit f9e7f65
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 95 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@

node_modules

.db
*.db
.DS_Store
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

You can now easily run any kind of analytics on your Git directory using the SQLite database.

![plot](./internal/screenshot.png)

## Features ✨

- Synchronize Git repository data into a SQLite database.
Expand Down Expand Up @@ -64,11 +66,13 @@ Once your repository data is synchronized into a SQLite database, you can run va
6. **Authors Who Have Worked on a Specific File**

```sql
SELECT DISTINCT commits.author
FROM commits
JOIN commit_files ON commits.commit_hash = commit_files.commit_hash
JOIN files ON commit_files.file_id = files.file_id
WHERE files.file_path like '%sqlite3%'
SELECT files.file_path, commits.author, COUNT(*) as times_contributed
FROM commits
JOIN commit_files ON commits.commit_hash = commit_files.commit_hash
JOIN files ON commit_files.file_id = files.file_id
WHERE files.file_path LIKE '%connection_adapters/sqlite%'
GROUP BY files.file_path, commits.author
ORDER BY times_contributed DESC;
```

## Installation 📥
Expand Down
Binary file added internal/screenshot.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added internal/screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 16 additions & 0 deletions lib/branch_base.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
# frozen_string_literal: true

require "logger"
require "branch_base/database"
require "branch_base/repository"
require "branch_base/sync"
require "branch_base/cli"

module BranchBase
  # Returns the shared logger for the library, creating it on first use.
  #
  # The logger writes to $stdout, tags entries with the "BranchBase"
  # progname, and logs at DEBUG level when the DEBUG environment variable
  # is set (INFO otherwise).
  #
  # @return [Logger] the memoized logger instance
  def self.logger
    return @logger if @logger

    verbosity = ENV["DEBUG"] ? Logger::DEBUG : Logger::INFO
    line_format =
      proc do |severity, datetime, progname, msg|
        "#{datetime}: #{severity} - #{progname}: #{msg}\n"
      end

    instance = Logger.new($stdout)
    instance.progname = "BranchBase"
    instance.level = verbosity
    instance.formatter = line_format

    @logger = instance
  end
end
18 changes: 13 additions & 5 deletions lib/branch_base/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,33 @@

module BranchBase
  # Thor-based command-line interface for BranchBase.
  class CLI < Thor
    desc "sync REPO_PATH", "Synchronize a Git directory to a SQLite database"
    # Syncs the Git repository at +repo_path+ into a SQLite database file
    # named "<repo_name>_git_data.db", created inside the repository directory.
    #
    # Exits the process with status 1 when +repo_path+ has no ".git" directory.
    #
    # @param repo_path [String] path (relative or absolute) to the repository
    def sync(repo_path)
      BranchBase.logger.info("Starting sync process for #{repo_path}...")

      full_repo_path = File.expand_path(repo_path)

      unless File.directory?(File.join(full_repo_path, ".git"))
        BranchBase.logger.error(
          "The specified path is not a valid Git repository: #{full_repo_path}"
        )
        exit(1)
      end

      repo_name = File.basename(full_repo_path)
      db_filename = File.join(full_repo_path, "#{repo_name}_git_data.db")

      database = Database.new(db_filename)
      repository = Repository.new(full_repo_path)

      # Monotonic clock: the measured duration is unaffected by wall-clock
      # adjustments that may happen during a long sync.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      Sync.new(database, repository).run
      elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

      BranchBase.logger.info(
        "Repository data synced successfully in #{db_filename} in #{elapsed_time.round(2)} seconds"
      )
    end
  end
end
54 changes: 38 additions & 16 deletions lib/branch_base/sync.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,22 @@ def sync_repository
existing_repo_id =
@db.execute(
"SELECT repo_id FROM repositories WHERE url = ?",
[repo_path],
[repo_path]
).first
return existing_repo_id[0] if existing_repo_id

@db.execute(
"INSERT INTO repositories (name, url) VALUES (?, ?)",
[repo_name, repo_path],
[repo_name, repo_path]
)
@db.last_insert_row_id
end

def sync_branches(repo_id)
BranchBase.logger.debug(
"Syncing branches for repository ID: #{@repo.path}"
)

batched_branches = []

@repo.branches.each do |branch|
Expand Down Expand Up @@ -75,7 +79,7 @@ def sync_commits(repo_id)
commit.author[:name],
commit.committer[:name],
commit.message,
commit.time.to_s,
commit.time.to_s
]

if batched_commits.size >= BATCH_SIZE
Expand Down Expand Up @@ -105,7 +109,7 @@ def sync_commits(repo_id)
def commit_exists?(commit_hash)
@db.execute(
"SELECT COUNT(*) FROM commits WHERE commit_hash = ?",
[commit_hash],
[commit_hash]
).first[
0
].positive?
Expand All @@ -116,21 +120,25 @@ def insert_commit_files(commit, repo_id)
file_path = patch.delta.new_file[:path]
@db.execute(
"INSERT OR IGNORE INTO files (repo_id, file_path, latest_commit) VALUES (?, ?, ?)",
[repo_id, file_path, commit.oid],
[repo_id, file_path, commit.oid]
)
file_id = @db.last_insert_row_id
@db.execute(
"INSERT INTO commit_files (commit_hash, file_id, changes) VALUES (?, ?, ?)",
[commit.oid, file_id, patch.to_s],
[commit.oid, file_id, patch.to_s]
)
end
end

# Records the parent links of +commit+ in the commit_parents table,
# one row per entry in +commit.parent_ids+.
#
# @param commit [#oid, #parent_ids] the commit whose parents are stored
# @return the value of +commit.parent_ids+ (result of iterating it)
def insert_commit_parents(commit)
  BranchBase.logger.debug(
    "Inserting parent commits for repository: #{@repo.path}"
  )

  commit.parent_ids.each do |parent_id|
    # Exactly one bind array: the statement has two placeholders.
    @db.execute(
      "INSERT INTO commit_parents (commit_hash, parent_hash) VALUES (?, ?)",
      [commit.oid, parent_id]
    )
  end
end
Expand All @@ -140,20 +148,34 @@ def insert_branches(batched_branches)
batched_branches.each do |data|
@db.execute(
"INSERT OR IGNORE INTO branches (repo_id, name, head_commit) VALUES (?, ?, ?)",
data,
data
)
end
end
end

# Bulk-inserts commit rows into the commits table inside one transaction.
#
# Each element of +batched_commits+ is a six-value array in column order:
# commit_hash, repo_id, author, committer, message, timestamp.
#
# Values are passed as bound parameters rather than interpolated into the
# SQL text, so apostrophes (or any other SQL syntax) in commit messages and
# author names are stored verbatim instead of breaking or altering the
# statement.
#
# @param batched_commits [Array<Array>] rows to insert
# @return [nil] when +batched_commits+ is empty; otherwise whatever
#   +@db.transaction+ returns
def insert_commits(batched_commits)
  BranchBase.logger.debug(
    "Inserting commits for repository ID: #{@repo.path}"
  )

  return if batched_commits.empty?

  row_placeholder = "(?, ?, ?, ?, ?, ?)"

  @db.transaction do
    # 100 rows x 6 columns = 600 bound values per statement, which stays
    # below SQLite's historical default limit of 999 host parameters.
    batched_commits.each_slice(100) do |rows|
      values_clause = Array.new(rows.size, row_placeholder).join(", ")

      sql = <<~SQL
        INSERT INTO commits
        (commit_hash, repo_id, author, committer, message, timestamp)
        VALUES #{values_clause};
      SQL

      @db.execute(sql, rows.flatten(1))
    end
  end
end

Expand All @@ -163,12 +185,12 @@ def insert_files_and_commit_files(batched_data)
repo_id, file_path, commit_hash, changes = data
@db.execute(
"INSERT OR IGNORE INTO files (repo_id, file_path, latest_commit) VALUES (?, ?, ?)",
[repo_id, file_path, commit_hash],
[repo_id, file_path, commit_hash]
)
file_id = @db.last_insert_row_id
@db.execute(
"INSERT INTO commit_files (commit_hash, file_id, changes) VALUES (?, ?, ?)",
[commit_hash, file_id, changes],
[commit_hash, file_id, changes]
)
end
end
Expand Down
52 changes: 0 additions & 52 deletions spec/branch_base/cli_spec.rb

This file was deleted.

9 changes: 4 additions & 5 deletions spec/branch_base/database_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# frozen_string_literal: true
require "branch_base/database"
require "rspec"
require "spec_helper"

RSpec.describe(BranchBase::Database) do
let(:database) { BranchBase::Database.new(":memory:") }
Expand All @@ -27,7 +26,7 @@
result =
database.execute(
"INSERT INTO repositories (name, url) VALUES (?, ?)",
%w[mock_repo mock_repo/],
%w[mock_repo mock_repo/]
)
expect(result).to be_empty
end
Expand All @@ -47,7 +46,7 @@
database.transaction do
database.execute(
"INSERT INTO repositories (name, url) VALUES (?, ?)",
%w[mock_repo mock_repo/],
%w[mock_repo mock_repo/]
)
raise "Rollback transaction"
end
Expand All @@ -62,7 +61,7 @@
it "returns the last insert row ID" do
database.execute(
"INSERT INTO repositories (name, url) VALUES (?, ?)",
%w[mock_repo mock_repo/],
%w[mock_repo mock_repo/]
)
expect(database.last_insert_row_id).to be > 0
end
Expand Down
4 changes: 1 addition & 3 deletions spec/branch_base/repository_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# frozen_string_literal: true
require "branch_base/repository"
require "rugged"
require "rspec"
require "spec_helper"
require "test_helper"

RSpec.describe(BranchBase::Repository) do
Expand Down
13 changes: 5 additions & 8 deletions spec/branch_base/sync_spec.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# frozen_string_literal: true
require "branch_base/sync"
require "branch_base/database"
require "branch_base/repository"
require "test_helper"
require "rspec"
require "spec_helper"

RSpec.describe(BranchBase::Sync) do
let(:db) { BranchBase::Database.new(":memory:") }
Expand All @@ -27,7 +24,7 @@
stored_repo =
db.execute(
"SELECT * FROM repositories WHERE repo_id = ?",
repo_id,
repo_id
).first
expect(stored_repo).not_to be_nil
expect(stored_repo[1]).to eq(File.basename(repo_path))
Expand All @@ -40,7 +37,7 @@
existing_repo_id = sync.sync_repository

expect { sync.sync_repository }.not_to(
change { db.execute("SELECT COUNT(*) FROM repositories").first[0] },
change { db.execute("SELECT COUNT(*) FROM repositories").first[0] }
)

expect(sync.sync_repository).to eq(existing_repo_id)
Expand All @@ -59,7 +56,7 @@
expect(db_branches.size).to eq(git_branches.size)
git_branches.each do |branch_name|
expect(
db_branches.any? { |db_branch| db_branch[2] == branch_name },
db_branches.any? { |db_branch| db_branch[2] == branch_name }
).to be(true)
end
end
Expand All @@ -76,7 +73,7 @@
expect(db_commits.size).to eq(git_commits.size)
git_commits.each do |commit|
expect(
db_commits.any? { |db_commit| db_commit[0] == commit.oid },
db_commits.any? { |db_commit| db_commit[0] == commit.oid }
).to be(true)
end
end
Expand Down
2 changes: 2 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# the additional setup, and require it from the spec files that actually need
# it.
#
require "branch_base"

# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
# rspec-expectations config goes here. You can use an alternate
Expand Down

0 comments on commit f9e7f65

Please sign in to comment.