Skip to content

Commit

Permalink
Remove the gitPython dependency and use github zip link (#3163)
Browse files Browse the repository at this point in the history
  • Loading branch information
krrish-sehgal authored Dec 31, 2024
1 parent 2af5a8a commit fe1fb8f
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 296 deletions.
1 change: 1 addition & 0 deletions blt/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@
"BACKEND": "channels_redis.core.RedisChannelLayer",
"CONFIG": {
"hosts": [os.environ.get("REDISCLOUD_URL")],
# "hosts": [("127.0.0.1", 6379)],
},
},
}
454 changes: 182 additions & 272 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ django-redis = "^5.4.0"
uvicorn = "^0.34.0"
channels = "^4.2.0"
channels-redis = "^4.2.1"
gitpython = "^3.1.43"
async-timeout = "^5.0.1"

[tool.poetry.group.dev.dependencies]
black = "^24.8.0"
Expand Down
86 changes: 64 additions & 22 deletions website/consumers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
import difflib
import json
import os
import subprocess
import tempfile
import zipfile
from pathlib import Path

import aiohttp
from channels.generic.websocket import AsyncWebsocketConsumer
from git import Repo

from website.utils import (
compare_model_fields,
cosine_similarity,
extract_django_models,
extract_function_signatures_and_content,
generate_embedding,
git_url_to_zip_url,
)


Expand Down Expand Up @@ -46,6 +48,8 @@ async def receive(self, text_data):
type2 = data.get("type2") # 'github' or 'zip'
repo1 = data.get("repo1") # GitHub URL or ZIP file path
repo2 = data.get("repo2") # GitHub URL or ZIP file path
branch1 = data.get("branch1") # Branch name for the first repository
branch2 = data.get("branch2") # Branch name for the second repository

if not repo1 or not repo2 or not type1 or not type2:
await self.send(
Expand All @@ -58,12 +62,14 @@ async def receive(self, text_data):
return

try:
# Create a temporary directory for repository processing
temp_dir = tempfile.mkdtemp()

# Download or extract the repositories
repo1_path = await self.download_or_extract(repo1, type1, temp_dir, "repo1")
repo2_path = await self.download_or_extract(repo2, type2, temp_dir, "repo2")

zip_repo1 = git_url_to_zip_url(repo1, branch1)
zip_repo2 = git_url_to_zip_url(repo2, branch2)
repo1_path = await self.download_or_extract(zip_repo1, type1, temp_dir, "repo1")
repo2_path = await self.download_or_extract(zip_repo2, type2, temp_dir, "repo2")

# Process similarity analysis
matching_details = await self.run_similarity_analysis(repo1_path, repo2_path)
Expand All @@ -88,7 +94,10 @@ async def receive(self, text_data):
# Handle unexpected errors and send an error message
await self.send(
json.dumps(
{"status": "error", "error": "Please check the repositories and try again."}
{
"status": "error",
"error": "Please check the repositories/branches and try again.",
}
)
)
await self.close()
Expand Down Expand Up @@ -125,29 +134,62 @@ async def download_or_extract(self, source, source_type, temp_dir, repo_name):
"""
dest_path = os.path.join(temp_dir, repo_name)
if source_type == "github":
try:
# Clone the GitHub repository
process = await self.clone_github_repo(source, dest_path)
return dest_path
except subprocess.CalledProcessError as e:
# Handle errors during the cloning process
raise Exception(f"Error cloning GitHub repository: {e.stderr.decode('utf-8')}")
except Exception as e:
# General error handling for unexpected issues
raise Exception(f"Unexpected error during GitHub cloning: {str(e)}")
repo_path = await self.download_and_extract_zip(source, temp_dir, repo_name)
return repo_path

elif source_type == "zip":
# Handle ZIP extraction (Add your ZIP handling logic here)
pass
# Assume `repo_url_or_path` is a direct path to a ZIP file
repo_path = await self.extract_zip(source, temp_dir, repo_name)
return repo_path

return dest_path

async def clone_github_repo(self, repo_url, dest_path):
async def download_and_extract_zip(self, zip_url, temp_dir, repo_name):
    """
    Downloads a ZIP archive from a URL and extracts it under ``temp_dir``.

    The archive is saved as ``<temp_dir>/<repo_name>.zip`` and unpacked into
    ``<temp_dir>/<repo_name>``.

    Args:
        zip_url (str): URL of the ZIP archive to download.
        temp_dir (str): Temporary directory to store files.
        repo_name (str): Repository identifier used for the file/directory names.
    Returns:
        str: Path to the extracted contents.
    Raises:
        Exception: If the HTTP status is not 200 or the payload is not a valid ZIP.
    """
    # Local import so this method does not depend on the module's import block.
    import asyncio

    async with aiohttp.ClientSession() as session:
        async with session.get(zip_url) as response:
            if response.status != 200:
                raise Exception(
                    f"Failed to download ZIP file. Status code: {response.status}"
                )
            # Read the whole body before touching the disk so a failed
            # download never leaves a truncated archive behind.
            zip_data = await response.read()

    zip_file_path = Path(temp_dir) / f"{repo_name}.zip"
    extraction_path = Path(temp_dir) / repo_name

    def _write_and_extract():
        # Blocking disk I/O: executed in a worker thread (see below).
        zip_file_path.write_bytes(zip_data)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(extraction_path)

    try:
        # Keep the event loop responsive while writing/unpacking large archives.
        await asyncio.get_running_loop().run_in_executor(None, _write_and_extract)
    except zipfile.BadZipFile as e:
        raise Exception(f"Failed to extract ZIP file: {e}") from e

    return str(extraction_path)

async def extract_zip(self, zip_file_path, temp_dir, repo_name):
    """
    Extracts a local ZIP file.

    Args:
        zip_file_path (str): Path to the local ZIP file.
        temp_dir (str): Temporary directory to store files.
        repo_name (str): Repository identifier.
    Returns:
        str: Path to the extracted contents.
    Raises:
        FileNotFoundError: If ``zip_file_path`` does not exist.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
    """
    # Local import so this method does not depend on the module's import block.
    import asyncio

    extraction_path = Path(temp_dir) / repo_name

    def _extract():
        # Blocking disk I/O: executed in a worker thread (see below).
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(extraction_path)

    # The stale repository-clone call was removed: it referenced names that are
    # not parameters of this method. Extraction runs in the default executor so
    # the event loop is not blocked while unpacking.
    await asyncio.get_running_loop().run_in_executor(None, _extract)
    return str(extraction_path)

def process_similarity_analysis(self, repo1_path, repo2_path):
"""
Expand Down
25 changes: 24 additions & 1 deletion website/templates/similarity.html
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ <h1>Similarity Check</h1>
<option value="github">GitHub</option>
</select>
</div>
<div>
<input id="branch1"
name="branch1"
type="text"
placeholder="Enter the branch name"
class="border border-gray-300 p-2 rounded-md w-full"
required>
</div>
</div>
<div class="flex gap-4 mt-4">
<div>
Expand All @@ -144,6 +152,14 @@ <h1>Similarity Check</h1>
<option value="github">GitHub</option>
</select>
</div>
<div>
<input id="branch2"
name="branch2"
type="text"
placeholder="Enter the branch name"
class="border border-gray-300 p-2 rounded-md w-full"
required>
</div>
</div>
<button type="button" id="fetchReports" class="btn">Fetch Reports</button>
<button type="button" id="cancelRequest" class="btn">Cancel</button>
Expand Down Expand Up @@ -237,8 +253,12 @@ <h2>Results</h2>

const repo1 = document.getElementById("repo1").value;
const type1 = document.getElementById("type1").value;
const branch1 = document.getElementById("branch1").value;

const repo2 = document.getElementById("repo2").value;
const type2 = document.getElementById("type2").value;
const branch2 = document.getElementById("branch2").value;


// Validate input
if (!repo1 || !repo2) {
Expand All @@ -257,7 +277,10 @@ <h2>Results</h2>
repo1: repo1,
type1: type1,
repo2: repo2,
type2: type2
type2: type2,
branch1: branch1,
branch2: branch2

}));
} catch (error) {

Expand Down
9 changes: 9 additions & 0 deletions website/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,3 +469,12 @@ def compare_model_fields(model1, model2):
"field_comparison_details": field_comparison_details,
"overall_field_similarity": round(overall_field_similarity, 2),
}


def git_url_to_zip_url(git_url, branch="master"):
    """
    Build the branch-archive ZIP URL for a repository ``.git`` clone URL.

    The trailing ``.git`` is dropped and ``/archive/refs/heads/<branch>.zip``
    is appended.

    Args:
        git_url (str): Clone URL ending in ``.git``; surrounding whitespace
            is ignored.
        branch (str): Branch name. ``None`` or a blank value falls back to
            ``"master"`` so a missing form field does not produce a
            ``.../heads/None.zip`` URL.
    Returns:
        str: URL of the ZIP archive for the requested branch.
    Raises:
        ValueError: If ``git_url`` is not a string ending in ``.git``.
    """
    suffix = ".git"
    cleaned_url = git_url.strip() if isinstance(git_url, str) else ""
    if not cleaned_url.endswith(suffix):
        raise ValueError("Invalid .git URL provided")

    # Callers may forward an unset/empty branch straight from user input.
    branch_name = (branch or "").strip() or "master"
    base_url = cleaned_url[: -len(suffix)]
    return f"{base_url}/archive/refs/heads/{branch_name}.zip"

0 comments on commit fe1fb8f

Please sign in to comment.