Skip to content

Commit fe1fb8f

Browse files
Remove the gitPython dependency and use github zip link (#3163)
1 parent 2af5a8a commit fe1fb8f

File tree

6 files changed

+281
-296
lines changed

6 files changed

+281
-296
lines changed

blt/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@
625625
"BACKEND": "channels_redis.core.RedisChannelLayer",
626626
"CONFIG": {
627627
"hosts": [os.environ.get("REDISCLOUD_URL")],
628+
# "hosts": [("127.0.0.1", 6379)],
628629
},
629630
},
630631
}

poetry.lock

Lines changed: 182 additions & 272 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ django-redis = "^5.4.0"
8585
uvicorn = "^0.34.0"
8686
channels = "^4.2.0"
8787
channels-redis = "^4.2.1"
88-
gitpython = "^3.1.43"
88+
async-timeout = "^5.0.1"
8989

9090
[tool.poetry.group.dev.dependencies]
9191
black = "^24.8.0"

website/consumers.py

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,20 @@
22
import difflib
33
import json
44
import os
5-
import subprocess
65
import tempfile
6+
import zipfile
7+
from pathlib import Path
78

9+
import aiohttp
810
from channels.generic.websocket import AsyncWebsocketConsumer
9-
from git import Repo
1011

1112
from website.utils import (
1213
compare_model_fields,
1314
cosine_similarity,
1415
extract_django_models,
1516
extract_function_signatures_and_content,
1617
generate_embedding,
18+
git_url_to_zip_url,
1719
)
1820

1921

@@ -46,6 +48,8 @@ async def receive(self, text_data):
4648
type2 = data.get("type2") # 'github' or 'zip'
4749
repo1 = data.get("repo1") # GitHub URL or ZIP file path
4850
repo2 = data.get("repo2") # GitHub URL or ZIP file path
51+
branch1 = data.get("branch1") # Branch name for the first repository
52+
branch2 = data.get("branch2") # Branch name for the second repository
4953

5054
if not repo1 or not repo2 or not type1 or not type2:
5155
await self.send(
@@ -58,12 +62,14 @@ async def receive(self, text_data):
5862
return
5963

6064
try:
61-
# Create a temporary directory for repository processing
6265
temp_dir = tempfile.mkdtemp()
6366

6467
# Download or extract the repositories
65-
repo1_path = await self.download_or_extract(repo1, type1, temp_dir, "repo1")
66-
repo2_path = await self.download_or_extract(repo2, type2, temp_dir, "repo2")
68+
69+
zip_repo1 = git_url_to_zip_url(repo1, branch1)
70+
zip_repo2 = git_url_to_zip_url(repo2, branch2)
71+
repo1_path = await self.download_or_extract(zip_repo1, type1, temp_dir, "repo1")
72+
repo2_path = await self.download_or_extract(zip_repo2, type2, temp_dir, "repo2")
6773

6874
# Process similarity analysis
6975
matching_details = await self.run_similarity_analysis(repo1_path, repo2_path)
@@ -88,7 +94,10 @@ async def receive(self, text_data):
8894
# Handle unexpected errors and send an error message
8995
await self.send(
9096
json.dumps(
91-
{"status": "error", "error": "Please check the repositories and try again."}
97+
{
98+
"status": "error",
99+
"error": "Please check the repositories/branches and try again.",
100+
}
92101
)
93102
)
94103
await self.close()
@@ -125,29 +134,62 @@ async def download_or_extract(self, source, source_type, temp_dir, repo_name):
125134
"""
126135
dest_path = os.path.join(temp_dir, repo_name)
127136
if source_type == "github":
128-
try:
129-
# Clone the GitHub repository
130-
process = await self.clone_github_repo(source, dest_path)
131-
return dest_path
132-
except subprocess.CalledProcessError as e:
133-
# Handle errors during the cloning process
134-
raise Exception(f"Error cloning GitHub repository: {e.stderr.decode('utf-8')}")
135-
except Exception as e:
136-
# General error handling for unexpected issues
137-
raise Exception(f"Unexpected error during GitHub cloning: {str(e)}")
137+
repo_path = await self.download_and_extract_zip(source, temp_dir, repo_name)
138+
return repo_path
138139

139140
elif source_type == "zip":
140-
# Handle ZIP extraction (Add your ZIP handling logic here)
141-
pass
141+
# Assume `repo_url_or_path` is a direct path to a ZIP file
142+
repo_path = await self.extract_zip(source, temp_dir, repo_name)
143+
return repo_path
142144

143145
return dest_path
144146

145-
async def clone_github_repo(self, repo_url, dest_path):
147+
async def download_and_extract_zip(self, zip_url, temp_dir, repo_name):
    """
    Download a ZIP archive from a URL and extract it under ``temp_dir``.

    The archive is saved as ``<temp_dir>/<repo_name>.zip`` and unpacked into
    ``<temp_dir>/<repo_name>``.

    Args:
        zip_url (str): URL of the ZIP archive to download.
        temp_dir (str): Directory that receives the archive and its contents.
        repo_name (str): Name used for the archive file and the extraction folder.

    Returns:
        str: Path to the directory holding the extracted contents.

    Raises:
        Exception: If the server does not answer with HTTP 200, or if the
            downloaded file is not a valid ZIP archive.
    """
    import asyncio  # function-scope import so the method does not rely on file-level imports

    zip_file_path = Path(temp_dir) / f"{repo_name}.zip"
    extraction_path = Path(temp_dir) / repo_name

    async with aiohttp.ClientSession() as session:
        async with session.get(zip_url) as response:
            if response.status != 200:
                raise Exception(f"Failed to download ZIP file. Status code: {response.status}")

            # Stream the body to disk in chunks so a large archive is never
            # held in memory in one piece.
            with open(zip_file_path, "wb") as zip_file:
                async for chunk in response.content.iter_chunked(64 * 1024):
                    zip_file.write(chunk)

    def _extract():
        # Blocking work: executed in a worker thread, see below.
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(extraction_path)

    try:
        # Extraction is synchronous disk I/O; run it off the event loop so
        # other websocket consumers keep being served meanwhile.
        await asyncio.get_running_loop().run_in_executor(None, _extract)
    except zipfile.BadZipFile as e:
        # Keep the original message but preserve the underlying cause.
        raise Exception(f"Failed to extract ZIP file: {e}") from e

    return str(extraction_path)
176+
177+
async def extract_zip(self, zip_file_path, temp_dir, repo_name):
    """
    Extracts a local ZIP file.

    The archive is unpacked into ``<temp_dir>/<repo_name>``. Extraction is
    blocking disk I/O, so it runs in the event loop's default executor
    instead of directly inside the coroutine.

    Args:
        zip_file_path (str): Path to the local ZIP file.
        temp_dir (str): Temporary directory to store files.
        repo_name (str): Repository identifier.

    Returns:
        str: Path to the extracted contents.

    Raises:
        FileNotFoundError: If ``zip_file_path`` does not exist.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
    """
    import asyncio  # function-scope import so the method does not rely on file-level imports

    extraction_path = Path(temp_dir) / repo_name

    def _extract():
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(extraction_path)

    # Run the synchronous extraction in a worker thread; exceptions raised
    # there propagate unchanged through the awaited future.
    await asyncio.get_running_loop().run_in_executor(None, _extract)
    return str(extraction_path)
151193

152194
def process_similarity_analysis(self, repo1_path, repo2_path):
153195
"""

website/templates/similarity.html

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ <h1>Similarity Check</h1>
127127
<option value="github">GitHub</option>
128128
</select>
129129
</div>
130+
<div>
131+
<input id="branch1"
132+
name="branch1"
133+
type="text"
134+
placeholder="Enter the branch name"
135+
class="border border-gray-300 p-2 rounded-md w-full"
136+
required>
137+
</div>
130138
</div>
131139
<div class="flex gap-4 mt-4">
132140
<div>
@@ -144,6 +152,14 @@ <h1>Similarity Check</h1>
144152
<option value="github">GitHub</option>
145153
</select>
146154
</div>
155+
<div>
156+
<input id="branch2"
157+
name="branch2"
158+
type="text"
159+
placeholder="Enter the branch name"
160+
class="border border-gray-300 p-2 rounded-md w-full"
161+
required>
162+
</div>
147163
</div>
148164
<button type="button" id="fetchReports" class="btn">Fetch Reports</button>
149165
<button type="button" id="cancelRequest" class="btn">Cancel</button>
@@ -237,8 +253,12 @@ <h2>Results</h2>
237253

238254
const repo1 = document.getElementById("repo1").value;
239255
const type1 = document.getElementById("type1").value;
256+
const branch1 = document.getElementById("branch1").value;
257+
240258
const repo2 = document.getElementById("repo2").value;
241259
const type2 = document.getElementById("type2").value;
260+
const branch2 = document.getElementById("branch2").value;
261+
242262

243263
// Validate input
244264
if (!repo1 || !repo2) {
@@ -257,7 +277,10 @@ <h2>Results</h2>
257277
repo1: repo1,
258278
type1: type1,
259279
repo2: repo2,
260-
type2: type2
280+
type2: type2,
281+
branch1: branch1,
282+
branch2: branch2
283+
261284
}));
262285
} catch (error) {
263286

website/utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,3 +469,12 @@ def compare_model_fields(model1, model2):
469469
"field_comparison_details": field_comparison_details,
470470
"overall_field_similarity": round(overall_field_similarity, 2),
471471
}
472+
473+
474+
def git_url_to_zip_url(git_url, branch="master"):
    """
    Convert a ``.git`` clone URL into the URL of a branch ZIP archive.

    Example: ``https://github.com/owner/repo.git`` with branch ``main``
    becomes ``https://github.com/owner/repo/archive/refs/heads/main.zip``.

    Args:
        git_url (str): Repository clone URL; must end with ``.git``
            (surrounding whitespace and a trailing ``/`` are tolerated).
        branch (str | None): Branch name. ``None`` or a blank string falls
            back to ``"master"``, so callers may forward an optional field.

    Returns:
        str: URL of the ZIP archive for the requested branch.

    Raises:
        ValueError: If ``git_url`` is not a string ending in ``.git``.
    """
    from urllib.parse import quote  # function-scope import; stdlib only

    if not isinstance(git_url, str):
        raise ValueError("Invalid .git URL provided")

    cleaned_url = git_url.strip().rstrip("/")
    if not cleaned_url.endswith(".git"):
        raise ValueError("Invalid .git URL provided")

    # An explicitly passed None/empty branch must not end up as the literal
    # text "None" in the URL; use the documented default instead.
    branch_name = branch.strip() if isinstance(branch, str) else ""
    if not branch_name:
        branch_name = "master"

    base_url = cleaned_url[: -len(".git")]
    # Keep "/" so names such as "feature/x" stay intact; escape the rest.
    return f"{base_url}/archive/refs/heads/{quote(branch_name, safe='/')}.zip"

0 commit comments

Comments
 (0)