Skip to content

Commit d9d7f21

Browse files
committed
Enhance code-commit collection capabilities for VCIO.
Update the model to include Patch and PackageCommitPatch.

Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent b8ea2ad commit d9d7f21

File tree

7 files changed: 373 additions (+373) and 27 deletions (-27).

7 files changed: 373 additions (+373) and 27 deletions (-27).

vulnerabilities/importer.py

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,7 @@ class PackageCommitPatchData:
204204
vcs_url: str
205205
commit_hash: str
206206
patch_text: Optional[str] = None
207+
patch_url: Optional[str] = None
207208

208209
def __post_init__(self):
209210
if not self.commit_hash:
@@ -226,6 +227,7 @@ def _cmp_key(self):
226227
self.vcs_url,
227228
self.commit_hash,
228229
self.patch_text,
230+
self.patch_url,
229231
)
230232

231233
def to_dict(self) -> dict:
@@ -234,6 +236,7 @@ def to_dict(self) -> dict:
234236
"vcs_url": self.vcs_url,
235237
"commit_hash": self.commit_hash,
236238
"patch_text": self.patch_text,
239+
"patch_url": self.patch_url,
237240
}
238241

239242
@classmethod
@@ -243,6 +246,7 @@ def from_dict(cls, data: dict):
243246
vcs_url=data.get("vcs_url"),
244247
commit_hash=data.get("commit_hash"),
245248
patch_text=data.get("patch_text"),
249+
patch_url=data.get("patch_url"),
246250
)
247251

248252

@@ -251,13 +255,22 @@ def from_dict(cls, data: dict):
251255
class PatchData:
252256
patch_url: Optional[str] = None
253257
patch_text: Optional[str] = None
258+
patch_checksum: Optional[str] = dataclasses.field(init=False, default=None)
254259
vcs_url: Optional[str] = None
255260
commit_hash: Optional[str] = None
256-
patch_checksum: Optional[str] = None
257261

258262
def __post_init__(self):
259-
if not self.vcs_url and not self.patch_text and not self.patch_url:
260-
raise ValueError("A patch must include patch_url, patch_text, or vcs_url")
263+
if not (self.patch_url or self.patch_text or self.vcs_url):
264+
raise ValueError("A patch must include either patch_url or patch_text or vcs_url")
265+
266+
if self.vcs_url and not self.commit_hash:
267+
raise ValueError("If vcs_url is provided, commit_hash is required")
268+
269+
if self.commit_hash and not self.vcs_url:
270+
raise ValueError("commit_hash requires vcs_url")
271+
272+
if self.patch_text:
273+
self.patch_checksum = hashlib.sha512(self.patch_text.encode()).hexdigest()
261274

262275
def __lt__(self, other):
263276
if not isinstance(other, PatchData):
@@ -266,20 +279,21 @@ def __lt__(self, other):
266279

267280
def _cmp_key(self):
268281
return (
269-
self.vcs_url,
270-
self.commit_hash,
282+
self.patch_url,
271283
self.patch_text,
284+
self.commit_hash,
285+
self.vcs_url,
272286
self.patch_checksum,
273287
)
274288

275289
def to_dict(self) -> dict:
276290
"""Return a normalized dictionary representation of the commit."""
277291
return {
278292
"patch_url": self.patch_url,
279-
"vcs_url": self.vcs_url,
280-
"commit_hash": self.commit_hash,
281293
"patch_text": self.patch_text,
282294
"patch_checksum": self.patch_checksum,
295+
"vcs_url": self.vcs_url,
296+
"commit_hash": self.commit_hash,
283297
}
284298

285299
@classmethod
Lines changed: 133 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,133 @@
1+
# Generated by Django 4.2.25 on 2025-12-01 04:27
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("vulnerabilities", "0105_rename_codecommit_codepatch_and_more"),
10+
]
11+
12+
operations = [
13+
migrations.CreateModel(
14+
name="PackageCommitPatch",
15+
fields=[
16+
(
17+
"id",
18+
models.AutoField(
19+
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
20+
),
21+
),
22+
(
23+
"commit_hash",
24+
models.CharField(
25+
help_text="The commit hash of the patch in the VCS.", max_length=128
26+
),
27+
),
28+
(
29+
"vcs_url",
30+
models.URLField(
31+
help_text="The Version Control System URL (e.g., git repo URL).",
32+
max_length=1024,
33+
),
34+
),
35+
("patch_url", models.TextField(blank=True, null=True)),
36+
("patch_text", models.TextField(blank=True, null=True)),
37+
("patch_checksum", models.CharField(blank=True, max_length=128, null=True)),
38+
],
39+
options={
40+
"unique_together": {("commit_hash", "vcs_url")},
41+
},
42+
),
43+
migrations.CreateModel(
44+
name="Patch",
45+
fields=[
46+
(
47+
"id",
48+
models.AutoField(
49+
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
50+
),
51+
),
52+
(
53+
"commit_hash",
54+
models.CharField(
55+
blank=True,
56+
help_text="The commit hash of the patch in the VCS.",
57+
max_length=128,
58+
null=True,
59+
),
60+
),
61+
(
62+
"vcs_url",
63+
models.URLField(
64+
blank=True,
65+
help_text="The Version Control System URL (e.g., git repo URL).",
66+
max_length=1024,
67+
null=True,
68+
),
69+
),
70+
(
71+
"patch_url",
72+
models.URLField(
73+
blank=True, help_text="URL to the patch file or diff.", null=True
74+
),
75+
),
76+
(
77+
"patch_text",
78+
models.TextField(
79+
blank=True,
80+
help_text="The actual text content of the code patch/diff.",
81+
null=True,
82+
),
83+
),
84+
(
85+
"patch_checksum",
86+
models.CharField(
87+
help_text="SHA512 checksum of the patch content.", max_length=128
88+
),
89+
),
90+
],
91+
options={
92+
"unique_together": {("patch_checksum", "patch_url")},
93+
},
94+
),
95+
migrations.RemoveField(
96+
model_name="impactedpackage",
97+
name="fixed_by_commits",
98+
),
99+
migrations.RemoveField(
100+
model_name="impactedpackage",
101+
name="introduced_by_commits",
102+
),
103+
migrations.DeleteModel(
104+
name="CodePatch",
105+
),
106+
migrations.AddField(
107+
model_name="advisoryv2",
108+
name="patches",
109+
field=models.ManyToManyField(
110+
help_text="A list of patches associated with this advisory.",
111+
related_name="advisories",
112+
to="vulnerabilities.patch",
113+
),
114+
),
115+
migrations.AddField(
116+
model_name="impactedpackage",
117+
name="fixed_by_package_commit_patches",
118+
field=models.ManyToManyField(
119+
help_text="PackageCommitPatches that fix this impact.",
120+
related_name="fixed_in_impacts",
121+
to="vulnerabilities.packagecommitpatch",
122+
),
123+
),
124+
migrations.AddField(
125+
model_name="impactedpackage",
126+
name="introduced_by_package_commit_patches",
127+
field=models.ManyToManyField(
128+
help_text="PackageCommitPatches that introduce this impact.",
129+
related_name="introduced_in_impacts",
130+
to="vulnerabilities.packagecommitpatch",
131+
),
132+
),
133+
]

vulnerabilities/models.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2751,6 +2751,20 @@ class Patch(models.Model):
27512751
Used for raw text patches, fallback scenarios, or unsupported VCS types.
27522752
"""
27532753

2754+
commit_hash = models.CharField(
2755+
max_length=128,
2756+
blank=True,
2757+
null=True,
2758+
help_text="The commit hash of the patch in the VCS.",
2759+
)
2760+
2761+
vcs_url = models.URLField(
2762+
max_length=1024,
2763+
blank=True,
2764+
null=True,
2765+
help_text="The Version Control System URL (e.g., git repo URL).",
2766+
)
2767+
27542768
patch_url = models.URLField(
27552769
null=True,
27562770
blank=True,
@@ -2790,14 +2804,13 @@ class PackageCommitPatch(models.Model):
27902804
help_text="The Version Control System URL (e.g., git repo URL).",
27912805
)
27922806

2807+
patch_url = models.TextField(blank=True, null=True)
27932808
patch_text = models.TextField(blank=True, null=True)
27942809
patch_checksum = models.CharField(max_length=128, blank=True, null=True)
27952810

27962811
def save(self, *args, **kwargs):
27972812
if self.patch_text:
2798-
self.patch_checksum = hashlib.sha512(
2799-
self.patch_text.encode("utf-8")
2800-
).hexdigest()
2813+
self.patch_checksum = hashlib.sha512(self.patch_text.encode("utf-8")).hexdigest()
28012814
super().save(*args, **kwargs)
28022815

28032816
class Meta:

vulnerabilities/pipes/advisory.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ def get_or_create_advisory_package_commit_patches(
127127
commit_hash=c.commit_hash,
128128
vcs_url=c.vcs_url,
129129
patch_text=getattr(c, "patch_text", None),
130+
patch_url=getattr(c, "patch_url", None),
130131
)
131132
for c in commit_patches_data
132133
if (c.commit_hash, c.vcs_url) not in existing_pairs
@@ -158,6 +159,8 @@ def get_or_create_advisory_patches(
158159
Patch(
159160
patch_url=getattr(c, "patch_url", None),
160161
patch_text=getattr(c, "patch_checksum", None),
162+
commit_hash=getattr(c, "commit_hash", None),
163+
vcs_url=getattr(c, "vcs_url", None),
161164
)
162165
for c in base_patches_data
163166
if (c.patch_url, c.patch_checksum) not in existing_pairs
@@ -174,38 +177,48 @@ def get_or_create_advisory_patches(
174177

175178

176179
def classify_patch_source(vcs_url, commit_hash, patch_text, patch_url):
177-
""""""
180+
"""Classify a patch as a supported VCS commit or generic data using provided args."""
181+
purl = None
182+
183+
if patch_url:
184+
purl = url2purl(patch_url)
185+
186+
if not purl or (purl.type not in VCS_URLS_SUPPORTED_TYPES) or (not purl.version and vcs_url):
187+
purl = url2purl(vcs_url)
188+
189+
if not purl:
190+
return None, PatchData(
191+
patch_text=patch_text,
192+
patch_url=patch_url,
193+
)
178194

179-
purl = url2purl(patch_url)
180195
base_purl = get_core_purl(purl)
181196
purl_string = base_purl.to_string()
197+
182198
vcs_url_p = get_repo_url(purl_string)
183-
commit_hash_p = purl.version if purl else None
199+
commit_hash_p = purl.version
184200

185201
final_vcs_url = vcs_url or vcs_url_p
186202
final_commit_hash = commit_hash or commit_hash_p
187203

188-
if not final_vcs_url:
204+
if not final_vcs_url or not final_commit_hash:
189205
return None, PatchData(
190-
patch_text=patch_text,
191206
patch_url=patch_url,
207+
patch_text=patch_text,
192208
)
193209

194-
if (
195-
purl
196-
and purl.type in VCS_URLS_SUPPORTED_TYPES
197-
and final_vcs_url
198-
and final_commit_hash
199-
and is_commit(final_commit_hash)
200-
):
210+
if purl.type in VCS_URLS_SUPPORTED_TYPES and final_commit_hash and is_commit(final_commit_hash):
201211
return base_purl, PackageCommitPatchData(
202-
vcs_url=final_vcs_url, commit_hash=final_commit_hash, patch_text=patch_text
212+
vcs_url=final_vcs_url,
213+
commit_hash=final_commit_hash,
214+
patch_url=patch_url,
215+
patch_text=patch_text,
203216
)
204217

205218
return None, PatchData(
206-
patch_url=patch_url,
207219
vcs_url=final_vcs_url,
208220
commit_hash=final_commit_hash,
221+
patch_url=patch_url,
209222
patch_text=patch_text,
210223
)
211224

vulnerabilities/tests/pipes/test_vulnerablecode_importer_pipeline_v2.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,6 @@ def dummy_advisory():
8787
],
8888
patches=[
8989
PatchData(
90-
vcs_url="",
91-
commit_hash="",
9290
patch_text="patch_text",
9391
patch_url="example.com/1.patch",
9492
)

vulnerabilities/tests/test_data/aosp/CVE-aosp_test3-expected.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
{
2121
"vcs_url": "https://github.com/torvalds/linux",
2222
"commit_hash": "0048b4837affd153897ed1222283492070027aa9",
23-
"patch_text": null
23+
"patch_text": null,
24+
"patch_url": "https://github.com/torvalds/linux/commit/0048b4837affd153897ed1222283492070027aa9"
2425
}
2526
]
2627
}

0 commit comments

Comments
 (0)