Skip to content

Commit 520531d

Browse files
committed
Enhance code commit collection capabilities #2022
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent be89117 commit 520531d

28 files changed

+1512
-157
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ MarkupSafe==2.1.1
6464
matplotlib-inline==0.1.3
6565
multidict==6.0.2
6666
mypy-extensions==0.4.3
67-
packageurl-python==0.15.6
67+
packageurl-python==0.17.6
6868
packaging==21.3
6969
paramiko==3.4.0
7070
parso==0.8.3

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ install_requires =
7171
drf-spectacular[sidecar]>=0.24.2
7272

7373
#essentials
74-
packageurl-python>=0.15
74+
packageurl-python>=0.17
7575
univers>=30.12.0
7676
license-expression>=30.0.0
7777

vulnerabilities/importer.py

Lines changed: 129 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import dataclasses
1111
import datetime
1212
import functools
13+
import hashlib
1314
import logging
1415
import traceback
1516
import xml.etree.ElementTree as ET
@@ -37,6 +38,7 @@
3738
from vulnerabilities.severity_systems import ScoringSystem
3839
from vulnerabilities.utils import classproperty
3940
from vulnerabilities.utils import get_reference_id
41+
from vulnerabilities.utils import is_commit
4042
from vulnerabilities.utils import is_cve
4143
from vulnerabilities.utils import nearest_patched_package
4244
from vulnerabilities.utils import purl_to_dict
@@ -194,6 +196,97 @@ def from_url(cls, url):
194196
return cls(url=url)
195197

196198

199+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PackageCommitPatchData:
    """
    A commit in a version control repository, optionally with its patch text.

    ``vcs_url`` and ``commit_hash`` are required and ``patch_text`` is optional.
    Raise a ValueError on creation if ``commit_hash`` is empty, if it is
    rejected by ``is_commit`` or if ``vcs_url`` is empty.
    """

    vcs_url: str
    commit_hash: str
    patch_text: Optional[str] = None

    def __post_init__(self):
        """Validate the required fields right after the dataclass ``__init__``."""
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        if not is_commit(self.commit_hash):
            raise ValueError(f"Commit must be a valid a commit_hash: {self.commit_hash}.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

    def __lt__(self, other):
        """
        Return True if this commit sorts before ``other``.
        ``functools.total_ordering`` derives the other comparisons from this.
        """
        if not isinstance(other, PackageCommitPatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    # TODO: Add cache
    def _cmp_key(self):
        """
        Return the tuple used to order instances.

        ``patch_text`` may be None and None cannot be compared with a str, so
        it is encoded as an ``(is set, value)`` pair. A commit without patch
        text sorts before the same commit with patch text, and the comparison
        never raises a TypeError.
        """
        return (
            self.vcs_url,
            self.commit_hash,
            (self.patch_text is not None, self.patch_text or ""),
        )

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the commit."""
        return {
            "vcs_url": self.vcs_url,
            "commit_hash": self.commit_hash,
            "patch_text": self.patch_text,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PackageCommitPatchData instance from a dictionary.
        Missing keys are read as None and are then subject to the same
        validation as direct construction.
        """
        return cls(
            vcs_url=data.get("vcs_url"),
            commit_hash=data.get("commit_hash"),
            patch_text=data.get("patch_text"),
        )
245+
246+
247+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PatchData:
    """
    A patch identified by its URL, its text, or both.

    At least one of ``patch_url`` or ``patch_text`` is required, otherwise a
    ValueError is raised on creation. ``patch_checksum`` is not an ``__init__``
    argument: it is set to the SHA512 hex digest of ``patch_text`` when the
    text is provided and stays None otherwise.
    """

    patch_url: Optional[str] = None
    patch_text: Optional[str] = None
    patch_checksum: Optional[str] = dataclasses.field(init=False, default=None)

    def __post_init__(self):
        """Validate the patch and compute its checksum from the text, if any."""
        if not self.patch_url and not self.patch_text:
            raise ValueError("A patch must include either patch_url or patch_text")

        if self.patch_text:
            self.patch_checksum = hashlib.sha512(self.patch_text.encode()).hexdigest()

    def __lt__(self, other):
        """
        Return True if this patch sorts before ``other``.
        ``functools.total_ordering`` derives the other comparisons from this.
        """
        if not isinstance(other, PatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    def _cmp_key(self):
        """
        Return the tuple used to order instances.

        Every field may be None (a patch can have only a URL or only a text)
        and None cannot be compared with a str. Each field is therefore
        encoded as an ``(is set, value)`` pair: unset values sort first and
        comparing a URL-only patch with a text-only patch never raises a
        TypeError.
        """
        return tuple(
            (value is not None, value or "")
            for value in (self.patch_url, self.patch_text, self.patch_checksum)
        )

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the patch."""
        return {
            "patch_url": self.patch_url,
            "patch_text": self.patch_text,
            "patch_checksum": self.patch_checksum,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PatchData instance from a dictionary.
        Any "patch_checksum" key is ignored: the checksum is always recomputed
        from "patch_text".
        """
        return cls(
            patch_url=data.get("patch_url"),
            patch_text=data.get("patch_text"),
        )
288+
289+
197290
class UnMergeablePackageError(Exception):
198291
"""
199292
Raised when a package cannot be merged with another one.
@@ -344,21 +437,30 @@ class AffectedPackageV2:
344437
"""
345438
Relate a Package URL with a range of affected versions and fixed versions.
346439
The Package URL must *not* have a version.
347-
AffectedPackage must contain either ``affected_version_range`` or ``fixed_version_range``.
440+
AffectedPackage must contain at least one of ``affected_version_range``, ``fixed_version_range``, ``introduced_by_commit_patches`` or ``fixed_by_commit_patches``.
348441
"""
349442

350443
package: PackageURL
351444
affected_version_range: Optional[VersionRange] = None
352445
fixed_version_range: Optional[VersionRange] = None
446+
introduced_by_commit_patches: List[PackageCommitPatchData] = dataclasses.field(
447+
default_factory=list
448+
)
449+
fixed_by_commit_patches: List[PackageCommitPatchData] = dataclasses.field(default_factory=list)
353450

354451
def __post_init__(self):
355452
if self.package.version:
356453
raise ValueError(f"Affected Package URL {self.package!r} cannot have a version.")
357454

358-
if not (self.affected_version_range or self.fixed_version_range):
455+
if not (
456+
self.affected_version_range
457+
or self.fixed_version_range
458+
or self.introduced_by_commit_patches
459+
or self.fixed_by_commit_patches
460+
):
359461
raise ValueError(
360-
f"Affected Package {self.package!r} should have either fixed version range or an "
361-
"affected version range."
462+
f"Affected package {self.package!r} must have either a fixed version range, "
463+
"an affected version range, introduced commit patches, or fixed commit patches."
362464
)
363465

364466
def __lt__(self, other):
@@ -372,6 +474,8 @@ def _cmp_key(self):
372474
str(self.package),
373475
str(self.affected_version_range or ""),
374476
str(self.fixed_version_range or ""),
477+
str(self.introduced_by_commit_patches or []),
478+
str(self.fixed_by_commit_patches or []),
375479
)
376480

377481
def to_dict(self):
@@ -385,6 +489,12 @@ def to_dict(self):
385489
"package": purl_to_dict(self.package),
386490
"affected_version_range": affected_version_range,
387491
"fixed_version_range": fixed_version_range,
492+
"introduced_by_commit_patches": [
493+
commit.to_dict() for commit in self.introduced_by_commit_patches
494+
],
495+
"fixed_by_commit_patches": [
496+
commit.to_dict() for commit in self.fixed_by_commit_patches
497+
],
388498
}
389499

390500
@classmethod
@@ -396,6 +506,10 @@ def from_dict(cls, affected_pkg: dict):
396506
fixed_version_range = None
397507
affected_range = affected_pkg["affected_version_range"]
398508
fixed_range = affected_pkg["fixed_version_range"]
509+
introduced_by_commit_patches = (
510+
affected_pkg.get("introduced_by_package_commit_patches") or []
511+
)
512+
fixed_by_commit_patches = affected_pkg.get("fixed_by_package_commit_patches") or []
399513

400514
try:
401515
affected_version_range = VersionRange.from_string(affected_range)
@@ -417,6 +531,12 @@ def from_dict(cls, affected_pkg: dict):
417531
package=package,
418532
affected_version_range=affected_version_range,
419533
fixed_version_range=fixed_version_range,
534+
introduced_by_commit_patches=[
535+
PackageCommitPatchData.from_dict(commit) for commit in introduced_by_commit_patches
536+
],
537+
fixed_by_commit_patches=[
538+
PackageCommitPatchData.from_dict(commit) for commit in fixed_by_commit_patches
539+
],
420540
)
421541

422542

@@ -441,6 +561,7 @@ class AdvisoryData:
441561
)
442562
references: List[Reference] = dataclasses.field(default_factory=list)
443563
references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list)
564+
patches: List[PatchData] = dataclasses.field(default_factory=list)
444565
date_published: Optional[datetime.datetime] = None
445566
weaknesses: List[int] = dataclasses.field(default_factory=list)
446567
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
@@ -473,6 +594,7 @@ def to_dict(self):
473594
"summary": self.summary,
474595
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
475596
"references_v2": [ref.to_dict() for ref in self.references_v2],
597+
"patches": [patch.to_dict() for patch in self.patches],
476598
"severities": [sev.to_dict() for sev in self.severities],
477599
"date_published": self.date_published.isoformat() if self.date_published else None,
478600
"weaknesses": self.weaknesses,
@@ -533,6 +655,7 @@ class AdvisoryDataV2:
533655
summary: Optional[str] = ""
534656
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
535657
references: List[ReferenceV2] = dataclasses.field(default_factory=list)
658+
patches: List[PatchData] = dataclasses.field(default_factory=list)
536659
date_published: Optional[datetime.datetime] = None
537660
weaknesses: List[int] = dataclasses.field(default_factory=list)
538661
url: Optional[str] = None
@@ -557,6 +680,7 @@ def to_dict(self):
557680
"summary": self.summary,
558681
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
559682
"references": [ref.to_dict() for ref in self.references],
683+
"patches": [ref.to_dict() for ref in self.patches],
560684
"date_published": self.date_published.isoformat() if self.date_published else None,
561685
"weaknesses": self.weaknesses,
562686
"url": self.url if self.url else "",
@@ -574,6 +698,7 @@ def from_dict(cls, advisory_data):
574698
if pkg is not None
575699
],
576700
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
701+
"patches": [PatchData.from_dict(ref) for ref in advisory_data["patches"]],
577702
"date_published": datetime.datetime.fromisoformat(date_published)
578703
if date_published
579704
else None,

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from vulnerabilities.pipelines import nvd_importer
4242
from vulnerabilities.pipelines import pypa_importer
4343
from vulnerabilities.pipelines import pysec_importer
44+
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
4445
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
4546
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
4647
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
@@ -81,6 +82,7 @@
8182
mozilla_importer_v2.MozillaImporterPipeline,
8283
github_osv_importer_v2.GithubOSVImporterPipeline,
8384
redhat_importer_v2.RedHatImporterPipeline,
85+
aosp_importer_v2.AospImporterPipeline,
8486
nvd_importer.NVDImporterPipeline,
8587
github_importer.GitHubAPIImporterPipeline,
8688
gitlab_importer.GitLabImporterPipeline,

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
100+
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], patches=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Generated by Django 4.2.25 on 2025-12-01 14:42
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    # Schema migration that replaces the CodeCommit model with two new models,
    # PackageCommitPatch and Patch, and rewires the relations on
    # ImpactedPackage and AdvisoryV2 accordingly.
    # NOTE(review): no data migration is included; rows stored in CodeCommit
    # and in the two removed ImpactedPackage relations are dropped — confirm
    # this is intended.

    dependencies = [
        ("vulnerabilities", "0103_codecommit_impactedpackage_affecting_commits_and_more"),
    ]

    operations = [
        # A commit in a VCS repository; a (commit_hash, vcs_url) pair is unique.
        migrations.CreateModel(
            name="PackageCommitPatch",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                (
                    "commit_hash",
                    models.CharField(
                        help_text="The commit hash of the patch in the VCS.", max_length=128
                    ),
                ),
                (
                    "vcs_url",
                    models.URLField(
                        help_text="The Version Control System URL (e.g., git repo URL).",
                        max_length=1024,
                    ),
                ),
                ("patch_text", models.TextField(blank=True, null=True)),
                # Optional here, unlike Patch.patch_checksum below.
                # NOTE(review): presumably a SHA512 hex digest as well — confirm.
                ("patch_checksum", models.CharField(blank=True, max_length=128, null=True)),
            ],
            options={
                "unique_together": {("commit_hash", "vcs_url")},
            },
        ),
        # A standalone patch with an optional URL and an optional text.
        migrations.CreateModel(
            name="Patch",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                (
                    "patch_url",
                    models.URLField(
                        blank=True, help_text="URL to the patch file or diff.", null=True
                    ),
                ),
                (
                    "patch_text",
                    models.TextField(
                        blank=True,
                        help_text="The actual text content of the code patch/diff.",
                        null=True,
                    ),
                ),
                (
                    # Required (no null/blank); 128 characters is the length of
                    # a SHA512 hex digest.
                    # NOTE(review): a patch that has only a URL has no text to
                    # checksum — confirm what is stored here in that case.
                    "patch_checksum",
                    models.CharField(
                        help_text="SHA512 checksum of the patch content.", max_length=128
                    ),
                ),
            ],
            options={
                "unique_together": {("patch_checksum", "patch_url")},
            },
        ),
        # The two ImpactedPackage fields are removed before CodeCommit is
        # deleted.
        # NOTE(review): presumably both fields pointed at CodeCommit — verify
        # against migration 0103.
        migrations.RemoveField(
            model_name="impactedpackage",
            name="affecting_commits",
        ),
        migrations.RemoveField(
            model_name="impactedpackage",
            name="fixed_by_commits",
        ),
        migrations.DeleteModel(
            name="CodeCommit",
        ),
        # New many-to-many relations to the models created above.
        migrations.AddField(
            model_name="advisoryv2",
            name="patches",
            field=models.ManyToManyField(
                help_text="A list of patches associated with this advisory.",
                related_name="advisories",
                to="vulnerabilities.patch",
            ),
        ),
        migrations.AddField(
            model_name="impactedpackage",
            name="fixed_by_package_commit_patches",
            field=models.ManyToManyField(
                help_text="PackageCommitPatches that fix this impact.",
                related_name="fixed_in_impacts",
                to="vulnerabilities.packagecommitpatch",
            ),
        ),
        migrations.AddField(
            model_name="impactedpackage",
            name="introduced_by_package_commit_patches",
            field=models.ManyToManyField(
                help_text="PackageCommitPatches that introduce this impact.",
                related_name="introduced_in_impacts",
                to="vulnerabilities.packagecommitpatch",
            ),
        ),
    ]

0 commit comments

Comments
 (0)