Skip to content

Commit

Permalink
feat: Add ability to read CPE identifiers from CycloneDX triage data (i…
Browse files Browse the repository at this point in the history
…ntel#3990)

* feat: Prefer vendor from CPE before PURL

* feat: Support CPE 2.2 strings when decoding product information

* test: Improve SBOM manager test coverage

* test: Improved coverage of SBOM manager

* test: Enabled tests on test_bad_ext_ref_cyclonedx_file

* feat: Rolled back changes to decode PURL product name

* fix: Cleanup merge conflicts
  • Loading branch information
cinix committed Apr 8, 2024
1 parent 1fe4be7 commit 812e8de
Show file tree
Hide file tree
Showing 14 changed files with 597 additions and 42 deletions.
215 changes: 181 additions & 34 deletions cve_bin_tool/sbom_manager/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,15 @@

class SBOMManager:
"""
SBOMManager is a class that manages the Software Bill of Materials (SBOM) data.
Class: SBOMManager
This class is responsible for parsing various SBOM file formats (SPDX, CycloneDX, SWID) in the CVE Bin Tool.
It provides methods for scanning SBOM files, parsing them, and retrieving vendor information.
Attributes:
- sbom_data (DefaultDict[ProductInfo, TriageData]): Dictionary containing parsed SBOM data.
"""

SBOMtype = ["spdx", "cyclonedx", "swid"]
Expand Down Expand Up @@ -95,6 +102,14 @@ def common_prefix_split(self, product, version) -> list[ProductInfo]:
return parsed_data

def scan_file(self) -> dict[ProductInfo, TriageData]:
"""
Parses the SBOM input file and returns the product information and
corresponding triage data.
Returns:
- dict[ProductInfo, TriageData]: Parsed SBOM data.
"""
self.logger.debug(
f"Processing SBOM {self.filename} of type {self.type.upper()}"
)
Expand All @@ -115,16 +130,18 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:

# Now process list of modules to create [vendor, product, version] tuples
parsed_data: list[ProductInfo] = []
for m in modules:
if m and m[0]:
# Using lower to normalize product names across databases
product, version = m[0].lower(), m[1]
if version != "":
# Now add vendor to create product record....
vendor_set = self.get_vendor(product)
for vendor in vendor_set:
# if vendor is not None:
parsed_data.append(ProductInfo(vendor, product, version))
for module_vendor, product, version in modules:
# Using lower to normalize product names across databases
product = product.lower()

if module_vendor is None:
# Now add vendor to create product record....
vendor_set = self.get_vendor(product)
for vendor in vendor_set:
# if vendor is not None:
parsed_data.append(ProductInfo(vendor, product, version))
else:
parsed_data.append(ProductInfo(module_vendor, product, version))

for row in parsed_data:
self.sbom_data[row]["default"] = {
Expand All @@ -138,9 +155,22 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
return self.sbom_data

def get_vendor(self, product: str) -> list:
"""
Get the list of vendors for the product name.
There may be more than one vendor for a given product name and all
matches are returned.
Args:
- product (str): Product name.
Returns:
- list: The list of vendors for the product
"""
vendorlist: list[str] = []
vendor_package_pair = self.cvedb.get_vendor_product_pairs(product)
if vendor_package_pair != []:
if vendor_package_pair:
# To handle multiple vendors, return all combinations of product/vendor mappings
for v in vendor_package_pair:
vendor = v["vendor"]
Expand All @@ -149,13 +179,34 @@ def get_vendor(self, product: str) -> list:
vendorlist.append("UNKNOWN")
return vendorlist

def is_valid_purl(self, purl_string):
"""Returns true if give purl_string is a valid purl string"""
purl_pattern = r"^\w+://[\w\-.]+/[\w\-.]+(?:/[\w\-.]+)*(?:\?[\w\-.]+=[\w\-.]+(?:&[\w\-.]+=[\w\-.]+)*)?$"
def is_valid_purl(self, purl_string: str):
    """
    Check whether a string has the overall shape of a Package URL.

    The check is purely syntactic: the string must match
    ``scheme:type/namespace/name@version`` (optionally followed by
    qualifiers and a subpath). Every component is matched with a greedy
    wildcard, so no component value is validated beyond being non-empty.

    Args:
    - purl_string (str): Candidate Package URL string.

    Returns:
    - bool: True when the string matches the pattern, False otherwise.
    """
    purl_regex = re.compile(
        r"^(?P<scheme>.+):(?P<type>.+)/(?P<namespace>.+)/(?P<name>.+)@(?P<version>.+)\??(?P<qualifiers>.*)#?(?P<subpath>.*)$"
    )
    matched = purl_regex.match(purl_string)
    return matched is not None

def parse_sbom(self):
"""parse SBOM, using PURL identifiers preferentially if found"""
def parse_sbom(self) -> [(str, str, str)]:
"""
Parse the SBOM to extract a list of modules, including vendor, product, and version information.
The parsed product information can be retrieved from different components of the SBOM, with the following order of preference:
1. CPE 2.3 Identifiers
2. CPE 2.2 Identifiers
3. Package URLs (purl)
4. Name and Version from the SBOM (Vendor will be unspecified)
Returns:
- List[(str, str, str)]: A list of tuples, each containing vendor, product, and version information for a module.
"""

# Set up SBOM parser
sbom_parser = SBOMParser(sbom_type=self.type)
Expand All @@ -173,28 +224,124 @@ def parse_sbom(self):
packages = [x for x in sbom_parser.get_sbom()["packages"].values()]
LOGGER.debug(f"Parsed SBOM {self.filename} {packages}")
for package in packages:
purl_found = False
# If PURL record found, use this data in preference to package data
vendor = None
package_name = None
version = None

# If Package URL or CPE record found, use this data in preference to package data
ext_ref = package.get("externalreference")
if ext_ref is not None:
for ref in ext_ref:
if ref[1] == "purl":
if self.is_valid_purl(ref[2]):
# Process purl identifier
purl_info = PackageURL.from_string(ref[2]).to_dict()
if purl_info["name"] and purl_info["version"]:
modules.append(
[purl_info["name"], purl_info["version"]]
)
purl_found = True
if not purl_found:
if package.get("version") is not None:
modules.append([package["name"], package["version"]])
else:
LOGGER.debug(f"No version found in {package}")
vendor, package_name, version = self.parse_ext_ref(ext_ref=ext_ref)

# For any data not found in CPE or the Package URL get from package data
if not vendor:
pass # Because no vendor was detected then all vendors with this named package
# will be included in the output.

if not package_name:
package_name = package["name"]

if (not version) and (package.get("version") is not None):
version = package["version"]
else:
LOGGER.debug(f"No version found in {package}")

if version:
# Found at least package and version, save the results
modules.append([vendor, package_name, version])

LOGGER.debug(f"Parsed SBOM {self.filename} {modules}")
return modules

def parse_ext_ref(self, ext_ref) -> (str | None, str | None, str | None):
"""
Parse external references in an SBOM to extract module information.
Two passes are made through the external references, giving priority to CPE types,
which will always match the CVE database.
Args:
- ext_ref (List[List[str]]): List of lists representing external references.
Each inner list contains [category, type, locator].
Returns:
- Optional[Tuple[str | None, str | None, str | None]]: A tuple containing the vendor, product, and version
information extracted from the external references, or None if not found.
"""
decoded = {}
for ref in ext_ref:
if ref[1] == "cpe23Type":
decoded["cpe23Type"] = self.decode_cpe23(ref[2])

elif ref[1] == "cpe22Type":
decoded["cpe22Type"] = self.decode_cpe22(ref[2])

elif ref[1] == "purl":
decoded["purl"] = self.decode_purl(ref[2])

# No ext-ref matches, return none
return decoded.get(
"cpe23Type",
decoded.get("cpe22Type", decoded.get("purl", (None, None, None))),
)

def decode_cpe22(self, cpe22) -> (str | None, str | None, str | None):
"""
Decode a CPE 2.2 formatted string to extract vendor, product, and version information.
Args:
- cpe22 (str): CPE 2.2 formatted string.
Returns:
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor, product, and version
information extracted from the CPE 2.2 string, or None if the information is incomplete.
"""
cpe = cpe22.split(":")
vendor, product, version = cpe[2], cpe[3], cpe[4]
# Return available data, convert empty fields to None
return [vendor or None, product or None, version or None]

def decode_cpe23(self, cpe23) -> (str | None, str | None, str | None):
"""
Decode a CPE 2.3 formatted string to extract vendor, product, and version information.
Args:
- cpe23 (str): CPE 2.3 formatted string.
Returns:
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor, product, and version
information extracted from the CPE 2.3 string, or None if the information is incomplete.
"""
cpe = cpe23.split(":")
vendor, product, version = cpe[3], cpe[4], cpe[5]
# Return available data, convert empty fields to None
return [vendor or None, product or None, version or None]

def decode_purl(self, purl) -> (str | None, str | None, str | None):
"""
Decode a Package URL (purl) to extract version information.
Args:
- purl (str): Package URL (purl) string.
Returns:
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor (which is always None for purl),
product, and version information extracted from the purl string, or None if the purl is invalid or incomplete.
"""
vendor = None # Because the vendor and product identifiers in the purl don't always align
product = None # with the CVE DB, only the version is parsed.
version = None
if self.is_valid_purl(purl):
# Process purl identifier
purl_info = PackageURL.from_string(purl).to_dict()
version = purl_info.get("version")

return [vendor or None, product or None, version or None]


if __name__ == "__main__":
import sys
Expand Down
2 changes: 1 addition & 1 deletion cve_bin_tool/sbom_manager/swid_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def extract(self, swid: str) -> list[str]:
# Format of swid is "URI: <vendor>-<product>-<version>"
item = swid[swid.find(":") + 1 :].split("-")
# As some version numbers have leading 'v', it is removed
return [item[1], item[2].upper().replace("V", "")]
return [item[0].strip(" "), item[1], item[2].upper().replace("V", "")]


if __name__ == "__main__":
Expand Down
88 changes: 88 additions & 0 deletions test/sbom/cyclonedx_bad_cpe22.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"$schema": "http://cyclonedx.org/schema/bom-1.5.schema.json",
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"serialNumber": "urn:uuid:80c1b198-5175-4fda-86c8-1cc725b6c532",
"version": 1,
"metadata": {
"timestamp": "2024-03-30T18:21:29Z",
"tools": {
"components": [
{
"name": "cve-bin-tool",
"version": "3.3rc2",
"type": "application"
}
]
},
"component": {
"type": "application",
"bom-ref": "CDXRef-DOCUMENT",
"name": "SBOM_CVEBINTOOL-product_1-0-0-66_all-deb"
}
},
"components": [
{
"type": "application",
"bom-ref": "1-CVEBINTOOL-product_1-0-0-66_all-deb",
"name": "CVEBINTOOL-product_1-0-0-66_all-deb"
},
{
"type": "library",
"bom-ref": "2-libjpeg",
"name": "libjpeg-novendor",
"version": "8b",
"supplier": {
"name": "ijg"
},
"cpe": "cpe:/a::libjpeg:8b"
},
{
"type": "library",
"bom-ref": "3-libexpat",
"name": "libexpat",
"version": "2.0.1",
"supplier": {
"name": "libexpat project"
},
"cpe": "cpe:/a:libexpat_project::2.0.1"
},
{
"type": "library",
"bom-ref": "4-ncurses",
"name": "ncurses-noversion",
"version": "5.9.noversion",
"supplier": {
"name": "gnu"
},
"cpe": "cpe:/a:gnu:ncurses:"
},
{
"type": "library",
"bom-ref": "5-zlib",
"name": "zlib",
"version": "1.2.3",
"supplier": {
"name": "zlib"
},
"cpe": "cpe:/a:zlib:zlib:1.2.3"
}
],
"dependencies": [
{
"ref": "CDXRef-DOCUMENT",
"dependsOn": [
"1-CVEBINTOOL-product_1-0-0-66_all-deb"
]
},
{
"ref": "1-CVEBINTOOL-product_1-0-0-66_all-deb",
"dependsOn": [
"2-libjpeg",
"3-libexpat",
"4-ncurses",
"5-zlib"
]
}
]
}
Loading

0 comments on commit 812e8de

Please sign in to comment.