Skip to content

Commit

Permalink
fix(sbom): deduplicate SBOM packages by ID (#1366)
Browse files Browse the repository at this point in the history
In chainguard-dev/melange#1474, we updated our
SBOMs to have distinct SPDX packages for the APK itself, vs. the build
config used to produce it, vs. the upstream source(s) pulled in for the
build. This helps us produce more idiomatic and descriptive SPDX data.

But this caused a problem downstream in apko — when we aggregate APKs'
SBOMs, and specifically their SPDX packages, into a single list of SPDX
packages for the image, we end up with duplicate package IDs when
multiple APKs were build from the same build config or the same upstream
source.

This PR adds tests to catch this case, and it fixes the duplicate
package issue by dropping subsequent instances of a given package ID.

It also further improves on the existing SBOM-related tests.

Signed-off-by: Dan Luhring <dluhring@chainguard.dev>
  • Loading branch information
luhring authored Oct 24, 2024
1 parent 04b5ca4 commit 395e822
Show file tree
Hide file tree
Showing 12 changed files with 434 additions and 53 deletions.
39 changes: 28 additions & 11 deletions pkg/sbom/generator/spdx/spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"
"unicode/utf8"

"github.com/charmbracelet/log"
purl "github.com/package-url/packageurl-go"
"sigs.k8s.io/release-utils/version"

Expand Down Expand Up @@ -137,6 +138,18 @@ func (sx *SPDX) Generate(opts *options.Options, path string) error {
}
}

dedupedPackages := make([]Package, 0, len(doc.Packages))
seenIDs := make(map[string]struct{})
for i := range doc.Packages {
if _, ok := seenIDs[doc.Packages[i].ID]; !ok {
seenIDs[doc.Packages[i].ID] = struct{}{}
dedupedPackages = append(dedupedPackages, doc.Packages[i])
} else {
log.Info("duplicate package ID found in SBOM, deduplicating package...", "ID", doc.Packages[i].ID)
}
}
doc.Packages = dedupedPackages

if err := renderDoc(doc, path); err != nil {
return fmt.Errorf("rendering document: %w", err)
}
Expand Down Expand Up @@ -178,7 +191,9 @@ func replacePackage(doc *Document, originalID, newID string) {
}
}

// locateApkSBOM returns the SBOM
// locateApkSBOM returns the path to the SBOM in the given filesystem, using the
// given Package's name and version. It returns an empty string if the SBOM is
// not found.
func locateApkSBOM(fsys apkfs.FullFS, p *Package) (string, error) {
re := regexp.MustCompile(`-r\d+$`)
for _, s := range []string{
Expand Down Expand Up @@ -209,51 +224,53 @@ func (sx *SPDX) ProcessInternalApkSBOM(opts *options.Options, doc *Document, p *
return fmt.Errorf("inspecting FS for internal apk SBOM: %w", err)
}
if path == "" {
// The SBOM does not exist.
// (So just ignore that the package was specified to the SPDX Generate method?)
return nil
}

internalDoc, err := sx.ParseInternalSBOM(opts, path)
apkSBOMDoc, err := sx.ParseInternalSBOM(opts, path)
if err != nil {
// TODO: Log error parsing apk SBOM
return nil
}

// Cycle the top level elements...
elementIDs := map[string]struct{}{}
for _, elementID := range internalDoc.DocumentDescribes {
elementIDs[elementID] = struct{}{}
idsDescribedByAPKSBOM := map[string]struct{}{}
for _, elementID := range apkSBOMDoc.DocumentDescribes {
idsDescribedByAPKSBOM[elementID] = struct{}{}
}

// ... searching for a 1st level package
targetElementIDs := map[string]struct{}{}
for _, pkg := range internalDoc.Packages {
for _, pkg := range apkSBOMDoc.Packages {
// that matches the name
if p.Name != pkg.Name {
continue
}

if _, ok := elementIDs[pkg.ID]; !ok {
if _, ok := idsDescribedByAPKSBOM[pkg.ID]; !ok {
continue
}

targetElementIDs[pkg.ID] = struct{}{}
if len(targetElementIDs) == len(elementIDs) {
if len(targetElementIDs) == len(idsDescribedByAPKSBOM) {
// Exit early if we found them all.
break
}
}

// Copy the targetElementIDs
todo := make(map[string]struct{}, len(internalDoc.Relationships))
todo := make(map[string]struct{}, len(apkSBOMDoc.Relationships))
for id := range targetElementIDs {
todo[id] = struct{}{}
}

if err := copySBOMElements(internalDoc, doc, todo); err != nil {
if err := copySBOMElements(apkSBOMDoc, doc, todo); err != nil {
return fmt.Errorf("copying element: %w", err)
}

if err := mergeLicensingInfos(internalDoc, doc); err != nil {
if err := mergeLicensingInfos(apkSBOMDoc, doc); err != nil {
return fmt.Errorf("merging LicensingInfos: %w", err)
}

Expand Down
116 changes: 78 additions & 38 deletions pkg/sbom/generator/spdx/spdx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package spdx

import (
"encoding/json"
"fmt"
"os"
"path"
Expand Down Expand Up @@ -58,13 +59,24 @@ var testOpts = &options.Options{
},
}

// TODO: clean this up and make consistent with the other test cases
func TestGenerate(t *testing.T) {
dir := t.TempDir()
fsys := apkfs.NewMemFS()
sx := New(fsys)
path := filepath.Join(dir, testOpts.FileName+"."+sx.Ext())
err := sx.Generate(testOpts, path)
require.NoError(t, err)
require.FileExists(t, path)
}

func TestSPDX_Generate(t *testing.T) {
tests := []struct {
name string
opts *options.Options
}{
{
name: "custom license",
name: "custom-license",
opts: &options.Options{
OS: options.OSInfo{
Name: "unknown",
Expand All @@ -75,19 +87,15 @@ func TestSPDX_Generate(t *testing.T) {
Packages: []*apk.InstalledPackage{
{
Package: apk.Package{
Name: "font-ubuntu",
Version: "0.869-r1",
Arch: "x86_64",
Description: "Ubuntu font family",
License: "LicenseRef-ubuntu-font",
Origin: "font-ubuntu",
Name: "font-ubuntu",
Version: "0.869-r1",
},
},
},
},
},
{
name: "no supplier",
name: "no-supplier",
opts: &options.Options{
OS: options.OSInfo{
Name: "Apko Images, Plc",
Expand All @@ -98,12 +106,33 @@ func TestSPDX_Generate(t *testing.T) {
Packages: []*apk.InstalledPackage{
{
Package: apk.Package{
Name: "libattr1",
Version: "2.5.1-r2",
Arch: "x86_64",
Description: "library for managing filesystem extended attributes",
License: "GPL-2.0-or-later",
Origin: "attr",
Name: "libattr1",
Version: "2.5.1-r2",
},
},
},
},
},
{
name: "package-deduplicating",
opts: &options.Options{
OS: options.OSInfo{
Name: "unknown",
ID: "unknown",
Version: "3.0",
},
FileName: "sbom",
Packages: []*apk.InstalledPackage{
{
Package: apk.Package{
Name: "logstash-8",
Version: "8.15.3-r4",
},
},
{
Package: apk.Package{
Name: "logstash-8-compat",
Version: "8.15.3-r4",
},
},
},
Expand All @@ -113,49 +142,60 @@ func TestSPDX_Generate(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
pkgName := tt.opts.Packages[0].Name
apkSBOMPath := filepath.Join("testdata", "apk_sboms", fmt.Sprintf("%s.spdx.json", pkgName))
apkSBOMBytes, err := os.ReadFile(apkSBOMPath)
require.NoError(t, err)

fsys := apkfs.NewMemFS()
sbomDir := path.Join("var", "lib", "db", "sbom")
err = fsys.MkdirAll(sbomDir, 0750)
err := fsys.MkdirAll(sbomDir, 0750)
require.NoError(t, err)

sbomDestPath := path.Join(sbomDir, fmt.Sprintf("%s.spdx.json", pkgName))
err = fsys.WriteFile(sbomDestPath, apkSBOMBytes, 0644)
require.NoError(t, err)
for _, apkPkg := range tt.opts.Packages {
apkSBOMName := fmt.Sprintf("%s-%s.spdx.json", apkPkg.Name, apkPkg.Version)
apkSBOMTestdataPath := filepath.Join("testdata", "apk_sboms", apkSBOMName)
apkSBOMBytes, err := os.ReadFile(apkSBOMTestdataPath)
require.NoError(t, err)

sbomDestPath := path.Join(sbomDir, apkSBOMName)
err = fsys.WriteFile(sbomDestPath, apkSBOMBytes, 0644)
require.NoError(t, err)
}

sx := New(fsys)
imageSBOMDestPath := filepath.Join(t.TempDir(), pkgName+"."+sx.Ext())
imageSBOMName := fmt.Sprintf("%s.spdx.json", tt.name)
imageSBOMDestPath := filepath.Join(t.TempDir(), imageSBOMName)
err = sx.Generate(tt.opts, imageSBOMDestPath)
require.NoError(t, err)

actual, err := os.ReadFile(imageSBOMDestPath)
require.NoError(t, err)

expectedImageSBOMPath := filepath.Join("testdata", "expected_image_sboms", fmt.Sprintf("%s.spdx.json", pkgName))
expectedImageSBOMPath := filepath.Join("testdata", "expected_image_sboms", imageSBOMName)
expected, err := os.ReadFile(expectedImageSBOMPath)
require.NoError(t, err)

if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("Unexpected image SBOM (-want, +got): \n%s", diff)
}
t.Run("goldenfile diff", func(t *testing.T) {
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("Unexpected image SBOM (-want, +got): \n%s", diff)
}
})

t.Run("unique SPDX IDs", func(t *testing.T) {
doc := new(Document)
err := json.Unmarshal(actual, doc)
if err != nil {
t.Fatalf("unmarshalling SBOM: %v", err)
}

ids := make(map[string]struct{})
for _, p := range doc.Packages {
if _, ok := ids[p.ID]; ok {
t.Errorf("duplicate SPDX ID found: %s", p.ID)
}
ids[p.ID] = struct{}{}
}
})
})
}
}

func TestGenerate(t *testing.T) {
dir := t.TempDir()
fsys := apkfs.NewMemFS()
sx := New(fsys)
path := filepath.Join(dir, testOpts.FileName+"."+sx.Ext())
err := sx.Generate(testOpts, path)
require.NoError(t, err)
require.FileExists(t, path)
}

func TestReproducible(t *testing.T) {
// Create two sboms based on the same input and ensure
// they are identical
Expand Down
21 changes: 21 additions & 0 deletions pkg/sbom/generator/spdx/testdata/apk_sboms/_generate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh

set -euo pipefail

# Define an array of package name-version strings
packages=(
"font-ubuntu-0.869-r1"
"libattr1-2.5.1-r2"
"logstash-8-8.15.3-r4"
"logstash-8-compat-8.15.3-r4"
)

# Base URL for downloading APKs
base_url="https://packages.wolfi.dev/os/x86_64"

# Loop through the array and process each package
for pkg in "${packages[@]}"; do
url="${base_url}/${pkg}.apk"
output_path="${pkg}.spdx.json"
curl -q "$url" | tar Ozx "var/lib/db/sbom/${pkg}.spdx.json" >"$output_path" 2>/dev/null
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{
"SPDXID": "SPDXRef-DOCUMENT",
"name": "apk-logstash-8-8.15.3-r4",
"spdxVersion": "SPDX-2.3",
"creationInfo": {
"created": "2024-10-24T15:16:49Z",
"creators": [
"Tool: melange (v0.14.7)",
"Organization: Chainguard, Inc"
],
"licenseListVersion": "3.22"
},
"dataLicense": "CC0-1.0",
"documentNamespace": "https://spdx.org/spdxdocs/chainguard/melange/0434ffabb9f0343f5dada78abdb2e419b2be48fb",
"documentDescribes": [
"SPDXRef-Package-logstash-8-8.15.3-r4"
],
"packages": [
{
"SPDXID": "SPDXRef-Package-logstash-8-8.15.3-r4",
"name": "logstash-8",
"versionInfo": "8.15.3-r4",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "Apache-2.0",
"downloadLocation": "NOASSERTION",
"originator": "Organization: Wolfi",
"supplier": "Organization: Wolfi",
"copyrightText": "\n",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceLocator": "pkg:apk/wolfi/logstash-8@8.15.3-r4?arch=x86_64",
"referenceType": "purl"
}
]
},
{
"SPDXID": "SPDXRef-Package-logstash-8.yaml-c7d40faa38ddffa98700cfa4c2f9bde196acc504",
"name": "logstash-8.yaml",
"versionInfo": "c7d40faa38ddffa98700cfa4c2f9bde196acc504",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "Apache-2.0",
"downloadLocation": "NOASSERTION",
"originator": "Organization: Wolfi",
"supplier": "Organization: Wolfi",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceLocator": "pkg:github/wolfi-dev/os@c7d40faa38ddffa98700cfa4c2f9bde196acc504#logstash-8.yaml",
"referenceType": "purl"
}
]
},
{
"SPDXID": "SPDXRef-Package-github.com-elastic-logstash-v8.15.3-8364c8e89cfb113e38ec3f966df7eb1e9abe9d33-0",
"name": "logstash",
"versionInfo": "v8.15.3",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "Apache-2.0",
"downloadLocation": "NOASSERTION",
"originator": "Organization: Elastic",
"supplier": "Organization: Elastic",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceLocator": "pkg:github/elastic/logstash@v8.15.3",
"referenceType": "purl"
}
]
}
],
"relationships": [
{
"spdxElementId": "SPDXRef-Package-logstash-8-8.15.3-r4",
"relationshipType": "DESCRIBED_BY",
"relatedSpdxElement": "SPDXRef-Package-logstash-8.yaml-c7d40faa38ddffa98700cfa4c2f9bde196acc504"
},
{
"spdxElementId": "SPDXRef-Package-logstash-8-8.15.3-r4",
"relationshipType": "GENERATED_FROM",
"relatedSpdxElement": "SPDXRef-Package-github.com-elastic-logstash-v8.15.3-8364c8e89cfb113e38ec3f966df7eb1e9abe9d33-0"
}
]
}
Loading

0 comments on commit 395e822

Please sign in to comment.