fix(sbom): use correct field for licenses in CycloneDX reports#9057
fix(sbom): use correct field for licenses in CycloneDX reports#9057DmitriyLewen merged 14 commits intoaquasecurity:mainfrom
Conversation
knqyf263
left a comment
There was a problem hiding this comment.
I remember we did something similar for SPDX. If we define a method, can we avoid repeating code in SPDX and CycloneDX?
diff --git a/pkg/licensing/expression/types.go b/pkg/licensing/expression/types.go
index 7399f7462..c237464e1 100644
--- a/pkg/licensing/expression/types.go
+++ b/pkg/licensing/expression/types.go
@@ -14,6 +14,7 @@ var (
type Expression interface {
String() string
+ IsSPDXLicense() bool
}
type Token struct {
@@ -42,6 +43,10 @@ func (s SimpleExpr) String() string {
return s.License
}
+func (s SimpleExpr) IsSPDXLicense() bool {
+ return ValidateSPDXLicense(s.String())
+}
+
type CompoundExpr struct {
left Expression
conjunction Token
@@ -49,7 +54,11 @@ type CompoundExpr struct {
}
func NewCompoundExpr(left Expression, conjunction Token, right Expression) CompoundExpr {
- return CompoundExpr{left: left, conjunction: conjunction, right: right}
+ return CompoundExpr{
+ left: left,
+ conjunction: conjunction,
+ right: right,
+ }
}
func (c CompoundExpr) Conjunction() Token {
@@ -81,3 +90,11 @@ func (c CompoundExpr) String() string {
}
return fmt.Sprintf("%s %s %s", left, c.conjunction.literal, right)
}
+
+func (c CompoundExpr) IsSPDXLicense() bool {
+ if c.conjunction.token == WITH {
+ // e.g. A WITH B
+ return c.left.IsSPDXLicense() && ValidateSPDXException(c.right.String())
+ }
+ return c.left.IsSPDXLicense() && c.right.IsSPDXLicense()
+}- rename IsSPDXExpression to IsSPDXLicense - don't check exceptions in IsSPDXLicense - keep and check exceptions in foundSPDXExceptions - don't check nested licenses if this is valid license (for optimization) - add test case
|
I used your logic. 40ea836:
PS |
|
We can't skip validation for So it means that we will overcheck SPDX expressions.
But after changes we will check:
|
|
To be honest, I still don’t quite understand why this commit is necessary. I think the issue with unnecessary recursion could be resolved by simply returning a diff --git a/pkg/sbom/spdx/marshal.go b/pkg/sbom/spdx/marshal.go
index e170beea2..e2aa9860e 100644
--- a/pkg/sbom/spdx/marshal.go
+++ b/pkg/sbom/spdx/marshal.go
@@ -451,7 +451,7 @@ func (m *Marshaler) normalizeLicenses(licenses []string) (string, []*spdx.OtherL
if e.IsSPDXExpression() {
// Use SimpleExpr for a valid SPDX license with an exception,
// to avoid parsing the license and exception separately.
- return e
+ return expression.SimpleExpr{License: e.String()}
}
licenseName = e.String()
diff --git a/pkg/sbom/spdx/marshal_private_test.go b/pkg/sbom/spdx/marshal_private_test.go
index 60d7945ef..26c540d65 100644
--- a/pkg/sbom/spdx/marshal_private_test.go
+++ b/pkg/sbom/spdx/marshal_private_test.go
@@ -70,10 +70,24 @@ func TestMarshaler_normalizeLicenses(t *testing.T) {
{
name: "happy path with WITH operator",
input: []string{
- "AFL 2.0",
+ "GPLv2",
+ "AFL 3.0 with wrong-exceptions",
+ "LGPL 2.0 and unknown-license and GNU LESSER",
"AFL 3.0 with Autoconf-exception-3.0",
},
- wantLicenseName: "AFL-2.0 AND AFL-3.0 WITH Autoconf-exception-3.0",
+ wantLicenseName: "GPL-2.0-only AND LicenseRef-51373b28fab165e9 AND LGPL-2.0-only AND LicenseRef-a0bb0951a6dfbdbe AND LGPL-2.1-only AND AFL-3.0 WITH Autoconf-exception-3.0",
+ wantOtherLicenses: []*spdx.OtherLicense{
+ {
+ LicenseIdentifier: "LicenseRef-51373b28fab165e9",
+ LicenseName: "AFL-3.0 WITH wrong-exceptions",
+ ExtractedText: `This component is licensed under "AFL-3.0 WITH wrong-exceptions"`,
+ },
+ {
+ LicenseIdentifier: "LicenseRef-a0bb0951a6dfbdbe",
+ LicenseName: "unknown-license",
+ ExtractedText: `This component is licensed under "unknown-license"`,
+ },
+ },
},
{
name: "happy path with non-SPDX exception",Both the existing tests and the one you added are passing. If there are any cases that are failing, I’d appreciate it if you could share a test or an example. |
|
Additionally, I'm unsure why we need a complex logic, like git diff pkg/sbom/cyclonedx/marshal.go
diff --git a/pkg/sbom/cyclonedx/marshal.go b/pkg/sbom/cyclonedx/marshal.go
index 9c1e00e71..08314b051 100644
--- a/pkg/sbom/cyclonedx/marshal.go
+++ b/pkg/sbom/cyclonedx/marshal.go
@@ -8,7 +8,6 @@ import (
"sort"
"strconv"
"strings"
- "sync"
cdx "github.com/CycloneDX/cyclonedx-go"
"github.com/package-url/packageurl-go"
@@ -312,35 +311,33 @@ func (m *Marshaler) normalizeLicense(license string) cdx.LicenseChoice {
license = strings.ReplaceAll(license, "-with-", " WITH ")
license = strings.ReplaceAll(license, "-WITH-", " WITH ")
- var licenseChoice cdx.LicenseChoice
- onceValidateSPDXLicense := sync.Once{}
- validateSPDXLicense := func(expr expression.Expression) expression.Expression {
- // We only need to check the main license (not its parts).
- onceValidateSPDXLicense.Do(func() {
- _, compoundExpr := expr.(expression.CompoundExpr)
- switch {
- case !expr.IsSPDXExpression():
- // Use licenseChoice.license.name for invalid SPDX ID / SPDX expression
- licenseChoice.License = &cdx.License{Name: expr.String()}
- case compoundExpr:
- // Use licenseChoice.expression for valid SPDX expression (with any conjunction)
- // e.g. "GPL-2.0 WITH Classpath-exception-2.0" or "GPL-2.0 AND MIT"
- licenseChoice.Expression = expr.String()
- default:
- // Use licenseChoice.license.id for valid SPDX ID
- licenseChoice.License = &cdx.License{ID: expr.String()}
- }
- })
-
- return expr
- }
-
- _, err := expression.Normalize(license, licensing.NormalizeLicense, expression.NormalizeForSPDX, validateSPDXLicense)
+ normalizedLicenses, err := expression.Normalize(license, licensing.NormalizeLicense, expression.NormalizeForSPDX)
if err != nil {
// Not fail on the invalid license
m.logger.Warn("Unable to marshal SPDX licenses", log.String("license", license))
return cdx.LicenseChoice{}
}
+
+ // Case 1: the license is not a valid SPDX ID or SPDX expression
+ if !normalizedLicenses.IsSPDXExpression() {
+ // Use LicenseChoice.License.Name for invalid SPDX ID / SPDX expression
+ return cdx.LicenseChoice{
+ License: &cdx.License{Name: normalizedLicenses.String()},
+ }
+ }
+
+ // Case 2: the license is a valid SPDX ID or SPDX expression
+ var licenseChoice cdx.LicenseChoice
+ switch normalizedLicenses.(type) {
+ case expression.SimpleExpr:
+ // Use LicenseChoice.License.ID for valid SPDX ID
+ licenseChoice.License = &cdx.License{ID: normalizedLicenses.String()}
+ case expression.CompoundExpr:
+ // Use LicenseChoice.Expression for valid SPDX expression (with any conjunction)
+ // e.g. "GPL-2.0 WITH Classpath-exception-2.0" or "GPL-2.0 AND MIT"
+ licenseChoice.Expression = normalizedLicenses.String()
+ }
+
return licenseChoice
} |
This reverts commit d621fc4.
- rename IsSPDXExpression to IsSPDXLicense and check only SPDX license - spdx: return SimpleExpr for expresion with `WITH` operator - spdx: add test cases for WITH operator - cyclonedx: check IsSPDXLicense after normalize license
I don't know why i didn't think about this...
I don't know why i tried to solve this issue in I updated PR - 94e9d07 |
pkg/licensing/expression/types.go
Outdated
|
|
||
| type Expression interface { | ||
| String() string | ||
| IsSPDXLicense() bool |
There was a problem hiding this comment.
Isn't IsSPDXExpression better than IsSPDXLicense? Both a single license and a compound license are also expressions according to the spec.
idstring = 1*(ALPHA / DIGIT / "-" / "." )
license-id = <short form license identifier in Annex A.1>
license-exception-id = <short form license exception identifier in Annex A.2>
license-ref = ["DocumentRef-"(idstring)":"]"LicenseRef-"(idstring)
simple-expression = license-id / license-id"+" / license-ref
compound-expression = (simple-expression /
simple-expression "WITH" license-exception-id /
compound-expression "AND" compound-expression /
compound-expression "OR" compound-expression /
"(" compound-expression ")" )
license-expression = (simple-expression / compound-expression)
There was a problem hiding this comment.
I renamed this function because "IsSPDXLicense" was not quite correct.
I don't know why i renamed this function today...
Thanks!
updated in 363074a
This PR contains the following updates: | Package | Update | Change | |---|---|---| | [mirror.gcr.io/aquasec/trivy](https://www.aquasec.com/products/trivy/) ([source](https://github.com/aquasecurity/trivy)) | minor | `0.64.1` -> `0.65.0` | --- ### Release Notes <details> <summary>aquasecurity/trivy (mirror.gcr.io/aquasec/trivy)</summary> ### [`v0.65.0`](https://github.com/aquasecurity/trivy/blob/HEAD/CHANGELOG.md#0650-2025-07-30) [Compare Source](aquasecurity/trivy@v0.64.1...v0.65.0) ##### Features - add graceful shutdown with signal handling ([#​9242](aquasecurity/trivy#9242)) ([2c05882](aquasecurity/trivy@2c05882)) - add HTTP request/response tracing support ([#​9125](aquasecurity/trivy#9125)) ([aa5b32a](aquasecurity/trivy@aa5b32a)) - **alma:** add AlmaLinux 10 support ([#​9207](aquasecurity/trivy#9207)) ([861d51e](aquasecurity/trivy@861d51e)) - **flag:** add schema validation for `--server` flag ([#​9270](aquasecurity/trivy#9270)) ([ed4640e](aquasecurity/trivy@ed4640e)) - **image:** add Docker context resolution ([#​9166](aquasecurity/trivy#9166)) ([99cd4e7](aquasecurity/trivy@99cd4e7)) - **license:** observe pkg types option in license scanner ([#​9091](aquasecurity/trivy#9091)) ([d44af8c](aquasecurity/trivy@d44af8c)) - **misconf:** add private ip google access attribute to subnetwork ([#​9199](aquasecurity/trivy#9199)) ([263845c](aquasecurity/trivy@263845c)) - **misconf:** added logging and versioning to the gcp storage bucket ([#​9226](aquasecurity/trivy#9226)) ([110f80e](aquasecurity/trivy@110f80e)) - **repo:** add git repository metadata to reports ([#​9252](aquasecurity/trivy#9252)) ([f4b2cf1](aquasecurity/trivy@f4b2cf1)) - **report:** add CVSS vectors in sarif report ([#​9157](aquasecurity/trivy#9157)) ([60723e6](aquasecurity/trivy@60723e6)) - **sbom:** add SHA-512 hash support for CycloneDX SBOM ([#​9126](aquasecurity/trivy#9126)) ([12d6706](aquasecurity/trivy@12d6706)) ##### Bug Fixes - **alma:** parse epochs from rpmqa file ([#​9101](aquasecurity/trivy#9101)) ([82db2fc](aquasecurity/trivy@82db2fc)) - also check `filepath` when removing duplicate packages ([#​9142](aquasecurity/trivy#9142)) ([4d10a81](aquasecurity/trivy@4d10a81)) - **aws:** update amazon linux 2 EOL date ([#​9176](aquasecurity/trivy#9176)) ([0ecfed6](aquasecurity/trivy@0ecfed6)) - **cli:** Add more non-sensitive flags to telemetry ([#​9110](aquasecurity/trivy#9110)) ([7041a39](aquasecurity/trivy@7041a39)) - **cli:** ensure correct command is picked by telemetry ([#​9260](aquasecurity/trivy#9260)) ([b4ad00f](aquasecurity/trivy@b4ad00f)) - **cli:** panic: attempt to get os.Args\[1] when len(os.Args) < 2 ([#​9206](aquasecurity/trivy#9206)) ([adfa879](aquasecurity/trivy@adfa879)) - **license:** add missed `GFDL-NIV-1.1` and `GFDL-NIV-1.2` into Trivy mapping ([#​9116](aquasecurity/trivy#9116)) ([a692f29](aquasecurity/trivy@a692f29)) - **license:** handle WITH operator for `LaxSplitLicenses` ([#​9232](aquasecurity/trivy#9232)) ([b4193d0](aquasecurity/trivy@b4193d0)) - migrate from `*.list` to `*.md5sums` files for `dpkg` ([#​9131](aquasecurity/trivy#9131)) ([f224de3](aquasecurity/trivy@f224de3)) - **misconf:** correctly adapt azure storage account ([#​9138](aquasecurity/trivy#9138)) ([51aa022](aquasecurity/trivy@51aa022)) - **misconf:** correctly parse empty port ranges in google\_compute\_firewall ([#​9237](aquasecurity/trivy#9237)) ([77bab7b](aquasecurity/trivy@77bab7b)) - **misconf:** fix log bucket in schema ([#​9235](aquasecurity/trivy#9235)) ([7ebc129](aquasecurity/trivy@7ebc129)) - **misconf:** skip rewriting expr if attr is nil ([#​9113](aquasecurity/trivy#9113)) ([42ccd3d](aquasecurity/trivy@42ccd3d)) - **nodejs:** don't use prerelease logic for compare npm constraints ([#​9208](aquasecurity/trivy#9208)) ([fe96436](aquasecurity/trivy@fe96436)) - prevent graceful shutdown message on normal exit ([#​9244](aquasecurity/trivy#9244)) ([6095984](aquasecurity/trivy@6095984)) - **rootio:** check full version to detect `root.io` packages ([#​9117](aquasecurity/trivy#9117)) ([c2ddd44](aquasecurity/trivy@c2ddd44)) - **rootio:** fix severity selection ([#​9181](aquasecurity/trivy#9181)) ([6fafbeb](aquasecurity/trivy@6fafbeb)) - **sbom:** merge in-graph and out-of-graph OS packages in scan results ([#​9194](aquasecurity/trivy#9194)) ([aa944cc](aquasecurity/trivy@aa944cc)) - **sbom:** use correct field for licenses in CycloneDX reports ([#​9057](aquasecurity/trivy#9057)) ([143da88](aquasecurity/trivy@143da88)) - **secret:** add UTF-8 validation in secret scanner to prevent protobuf marshalling errors ([#​9253](aquasecurity/trivy#9253)) ([54832a7](aquasecurity/trivy@54832a7)) - **secret:** fix line numbers for multiple-line secrets ([#​9104](aquasecurity/trivy#9104)) ([e579746](aquasecurity/trivy@e579746)) - **server:** add HTTP transport setup to server mode ([#​9217](aquasecurity/trivy#9217)) ([1163b04](aquasecurity/trivy@1163b04)) - supporting .egg-info/METADATA in python.Packaging analyzer ([#​9151](aquasecurity/trivy#9151)) ([e306e2d](aquasecurity/trivy@e306e2d)) - **terraform:** `for_each` on a map returns a resource for every key ([#​9156](aquasecurity/trivy#9156)) ([153318f](aquasecurity/trivy@153318f)) </details> --- ### Configuration 📅 **Schedule**: Branch creation - At any time (no schedule defined), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR is behind base branch, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://github.com/renovatebot/renovate). <!--renovate-debug:eyJjcmVhdGVkSW5WZXIiOiI0MS4xLjMiLCJ1cGRhdGVkSW5WZXIiOiI0MS4xLjMiLCJ0YXJnZXRCcmFuY2giOiJtYWluIiwibGFiZWxzIjpbImltYWdlIl19--> Reviewed-on: https://gitea.alexlebens.dev/alexlebens/infrastructure/pulls/1073 Co-authored-by: Renovate Bot <renovate-bot@alexlebens.net> Co-committed-by: Renovate Bot <renovate-bot@alexlebens.net>
Description
We need to use one of three correct field for component license (see #9042 for more details)
Solution
Normalize and check each package license:
1.1. valid SPDX ID use licenseChoise.License.ID
1.2. text license - use licenseChoise.License.Name
1.3. invalid SPDX ID - use licenseChoise.License.Name
1.4. all parts of expression are valid SPDX IDs/SPDX exceptions - use licenseChoise.Expression
1.5. for other cases use licenseChoise.License.Name
Examples of changes:
CycloneDX report for
centos:8:Before
After:
Related issues
Checklist