diff --git a/internal/formats/common/cyclonedxhelpers/component.go b/internal/formats/common/cyclonedxhelpers/component.go index d4f6606aeaf..a0d185f5111 100644 --- a/internal/formats/common/cyclonedxhelpers/component.go +++ b/internal/formats/common/cyclonedxhelpers/component.go @@ -86,6 +86,10 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package { p.Type = pkg.TypeFromPURL(p.PURL) } + if p.Language == "" { + p.Language = pkg.LanguageFromPURL(p.PURL) + } + return p } diff --git a/internal/formats/common/cyclonedxhelpers/component_test.go b/internal/formats/common/cyclonedxhelpers/component_test.go index ab7f3b812bb..586d7c89d2a 100644 --- a/internal/formats/common/cyclonedxhelpers/component_test.go +++ b/internal/formats/common/cyclonedxhelpers/component_test.go @@ -191,3 +191,31 @@ func Test_deriveBomRef(t *testing.T) { }) } } + +func Test_decodeComponent(t *testing.T) { + javaComponentWithNoSyftProperties := cyclonedx.Component{ + Name: "ch.qos.logback/logback-classic", + Version: "1.2.3", + PackageURL: "pkg:maven/ch.qos.logback/logback-classic@1.2.3", + Type: "library", + BOMRef: "pkg:maven/ch.qos.logback/logback-classic@1.2.3", + } + + tests := []struct { + name string + component cyclonedx.Component + want pkg.Language + }{ + { + name: "derive language from pURL if missing", + component: javaComponentWithNoSyftProperties, + want: pkg.Java, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, decodeComponent(&tt.component).Language) + }) + } +} diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index dfd242a5922..fa0e4d72da0 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -70,6 +70,13 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers .. // generate PURL (note: this is excluded from package ID, so is safe to mutate) p.PURL = pkg.URL(p, release) + // if we were not able to identify the language we have an opportunity + // to try and get this value from the PURL. Worst case we assert that + // we could not identify the language at either stage and set UnknownLanguage + if p.Language == "" { + p.Language = pkg.LanguageFromPURL(p.PURL) + } + // create file-to-package relationships for files owned by the package owningRelationships, err := packageFileOwnershipRelationships(p, resolver) if err != nil { diff --git a/syft/pkg/language.go b/syft/pkg/language.go index b4b3734a363..d9c4905b1dd 100644 --- a/syft/pkg/language.go +++ b/syft/pkg/language.go @@ -11,7 +11,7 @@ type Language string const ( // the full set of supported programming languages - UnknownLanguage Language = "UnknownLanguage" + UnknownLanguage Language = "" Java Language = "java" JavaScript Language = "javascript" Python Language = "python" diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 348a309d229..012715dc3cd 100644 --- a/test/integration/encode_decode_cycle_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -7,6 +7,7 @@ import ( "github.com/anchore/syft/internal/formats/cyclonedxxml" "github.com/anchore/syft/internal/formats/syftjson" "github.com/anchore/syft/syft/source" + "github.com/google/go-cmp/cmp" "regexp" "testing" @@ -21,12 +22,14 @@ import ( ) // TestEncodeDecodeEncodeCycleComparison is testing for differences in how SBOM documents get encoded on multiple cycles. -// By encding and decoding the sbom we can compare the differences between the set of resulting objects. However, +// By encoding and decoding the sbom we can compare the differences between the set of resulting objects. However, // this requires specific comparisons being done, and select redactions/omissions being made. Additionally, there are // already unit tests on each format encoder-decoder for properly functioning comparisons in depth, so there is no need // to do an object-to-object comparison. For this reason this test focuses on a bytes-to-bytes comparison after an // encode-decode-encode loop which will detect lossy behavior in both directions. func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { + // use second image for relationships + images := []string{"image-pkg-coverage", "image-owning-package"} tests := []struct { formatOption sbom.FormatID redactor func(in []byte) []byte @@ -34,7 +37,11 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { }{ { formatOption: syftjson.ID, - json: true, + redactor: func(in []byte) []byte { + in = regexp.MustCompile("\"(id|parent)\": \"[^\"]+\",").ReplaceAll(in, []byte{}) + return in + }, + json: true, }, { formatOption: cyclonedxjson.ID, @@ -55,9 +62,8 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { } for _, test := range tests { - // use second image for relationships - for _, image := range []string{"image-pkg-coverage", "image-owning-package"} { - t.Run(fmt.Sprintf("%s/%s", test.formatOption, image), func(t *testing.T) { + t.Run(fmt.Sprintf("%s", test.formatOption), func(t *testing.T) { + for _, image := range images { originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope) format := syft.FormatByID(test.formatOption) @@ -81,15 +87,15 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { if test.json { s1 := string(by1) s2 := string(by2) - assert.JSONEq(t, s1, s2) - } else { - if !assert.True(t, bytes.Equal(by1, by2)) { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(string(by1), string(by2), true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) + if diff := cmp.Diff(s1, s2); diff != "" { + t.Errorf("Encode/Decode mismatch (-want +got):\n%s", diff) } + } else if !assert.True(t, bytes.Equal(by1, by2)) { + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(string(by1), string(by2), true) + t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) } - }) - } + } + }) } }