Skip to content

Commit

Permalink
fix: anchore#953 Derive language from pURL - https://github.com/ancho…
Browse files Browse the repository at this point in the history
…re/syft… (anchore#957)

Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
  • Loading branch information
jonmcewen committed Apr 26, 2022
1 parent 5df2a53 commit 2a7265a
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 14 deletions.
4 changes: 4 additions & 0 deletions internal/formats/common/cyclonedxhelpers/component.go
Expand Up @@ -86,6 +86,10 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package {
p.Type = pkg.TypeFromPURL(p.PURL)
}

if p.Language == "" {
p.Language = pkg.LanguageFromPURL(p.PURL)
}

return p
}

Expand Down
28 changes: 28 additions & 0 deletions internal/formats/common/cyclonedxhelpers/component_test.go
Expand Up @@ -191,3 +191,31 @@ func Test_deriveBomRef(t *testing.T) {
})
}
}

// Test_decodeComponent checks that decodeComponent populates the package
// language from the component's package URL when the component carries no
// other language information.
func Test_decodeComponent(t *testing.T) {
	// the same maven pURL is used for both the package URL and the BOM reference
	const logbackPURL = "pkg:maven/ch.qos.logback/logback-classic@1.2.3"

	cases := []struct {
		name      string
		component cyclonedx.Component
		want      pkg.Language
	}{
		{
			name: "derive language from pURL if missing",
			// a java component that has no syft-specific properties attached
			component: cyclonedx.Component{
				Name:       "ch.qos.logback/logback-classic",
				Version:    "1.2.3",
				PackageURL: logbackPURL,
				Type:       "library",
				BOMRef:     logbackPURL,
			},
			want: pkg.Java,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			decoded := decodeComponent(&tc.component)
			assert.Equal(t, tc.want, decoded.Language)
		})
	}
}
7 changes: 7 additions & 0 deletions syft/pkg/cataloger/catalog.go
Expand Up @@ -70,6 +70,13 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(p, release)

// if the cataloger was not able to identify the language, we have an
// opportunity to derive this value from the PURL instead. Worst case, the
// language cannot be identified at either stage and is set to UnknownLanguage
if p.Language == "" {
p.Language = pkg.LanguageFromPURL(p.PURL)
}

// create file-to-package relationships for files owned by the package
owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/language.go
Expand Up @@ -11,7 +11,7 @@ type Language string

const (
// the full set of supported programming languages
UnknownLanguage Language = "UnknownLanguage"
UnknownLanguage Language = ""
Java Language = "java"
JavaScript Language = "javascript"
Python Language = "python"
Expand Down
32 changes: 19 additions & 13 deletions test/integration/encode_decode_cycle_test.go
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/anchore/syft/internal/formats/cyclonedxxml"
"github.com/anchore/syft/internal/formats/syftjson"
"github.com/anchore/syft/syft/source"
"github.com/google/go-cmp/cmp"
"regexp"
"testing"

Expand All @@ -21,20 +22,26 @@ import (
)

// TestEncodeDecodeEncodeCycleComparison is testing for differences in how SBOM documents get encoded on multiple cycles.
// By encding and decoding the sbom we can compare the differences between the set of resulting objects. However,
// By encoding and decoding the sbom we can compare the differences between the set of resulting objects. However,
// this requires specific comparisons being done, and select redactions/omissions being made. Additionally, there are
// already unit tests on each format encoder-decoder for properly functioning comparisons in depth, so there is no need
// to do an object-to-object comparison. For this reason this test focuses on a bytes-to-bytes comparison after an
// encode-decode-encode loop which will detect lossy behavior in both directions.
func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
// use second image for relationships
images := []string{"image-pkg-coverage", "image-owning-package"}
tests := []struct {
formatOption sbom.FormatID
redactor func(in []byte) []byte
json bool
}{
{
formatOption: syftjson.ID,
json: true,
redactor: func(in []byte) []byte {
in = regexp.MustCompile("\"(id|parent)\": \"[^\"]+\",").ReplaceAll(in, []byte{})
return in
},
json: true,
},
{
formatOption: cyclonedxjson.ID,
Expand All @@ -55,9 +62,8 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
}

for _, test := range tests {
// use second image for relationships
for _, image := range []string{"image-pkg-coverage", "image-owning-package"} {
t.Run(fmt.Sprintf("%s/%s", test.formatOption, image), func(t *testing.T) {
t.Run(fmt.Sprintf("%s", test.formatOption), func(t *testing.T) {
for _, image := range images {
originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope)

format := syft.FormatByID(test.formatOption)
Expand All @@ -81,15 +87,15 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
if test.json {
s1 := string(by1)
s2 := string(by2)
assert.JSONEq(t, s1, s2)
} else {
if !assert.True(t, bytes.Equal(by1, by2)) {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(string(by1), string(by2), true)
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
if diff := cmp.Diff(s1, s2); diff != "" {
t.Errorf("Encode/Decode mismatch (-want +got):\n%s", diff)
}
} else if !assert.True(t, bytes.Equal(by1, by2)) {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(string(by1), string(by2), true)
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
}
})
}
}
})
}
}

0 comments on commit 2a7265a

Please sign in to comment.