From db970ac361d88947abdc02de5af80fe921bb5fe7 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 10 Feb 2022 10:39:53 -0500 Subject: [PATCH 1/3] enhance pURL generation for java packages Signed-off-by: Alex Goodman --- syft/pkg/cataloger/common/cpe/java.go | 6 +-- syft/pkg/cataloger/common/cpe/java_test.go | 2 +- syft/pkg/cataloger/java/archive_filename.go | 17 +------ syft/pkg/cataloger/java/archive_parser.go | 25 ++++++++++- .../pkg/cataloger/java/archive_parser_test.go | 4 ++ syft/pkg/cataloger/java/package_url.go | 25 +++++++++++ syft/pkg/cataloger/java/package_url_test.go | 45 +++++++++++++++++++ .../pkg/cataloger/java/parse_java_manifest.go | 2 +- syft/pkg/java_metadata.go | 21 ++------- 9 files changed, 107 insertions(+), 40 deletions(-) create mode 100644 syft/pkg/cataloger/java/package_url.go create mode 100644 syft/pkg/cataloger/java/package_url_test.go diff --git a/syft/pkg/cataloger/common/cpe/java.go b/syft/pkg/cataloger/common/cpe/java.go index 1ca5d8f19de..be86549e7e5 100644 --- a/syft/pkg/cataloger/common/cpe/java.go +++ b/syft/pkg/cataloger/common/cpe/java.go @@ -40,11 +40,11 @@ var ( ) func candidateProductsForJava(p pkg.Package) []string { - return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), groupIDsFromJavaPackage(p)) + return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p)) } func candidateVendorsForJava(p pkg.Package) fieldCandidateSet { - gidVendors := vendorsFromGroupIDs(groupIDsFromJavaPackage(p)) + gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p)) nameVendors := vendorsFromJavaManifestNames(p) return newFieldCandidateSetFromSets(gidVendors, nameVendors) } @@ -173,7 +173,7 @@ func artifactIDFromJavaPackage(p pkg.Package) string { return artifactID } -func groupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) { +func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) { metadata, ok := p.Metadata.(pkg.JavaMetadata) if !ok { return nil diff --git 
a/syft/pkg/cataloger/common/cpe/java_test.go b/syft/pkg/cataloger/common/cpe/java_test.go index a830f3c1de8..0f87e5b56fe 100644 --- a/syft/pkg/cataloger/common/cpe/java_test.go +++ b/syft/pkg/cataloger/common/cpe/java_test.go @@ -333,7 +333,7 @@ func Test_groupIDsFromJavaPackage(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - assert.ElementsMatch(t, test.expects, groupIDsFromJavaPackage(test.pkg)) + assert.ElementsMatch(t, test.expects, GroupIDsFromJavaPackage(test.pkg)) }) } } diff --git a/syft/pkg/cataloger/java/archive_filename.go b/syft/pkg/cataloger/java/archive_filename.go index 625f5bf736a..870e4ec7deb 100644 --- a/syft/pkg/cataloger/java/archive_filename.go +++ b/syft/pkg/cataloger/java/archive_filename.go @@ -55,28 +55,13 @@ type archiveFilename struct { version string } -// TODO: Remove this method once we're using Go 1.15+. -// -// Go 1.15 introduces a `SubexpIndex` method for the Regexp type that would let -// this code be made more elegant. Once we've reached 1.15, we should eliminate -// this function in favor of that method. 
-func subexpIndex(re *regexp.Regexp, name string) int { - for i, subexpName := range re.SubexpNames() { - if subexpName == name { - return i - } - } - - return -1 -} - func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string { if len(matches) < 1 { log.Warnf("unexpectedly empty matches for archive '%s'", raw) return "" } - index := subexpIndex(re, subexpName) + index := re.SubexpIndex(subexpName) if index < 1 { log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw) return "" diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 75dd3368356..b96f8f1827d 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -122,6 +122,13 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error) pkgs = append([]*pkg.Package{parentPkg}, pkgs...) } + // add pURLs to all packages found + // note: since package information may change after initial creation when parsing multiple locations within the + // jar, we wait until the conclusion of the parsing process before synthesizing pURLs. 
+ for _, p := range pkgs { + addPURL(p) + } + return pkgs, relationships, nil } @@ -348,7 +355,7 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po } if packageIdentitiesMatch(p, parentPkg) { - updatePackage(p, parentPkg) + updateParentPackage(p, parentPkg) return nil } @@ -379,7 +386,7 @@ func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool { return false } -func updatePackage(p pkg.Package, parentPkg *pkg.Package) { +func updateParentPackage(p pkg.Package, parentPkg *pkg.Package) { // we've run across more information about our parent package, add this info to the parent package metadata // the pom properties is typically a better source of information for name and version than the manifest parentPkg.Name = p.Name @@ -401,3 +408,17 @@ func updatePackage(p pkg.Package, parentPkg *pkg.Package) { parentPkg.Metadata = parentMetadata } } + +func addPURL(p *pkg.Package) { + purl := packageURL(*p) + if purl == "" { + return + } + + metadata, ok := p.Metadata.(pkg.JavaMetadata) + if !ok { + return + } + metadata.PURL = purl + p.Metadata = metadata +} diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index ac1b881c01e..748bc10cce1 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -134,6 +134,7 @@ func TestParseJar(t *testing.T) { Version: "1.0-SNAPSHOT", Extra: map[string]string{}, }, + PURL: "pkg:maven/io.jenkins.plugins/example-jenkins-plugin@1.0-SNAPSHOT", }, }, }, @@ -154,6 +155,7 @@ func TestParseJar(t *testing.T) { "Manifest-Version": "1.0", }, }, + PURL: "pkg:maven/example-java-app-gradle/example-java-app-gradle@0.1.0", }, }, }, @@ -191,6 +193,7 @@ func TestParseJar(t *testing.T) { Version: "0.1.0", Extra: map[string]string{}, }, + PURL: "pkg:maven/org.anchore/example-java-app-maven@0.1.0", }, }, "joda-time": { @@ -219,6 +222,7 @@ func TestParseJar(t *testing.T) { Description: "Date and time 
library to replace JDK date handling", URL: "http://www.joda.org/joda-time/", }, + PURL: "pkg:maven/joda-time/joda-time@2.9.2", }, }, }, diff --git a/syft/pkg/cataloger/java/package_url.go b/syft/pkg/cataloger/java/package_url.go new file mode 100644 index 00000000000..9d5cccd3eb9 --- /dev/null +++ b/syft/pkg/cataloger/java/package_url.go @@ -0,0 +1,25 @@ +package java + +import ( + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" +) + +// packageURL returns the PURL for the specific java package (see https://github.com/package-url/purl-spec) +func packageURL(p pkg.Package) string { + var groupID = p.Name + groupIDs := cpe.GroupIDsFromJavaPackage(p) + if len(groupIDs) > 0 { + groupID = groupIDs[0] + } + + pURL := packageurl.NewPackageURL( + packageurl.TypeMaven, // TODO: should we filter down by package types here? + groupID, + p.Name, + p.Version, + nil, // TODO: there are probably several qualifiers that can be specified here + "") + return pURL.ToString() +} diff --git a/syft/pkg/cataloger/java/package_url_test.go b/syft/pkg/cataloger/java/package_url_test.go new file mode 100644 index 00000000000..5124b2eb2f3 --- /dev/null +++ b/syft/pkg/cataloger/java/package_url_test.go @@ -0,0 +1,45 @@ +package java + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/stretchr/testify/assert" + "testing" +) + +func Test_packageURL(t *testing.T) { + tests := []struct { + pkg pkg.Package + expect string + }{ + { + pkg: pkg.Package{ + Name: "example-java-app-maven", + Version: "0.1.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, + Metadata: pkg.JavaMetadata{ + VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar", + Manifest: &pkg.JavaManifest{ + Main: map[string]string{ + "Manifest-Version": "1.0", + }, + }, + PomProperties: &pkg.PomProperties{ + Path: "META-INF/maven/org.anchore/example-java-app-maven/pom.properties", +
GroupID: "org.anchore", + ArtifactID: "example-java-app-maven", + Version: "0.1.0", + Extra: map[string]string{}, + }, + }, + }, + expect: "pkg:maven/org.anchore/example-java-app-maven@0.1.0", + }, + } + for _, tt := range tests { + t.Run(tt.expect, func(t *testing.T) { + assert.Equal(t, tt.expect, packageURL(tt.pkg)) + }) + } +} diff --git a/syft/pkg/cataloger/java/parse_java_manifest.go b/syft/pkg/cataloger/java/parse_java_manifest.go index 7d9c7b7f621..93bb92b4938 100644 --- a/syft/pkg/cataloger/java/parse_java_manifest.go +++ b/syft/pkg/cataloger/java/parse_java_manifest.go @@ -33,7 +33,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) // empty lines denote section separators if strings.TrimSpace(line) == "" { - // we don't want to allocate a new section map that won't necessarily be used, do that once there is + // we don't expect to allocate a new section map that won't necessarily be used, do that once there is // a non-empty line to process // do not process line continuations after this diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index 7fe919ae566..8581c218eac 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -5,13 +5,12 @@ import ( "github.com/anchore/syft/syft/linux" - "github.com/anchore/packageurl-go" "github.com/anchore/syft/internal" ) var _ urlIdentifier = (*JavaMetadata)(nil) -var JenkinsPluginPomPropertiesGroupIDs = []string{ +var jenkinsPluginPomPropertiesGroupIDs = []string{ "io.jenkins.plugins", "org.jenkins.plugins", "org.jenkins-ci.plugins", @@ -25,6 +24,7 @@ type JavaMetadata struct { Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"` PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"` PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"` + PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs Parent *Package `hash:"ignore" 
json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy). } @@ -59,7 +59,7 @@ type PomParent struct { // PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties. func (p PomProperties) PkgTypeIndicated() Type { - if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") { + if internal.HasAnyOfPrefixes(p.GroupID, jenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") { return JenkinsPluginPkg } @@ -74,18 +74,5 @@ type JavaManifest struct { // PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec) func (m JavaMetadata) PackageURL(_ *linux.Release) string { - if m.PomProperties != nil { - pURL := packageurl.NewPackageURL( - packageurl.TypeMaven, - m.PomProperties.GroupID, - m.PomProperties.ArtifactID, - m.PomProperties.Version, - nil, // TODO: there are probably several qualifiers that can be specified here - "") - return pURL.ToString() - } - - // TODO: support non-maven artifacts - - return "" + return m.PURL } From 8904daed6725e1a9c5eb7092baa2997dc4a7f1ba Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 10 Feb 2022 13:19:16 -0500 Subject: [PATCH 2/3] optionally split out npm namespaces for pURL generation Signed-off-by: Alex Goodman --- syft/pkg/java_metadata_test.go | 36 ------------------- syft/pkg/language.go | 18 ++++++---- syft/pkg/npm_package_json_metadata.go | 15 ++++++-- syft/pkg/npm_package_json_metadata_test.go | 42 ++++++++++++++++++++++ syft/pkg/url_test.go | 18 +++------- 5 files changed, 70 insertions(+), 59 deletions(-) create mode 100644 syft/pkg/npm_package_json_metadata_test.go diff --git a/syft/pkg/java_metadata_test.go b/syft/pkg/java_metadata_test.go index 07a6ca97793..d538ee5a9d0 100644 --- a/syft/pkg/java_metadata_test.go +++ 
b/syft/pkg/java_metadata_test.go @@ -3,7 +3,6 @@ package pkg import ( "testing" - "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" ) @@ -110,38 +109,3 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) { }) } } - -func TestJavaMetadata_pURL(t *testing.T) { - tests := []struct { - metadata JavaMetadata - expected string - }{ - { - metadata: JavaMetadata{ - PomProperties: &PomProperties{ - Path: "p", - Name: "n", - GroupID: "g.id", - ArtifactID: "a", - Version: "v", - }, - }, - expected: "pkg:maven/g.id/a@v", - }, - { - metadata: JavaMetadata{}, - expected: "", - }, - } - - for _, test := range tests { - t.Run(test.expected, func(t *testing.T) { - actual := test.metadata.PackageURL(nil) - if actual != test.expected { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(test.expected, actual, true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) - } - }) - } -} diff --git a/syft/pkg/language.go b/syft/pkg/language.go index e0168f4759a..42e96b7ab43 100644 --- a/syft/pkg/language.go +++ b/syft/pkg/language.go @@ -1,6 +1,10 @@ package pkg -import "github.com/anchore/packageurl-go" +import ( + "strings" + + "github.com/anchore/packageurl-go" +) // Language represents a single programming language. 
type Language string @@ -43,16 +47,16 @@ func LanguageFromPURL(p string) Language { } func LanguageByName(name string) Language { - switch name { - case packageurl.TypeMaven, purlGradlePkgType: + switch strings.ToLower(name) { + case packageurl.TypeMaven, string(purlGradlePkgType), string(JavaPkg), string(Java): return Java - case packageurl.TypeComposer: + case packageurl.TypeComposer, string(PhpComposerPkg), string(PHP): return PHP - case packageurl.TypeGolang: + case packageurl.TypeGolang, string(GoModulePkg), string(Go): return Go - case packageurl.TypeNPM: + case packageurl.TypeNPM, string(JavaScript): return JavaScript - case packageurl.TypePyPi: + case packageurl.TypePyPi, string(Python): return Python case packageurl.TypeGem: return Ruby diff --git a/syft/pkg/npm_package_json_metadata.go b/syft/pkg/npm_package_json_metadata.go index 3246b5909a1..2f9a0180ab1 100644 --- a/syft/pkg/npm_package_json_metadata.go +++ b/syft/pkg/npm_package_json_metadata.go @@ -1,6 +1,8 @@ package pkg import ( + "strings" + "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/linux" ) @@ -21,10 +23,19 @@ type NpmPackageJSONMetadata struct { // PackageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec) func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string { + var namespace string + name := p.Name + + fields := strings.SplitN(p.Name, "/", 2) + if len(fields) > 1 { + namespace = fields[0] + name = fields[1] + } + return packageurl.NewPackageURL( packageurl.TypeNPM, - "", - p.Name, + namespace, + name, p.Version, nil, "", diff --git a/syft/pkg/npm_package_json_metadata_test.go b/syft/pkg/npm_package_json_metadata_test.go new file mode 100644 index 00000000000..87b12960e42 --- /dev/null +++ b/syft/pkg/npm_package_json_metadata_test.go @@ -0,0 +1,42 @@ +package pkg + +import ( + "github.com/anchore/packageurl-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" +) + +func 
TestNpmPackageJSONMetadata_PackageURL(t *testing.T) { + + tests := []struct { + name string + metadata NpmPackageJSONMetadata + expected string + }{ + { + name: "no namespace", + metadata: NpmPackageJSONMetadata{ + Name: "arborist", + Version: "2.6.2", + }, + expected: "pkg:npm/arborist@2.6.2", + }, + { + name: "split by namespace", + metadata: NpmPackageJSONMetadata{ + Name: "@npmcli/arborist", + Version: "2.6.2", + }, + expected: "pkg:npm/@npmcli/arborist@2.6.2", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := tt.metadata.PackageURL(nil) + assert.Equal(t, tt.expected, actual) + _, err := packageurl.FromString(actual) + require.NoError(t, err) + }) + } +} diff --git a/syft/pkg/url_test.go b/syft/pkg/url_test.go index 2abae9368d3..dad951b6ba9 100644 --- a/syft/pkg/url_test.go +++ b/syft/pkg/url_test.go @@ -141,13 +141,8 @@ func TestPackageURL(t *testing.T) { Version: "bad-v0.1.0", Type: JavaPkg, Metadata: JavaMetadata{ - PomProperties: &PomProperties{ - Path: "p", - Name: "n", - GroupID: "g.id", - ArtifactID: "a", - Version: "v", - }, + PomProperties: &PomProperties{}, + PURL: "pkg:maven/g.id/a@v", // assembled by the java cataloger }, }, @@ -160,13 +155,8 @@ func TestPackageURL(t *testing.T) { Version: "bad-v0.1.0", Type: JenkinsPluginPkg, Metadata: JavaMetadata{ - PomProperties: &PomProperties{ - Path: "p", - Name: "n", - GroupID: "g.id", - ArtifactID: "a", - Version: "v", - }, + PomProperties: &PomProperties{}, + PURL: "pkg:maven/g.id/a@v", // assembled by the java cataloger }, }, From 271c706e76dd1936c50f8659923e7c1d472e5f89 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 10 Feb 2022 13:35:21 -0500 Subject: [PATCH 3/3] nit updates Signed-off-by: Alex Goodman --- syft/pkg/cataloger/java/package_url_test.go | 2 +- syft/pkg/cataloger/java/parse_java_manifest.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/syft/pkg/cataloger/java/package_url_test.go b/syft/pkg/cataloger/java/package_url_test.go 
index 5124b2eb2f3..27cf46ef91a 100644 --- a/syft/pkg/cataloger/java/package_url_test.go +++ b/syft/pkg/cataloger/java/package_url_test.go @@ -30,7 +30,7 @@ func Test_packageURL(t *testing.T) { GroupID: "org.anchore", ArtifactID: "example-java-app-maven", Version: "0.1.0", - Extra: map[string]string{}, + Extra: make(map[string]string), }, }, }, diff --git a/syft/pkg/cataloger/java/parse_java_manifest.go b/syft/pkg/cataloger/java/parse_java_manifest.go index 93bb92b4938..7d9c7b7f621 100644 --- a/syft/pkg/cataloger/java/parse_java_manifest.go +++ b/syft/pkg/cataloger/java/parse_java_manifest.go @@ -33,7 +33,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) // empty lines denote section separators if strings.TrimSpace(line) == "" { - // we don't expect to allocate a new section map that won't necessarily be used, do that once there is + // we don't want to allocate a new section map that won't necessarily be used, do that once there is // a non-empty line to process // do not process line continuations after this