Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pURL generation for java packages + fix NPM pURL generation #812

Merged
merged 3 commits into from Feb 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions syft/pkg/cataloger/common/cpe/java.go
Expand Up @@ -40,11 +40,11 @@ var (
)

func candidateProductsForJava(p pkg.Package) []string {
return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), groupIDsFromJavaPackage(p))
return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
}

func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
gidVendors := vendorsFromGroupIDs(groupIDsFromJavaPackage(p))
gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
nameVendors := vendorsFromJavaManifestNames(p)
return newFieldCandidateSetFromSets(gidVendors, nameVendors)
}
Expand Down Expand Up @@ -173,7 +173,7 @@ func artifactIDFromJavaPackage(p pkg.Package) string {
return artifactID
}

func groupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
metadata, ok := p.Metadata.(pkg.JavaMetadata)
if !ok {
return nil
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/common/cpe/java_test.go
Expand Up @@ -333,7 +333,7 @@ func Test_groupIDsFromJavaPackage(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expects, groupIDsFromJavaPackage(test.pkg))
assert.ElementsMatch(t, test.expects, GroupIDsFromJavaPackage(test.pkg))
})
}
}
Expand Down
17 changes: 1 addition & 16 deletions syft/pkg/cataloger/java/archive_filename.go
Expand Up @@ -55,28 +55,13 @@ type archiveFilename struct {
version string
}

// subexpIndex returns the index of the first capture group in re whose name
// is name, or -1 when re has no capture group with that name.
//
// It delegates to (*regexp.Regexp).SubexpIndex (available since Go 1.15),
// replacing the previous hand-rolled scan over re.SubexpNames(). One
// consequence: an empty name now yields -1. The old loop returned 0 for "",
// which is the whole-match slot rather than a named group.
func subexpIndex(re *regexp.Regexp, name string) int {
	return re.SubexpIndex(name)
}

func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string {
if len(matches) < 1 {
log.Warnf("unexpectedly empty matches for archive '%s'", raw)
return ""
}

index := subexpIndex(re, subexpName)
index := re.SubexpIndex(subexpName)
wagoodman marked this conversation as resolved.
Show resolved Hide resolved
if index < 1 {
log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw)
return ""
Expand Down
25 changes: 23 additions & 2 deletions syft/pkg/cataloger/java/archive_parser.go
Expand Up @@ -122,6 +122,13 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
pkgs = append([]*pkg.Package{parentPkg}, pkgs...)
}

// add pURLs to all packages found
// note: since package information may change after initial creation when parsing multiple locations within the
// jar, we wait until the conclusion of the parsing process before synthesizing pURLs.
for _, p := range pkgs {
addPURL(p)
}

return pkgs, relationships, nil
}

Expand Down Expand Up @@ -348,7 +355,7 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po
}

if packageIdentitiesMatch(p, parentPkg) {
updatePackage(p, parentPkg)
updateParentPackage(p, parentPkg)
return nil
}

Expand Down Expand Up @@ -379,7 +386,7 @@ func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool {
return false
}

func updatePackage(p pkg.Package, parentPkg *pkg.Package) {
func updateParentPackage(p pkg.Package, parentPkg *pkg.Package) {
// we've run across more information about our parent package, add this info to the parent package metadata
// the pom properties is typically a better source of information for name and version than the manifest
parentPkg.Name = p.Name
Expand All @@ -401,3 +408,17 @@ func updatePackage(p pkg.Package, parentPkg *pkg.Package) {
parentPkg.Metadata = parentMetadata
}
}

// addPURL synthesizes a package URL for p via packageURL and records it in the
// PURL field of the package's Java metadata.
//
// The package is left untouched when packageURL yields an empty string or when
// p.Metadata does not hold a pkg.JavaMetadata value.
func addPURL(p *pkg.Package) {
	generated := packageURL(*p)
	javaMetadata, isJava := p.Metadata.(pkg.JavaMetadata)
	if generated == "" || !isJava {
		return
	}

	// the assertion above produced a copy, so the updated value has to be
	// stored back onto the package for the change to stick
	javaMetadata.PURL = generated
	p.Metadata = javaMetadata
}
4 changes: 4 additions & 0 deletions syft/pkg/cataloger/java/archive_parser_test.go
Expand Up @@ -134,6 +134,7 @@ func TestParseJar(t *testing.T) {
Version: "1.0-SNAPSHOT",
Extra: map[string]string{},
},
PURL: "pkg:maven/io.jenkins.plugins/example-jenkins-plugin@1.0-SNAPSHOT",
},
},
},
Expand All @@ -154,6 +155,7 @@ func TestParseJar(t *testing.T) {
"Manifest-Version": "1.0",
},
},
PURL: "pkg:maven/example-java-app-gradle/example-java-app-gradle@0.1.0",
},
},
},
Expand Down Expand Up @@ -191,6 +193,7 @@ func TestParseJar(t *testing.T) {
Version: "0.1.0",
Extra: map[string]string{},
},
PURL: "pkg:maven/org.anchore/example-java-app-maven@0.1.0",
},
},
"joda-time": {
Expand Down Expand Up @@ -219,6 +222,7 @@ func TestParseJar(t *testing.T) {
Description: "Date and time library to replace JDK date handling",
URL: "http://www.joda.org/joda-time/",
},
PURL: "pkg:maven/joda-time/joda-time@2.9.2",
},
},
},
Expand Down
25 changes: 25 additions & 0 deletions syft/pkg/cataloger/java/package_url.go
@@ -0,0 +1,25 @@
package java

import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)

// packageURL returns the PURL for the specific java package (see https://github.com/package-url/purl-spec)
func packageURL(p pkg.Package) string {
var groupID = p.Name
groupIDs := cpe.GroupIDsFromJavaPackage(p)
if len(groupIDs) > 0 {
groupID = groupIDs[0]
}

pURL := packageurl.NewPackageURL(
packageurl.TypeMaven, // TODO: should we filter down by package types here?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this always maven? ... I see the purl spec doesn't seem to have any other java types, which is probably ok

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also mixed on this.

I see the purl spec doesn't seem to have any other java types

That's what made me pause on excluding these. If we don't have a pURL for these packages, then package type extraction will be impossible. So it seems right to at least include a purl that matches the java ecosystem. (I suspect in the future this will change)

groupID,
p.Name,
p.Version,
nil, // TODO: there are probably several qualifiers that can be specified here
"")
return pURL.ToString()
}
45 changes: 45 additions & 0 deletions syft/pkg/cataloger/java/package_url_test.go
@@ -0,0 +1,45 @@
package java

import (
"github.com/anchore/syft/syft/pkg"
"github.com/stretchr/testify/assert"
"testing"
)

// Test_packageURL checks the pURL string that packageURL produces for a Java
// package whose metadata carries pom properties.
func Test_packageURL(t *testing.T) {
	// a maven-built jar: the group ID and artifact ID come from pom.properties
	mavenApp := pkg.Package{
		Name:         "example-java-app-maven",
		Version:      "0.1.0",
		Language:     pkg.Java,
		Type:         pkg.JavaPkg,
		MetadataType: pkg.JavaMetadataType,
		Metadata: pkg.JavaMetadata{
			VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar",
			Manifest: &pkg.JavaManifest{
				Main: map[string]string{
					"Manifest-Version": "1.0",
				},
			},
			PomProperties: &pkg.PomProperties{
				Path:       "META-INF/maven/org.anchore/example-java-app-maven/pom.properties",
				GroupID:    "org.anchore",
				ArtifactID: "example-java-app-maven",
				Version:    "0.1.0",
				Extra:      map[string]string{},
			},
		},
	}

	cases := []struct {
		pkg    pkg.Package
		expect string
	}{
		{
			pkg:    mavenApp,
			expect: "pkg:maven/org.anchore/example-java-app-maven@0.1.0",
		},
	}

	// each case is named after the pURL it expects
	for _, tc := range cases {
		t.Run(tc.expect, func(t *testing.T) {
			assert.Equal(t, tc.expect, packageURL(tc.pkg))
		})
	}
}
21 changes: 4 additions & 17 deletions syft/pkg/java_metadata.go
Expand Up @@ -5,13 +5,12 @@ import (

"github.com/anchore/syft/syft/linux"

"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal"
)

var _ urlIdentifier = (*JavaMetadata)(nil)

var JenkinsPluginPomPropertiesGroupIDs = []string{
var jenkinsPluginPomPropertiesGroupIDs = []string{
"io.jenkins.plugins",
"org.jenkins.plugins",
"org.jenkins-ci.plugins",
Expand All @@ -25,6 +24,7 @@ type JavaMetadata struct {
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
}

Expand Down Expand Up @@ -59,7 +59,7 @@ type PomParent struct {

// PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties.
func (p PomProperties) PkgTypeIndicated() Type {
if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") {
if internal.HasAnyOfPrefixes(p.GroupID, jenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") {
return JenkinsPluginPkg
}

Expand All @@ -74,18 +74,5 @@ type JavaManifest struct {

// PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec)
func (m JavaMetadata) PackageURL(_ *linux.Release) string {
if m.PomProperties != nil {
pURL := packageurl.NewPackageURL(
packageurl.TypeMaven,
m.PomProperties.GroupID,
m.PomProperties.ArtifactID,
m.PomProperties.Version,
nil, // TODO: there are probably several qualifiers that can be specified here
"")
return pURL.ToString()
}

// TODO: support non-maven artifacts

return ""
return m.PURL
}
36 changes: 0 additions & 36 deletions syft/pkg/java_metadata_test.go
Expand Up @@ -3,7 +3,6 @@ package pkg
import (
"testing"

"github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -110,38 +109,3 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) {
})
}
}

// TestJavaMetadata_pURL is a table-driven check of JavaMetadata.PackageURL:
// each case calls PackageURL(nil) on the given metadata and compares the
// returned string with the expected pURL.
func TestJavaMetadata_pURL(t *testing.T) {
tests := []struct {
metadata JavaMetadata
expected string
}{
// metadata with pom properties: a maven pURL is expected
{
metadata: JavaMetadata{
PomProperties: &PomProperties{
Path: "p",
Name: "n",
GroupID: "g.id",
ArtifactID: "a",
Version: "v",
},
},
expected: "pkg:maven/g.id/a@v",
},
// empty metadata: an empty pURL is expected
{
metadata: JavaMetadata{},
expected: "",
},
}

for _, test := range tests {
// the expected pURL doubles as the subtest name
t.Run(test.expected, func(t *testing.T) {
actual := test.metadata.PackageURL(nil)
if actual != test.expected {
// on mismatch, report a pretty-printed text diff of expected vs. actual
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(test.expected, actual, true)
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
}
})
}
}
18 changes: 11 additions & 7 deletions syft/pkg/language.go
@@ -1,6 +1,10 @@
package pkg

import "github.com/anchore/packageurl-go"
import (
"strings"

"github.com/anchore/packageurl-go"
)

// Language represents a single programming language.
type Language string
Expand Down Expand Up @@ -43,16 +47,16 @@ func LanguageFromPURL(p string) Language {
}

func LanguageByName(name string) Language {
switch name {
case packageurl.TypeMaven, purlGradlePkgType:
switch strings.ToLower(name) {
case packageurl.TypeMaven, string(purlGradlePkgType), string(JavaPkg), string(Java):
return Java
case packageurl.TypeComposer:
case packageurl.TypeComposer, string(PhpComposerPkg), string(PHP):
return PHP
case packageurl.TypeGolang:
case packageurl.TypeGolang, string(GoModulePkg), string(Go):
return Go
case packageurl.TypeNPM:
case packageurl.TypeNPM, string(JavaScript):
return JavaScript
case packageurl.TypePyPi:
case packageurl.TypePyPi, string(Python):
return Python
case packageurl.TypeGem:
return Ruby
Expand Down
15 changes: 13 additions & 2 deletions syft/pkg/npm_package_json_metadata.go
@@ -1,6 +1,8 @@
package pkg

import (
"strings"

"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)
Expand All @@ -21,10 +23,19 @@ type NpmPackageJSONMetadata struct {

// PackageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec)
func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string {
var namespace string
name := p.Name

fields := strings.SplitN(p.Name, "/", 2)
if len(fields) > 1 {
namespace = fields[0]
name = fields[1]
}

return packageurl.NewPackageURL(
packageurl.TypeNPM,
"",
p.Name,
namespace,
wagoodman marked this conversation as resolved.
Show resolved Hide resolved
name,
p.Version,
nil,
"",
Expand Down