From d7a23e4bb2a8b08c3ed41e99e4c433551eacbfaf Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 27 Jan 2022 09:35:16 -0500 Subject: [PATCH] Extract language and package type from pURLs on SBOM decode (#777) * add language detection from pURLs Signed-off-by: Alex Goodman * add package type detection from pURLs Signed-off-by: Alex Goodman * add cargo and npm pURL support Signed-off-by: Alex Goodman * fix npm tests and linting Signed-off-by: Alex Goodman --- syft/pkg/cargo_package_metadata.go | 19 +++++ .../javascript/parse_package_json.go | 42 +++++----- .../javascript/parse_package_json_test.go | 12 +++ syft/pkg/java_metadata.go | 2 +- syft/pkg/language.go | 28 +++++++ syft/pkg/language_test.go | 66 +++++++++++++++ syft/pkg/npm_package_json_metadata.go | 21 +++++ syft/pkg/type.go | 32 +++++++ syft/pkg/type_test.go | 83 +++++++++++++++++++ syft/pkg/url.go | 3 + 10 files changed, 287 insertions(+), 21 deletions(-) create mode 100644 syft/pkg/language_test.go create mode 100644 syft/pkg/type_test.go diff --git a/syft/pkg/cargo_package_metadata.go b/syft/pkg/cargo_package_metadata.go index 8ab6da20ed1..936062d4452 100644 --- a/syft/pkg/cargo_package_metadata.go +++ b/syft/pkg/cargo_package_metadata.go @@ -1,5 +1,12 @@ package pkg +import ( + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/linux" +) + +var _ urlIdentifier = (*CargoPackageMetadata)(nil) + type CargoPackageMetadata struct { Name string `toml:"name" json:"name"` Version string `toml:"version" json:"version"` @@ -19,3 +26,15 @@ func (p CargoPackageMetadata) Pkg() *Package { Metadata: p, } } + +// PackageURL returns the PURL for the specific rust package (see https://github.com/package-url/purl-spec) +func (p CargoPackageMetadata) PackageURL(_ *linux.Release) string { + return packageurl.NewPackageURL( + "cargo", + "", + p.Name, + p.Version, + nil, + "", + ).ToString() +} diff --git a/syft/pkg/cataloger/javascript/parse_package_json.go 
b/syft/pkg/cataloger/javascript/parse_package_json.go index 64910616a18..d80781d6fb4 100644 --- a/syft/pkg/cataloger/javascript/parse_package_json.go +++ b/syft/pkg/cataloger/javascript/parse_package_json.go @@ -21,27 +21,27 @@ import ( // integrity check var _ common.ParserFn = parsePackageJSON -// PackageJSON represents a JavaScript package.json file -type PackageJSON struct { +// packageJSON represents a JavaScript package.json file +type packageJSON struct { Version string `json:"version"` Latest []string `json:"latest"` - Author Author `json:"author"` + Author author `json:"author"` License json.RawMessage `json:"license"` Licenses []license `json:"licenses"` Name string `json:"name"` Homepage string `json:"homepage"` Description string `json:"description"` Dependencies map[string]string `json:"dependencies"` - Repository Repository `json:"repository"` + Repository repository `json:"repository"` } -type Author struct { +type author struct { Name string `json:"name" mapstruct:"name"` Email string `json:"email" mapstruct:"email"` URL string `json:"url" mapstruct:"url"` } -type Repository struct { +type repository struct { Type string `json:"type" mapstructure:"type"` URL string `json:"url" mapstructure:"url"` } @@ -50,10 +50,10 @@ type Repository struct { // ---> name: "Isaac Z. 
Schlueter" email: "i@izs.me" url: "http://blog.izs.me" var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`) -func (a *Author) UnmarshalJSON(b []byte) error { +func (a *author) UnmarshalJSON(b []byte) error { var authorStr string var fields map[string]string - var author Author + var auth author if err := json.Unmarshal(b, &authorStr); err != nil { // string parsing did not work, assume a map was given @@ -62,21 +62,21 @@ func (a *Author) UnmarshalJSON(b []byte) error { return fmt.Errorf("unable to parse package.json author: %w", err) } } else { - // parse out "name <email> (url)" into an Author struct + // parse out "name <email> (url)" into an author struct fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr) } // translate the map into a structure - if err := mapstructure.Decode(fields, &author); err != nil { + if err := mapstructure.Decode(fields, &auth); err != nil { return fmt.Errorf("unable to decode package.json author: %w", err) } - *a = author + *a = auth return nil } -func (a *Author) AuthorString() string { +func (a *author) AuthorString() string { result := a.Name if a.Email != "" { result += fmt.Sprintf(" <%s>", a.Email) @@ -87,10 +87,10 @@ func (a *Author) AuthorString() string { return result } -func (r *Repository) UnmarshalJSON(b []byte) error { +func (r *repository) UnmarshalJSON(b []byte) error { var repositoryStr string var fields map[string]string - var repository Repository + var repo repository if err := json.Unmarshal(b, &repositoryStr); err != nil { // string parsing did not work, assume a map was given @@ -99,11 +99,11 @@ func (r *Repository) UnmarshalJSON(b []byte) error { return fmt.Errorf("unable to parse package.json author: %w", err) } // translate the map into a structure - if err := mapstructure.Decode(fields, &repository); err != nil { + if err := mapstructure.Decode(fields, &repo); err != nil { return fmt.Errorf("unable to decode package.json author: %w", err) } - *r = repository + *r = repo } 
else { r.URL = repositoryStr } @@ -134,7 +134,7 @@ func licenseFromJSON(b []byte) (string, error) { return "", errors.New("unable to unmarshal license field as either string or object") } -func (p PackageJSON) licensesFromJSON() ([]string, error) { +func (p packageJSON) licensesFromJSON() ([]string, error) { if p.License == nil && p.Licenses == nil { // This package.json doesn't specify any licenses whatsoever return []string{}, nil @@ -167,7 +167,7 @@ func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact dec := json.NewDecoder(reader) for { - var p PackageJSON + var p packageJSON if err := dec.Decode(&p); err == io.EOF { break } else if err != nil { @@ -185,7 +185,7 @@ func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact return packages, nil, nil } -func newPackageJSONPackage(p PackageJSON) *pkg.Package { +func newPackageJSONPackage(p packageJSON) *pkg.Package { licenses, err := p.licensesFromJSON() if err != nil { log.Warnf("unable to extract licenses from javascript package.json: %+v", err) @@ -199,6 +199,8 @@ func newPackageJSONPackage(p PackageJSON) *pkg.Package { Type: pkg.NpmPkg, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: p.Name, + Version: p.Version, Author: p.Author.AuthorString(), Homepage: p.Homepage, URL: p.Repository.URL, @@ -207,7 +209,7 @@ func newPackageJSONPackage(p PackageJSON) *pkg.Package { } } -func (p PackageJSON) hasNameAndVersionValues() bool { +func (p packageJSON) hasNameAndVersionValues() bool { return p.Name != "" && p.Version != "" } diff --git a/syft/pkg/cataloger/javascript/parse_package_json_test.go b/syft/pkg/cataloger/javascript/parse_package_json_test.go index 99861a11143..2c9d6ea07ab 100644 --- a/syft/pkg/cataloger/javascript/parse_package_json_test.go +++ b/syft/pkg/cataloger/javascript/parse_package_json_test.go @@ -24,6 +24,8 @@ func TestParsePackageJSON(t *testing.T) { Language: pkg.JavaScript, MetadataType: 
pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: "npm", + Version: "6.14.6", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Homepage: "https://docs.npmjs.com/", URL: "https://github.com/npm/cli", @@ -41,6 +43,8 @@ func TestParsePackageJSON(t *testing.T) { Language: pkg.JavaScript, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: "npm", + Version: "6.14.6", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Homepage: "https://docs.npmjs.com/", URL: "https://github.com/npm/cli", @@ -58,6 +62,8 @@ func TestParsePackageJSON(t *testing.T) { Language: pkg.JavaScript, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: "npm", + Version: "6.14.6", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Homepage: "https://docs.npmjs.com/", URL: "https://github.com/npm/cli", @@ -75,6 +81,8 @@ func TestParsePackageJSON(t *testing.T) { Language: pkg.JavaScript, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: "npm", + Version: "6.14.6", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Homepage: "https://docs.npmjs.com/", URL: "https://github.com/npm/cli", @@ -92,6 +100,8 @@ func TestParsePackageJSON(t *testing.T) { Language: pkg.JavaScript, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: "npm", + Version: "6.14.6", Author: "Isaac Z. 
Schlueter <i@izs.me> (http://blog.izs.me)", Homepage: "https://docs.npmjs.com/", URL: "https://github.com/npm/cli", @@ -109,6 +119,8 @@ func TestParsePackageJSON(t *testing.T) { Language: pkg.JavaScript, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ + Name: "function-bind", + Version: "1.1.1", Author: "Raynos ", Homepage: "https://github.com/Raynos/function-bind", URL: "git://github.com/Raynos/function-bind.git", diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index a3c5d8e3157..7fe919ae566 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -72,7 +72,7 @@ type JavaManifest struct { NamedSections map[string]map[string]string `json:"namedSections,omitempty"` } -// PackageURL returns the PURL for the specific Alpine package (see https://github.com/package-url/purl-spec) +// PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec) func (m JavaMetadata) PackageURL(_ *linux.Release) string { if m.PomProperties != nil { pURL := packageurl.NewPackageURL( diff --git a/syft/pkg/language.go b/syft/pkg/language.go index 582a0122a53..ce6f106231e 100644 --- a/syft/pkg/language.go +++ b/syft/pkg/language.go @@ -1,5 +1,7 @@ package pkg +import "github.com/anchore/packageurl-go" + // Language represents a single programming language. 
type Language string @@ -30,3 +32,29 @@ var AllLanguages = []Language{ func (l Language) String() string { return string(l) } + +func LanguageFromPURL(p string) Language { + purl, err := packageurl.FromString(p) + if err != nil { + return UnknownLanguage + } + + switch purl.Type { + case packageurl.TypeMaven, purlGradlePkgType: + return Java + case packageurl.TypeComposer: + return PHP + case packageurl.TypeGolang: + return Go + case packageurl.TypeNPM: + return JavaScript + case packageurl.TypePyPi: + return Python + case packageurl.TypeGem: + return Ruby + case purlCargoPkgType: + return Rust + default: + return UnknownLanguage + } +} diff --git a/syft/pkg/language_test.go b/syft/pkg/language_test.go new file mode 100644 index 00000000000..f2989c37a8b --- /dev/null +++ b/syft/pkg/language_test.go @@ -0,0 +1,66 @@ +package pkg + +import ( + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestLanguageFromPURL(t *testing.T) { + + tests := []struct { + purl string + want Language + }{ + + { + purl: "pkg:npm/util@2.32", + want: JavaScript, + }, + { + purl: "pkg:pypi/util-linux@2.32.1-27.el8", + want: Python, + }, + { + purl: "pkg:gem/ruby-advisory-db-check@0.12.4", + want: Ruby, + }, + { + purl: "pkg:golang/github.com/gorilla/context@234fd47e07d1004f0aed9c", + want: Go, + }, + { + purl: "pkg:cargo/clap@2.33.0", + want: Rust, + }, + { + purl: "pkg:composer/laravel/laravel@5.5.0", + want: PHP, + }, + { + purl: "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?type=zip&classifier=dist", + want: Java, + }, + } + + var languages []string + var expectedLanguages = strset.New() + for _, ty := range AllLanguages { + expectedLanguages.Add(string(ty)) + } + + for _, tt := range tests { + t.Run(tt.purl, func(t *testing.T) { + actual := LanguageFromPURL(tt.purl) + + if actual != "" { + languages = append(languages, string(actual)) + } + + assert.Equalf(t, tt.want, actual, "LanguageFromPURL(%v)", tt.purl) + }) + } + + 
assert.ElementsMatch(t, expectedLanguages.List(), languages, "missing one or more languages to test against (maybe a package type was added?)") + +} diff --git a/syft/pkg/npm_package_json_metadata.go b/syft/pkg/npm_package_json_metadata.go index 3164047653c..3246b5909a1 100644 --- a/syft/pkg/npm_package_json_metadata.go +++ b/syft/pkg/npm_package_json_metadata.go @@ -1,7 +1,16 @@ package pkg +import ( + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/linux" +) + +var _ urlIdentifier = (*NpmPackageJSONMetadata)(nil) + // NpmPackageJSONMetadata holds extra information that is used in pkg.Package type NpmPackageJSONMetadata struct { + Name string `mapstructure:"name" json:"name"` + Version string `mapstructure:"version" json:"version"` Files []string `mapstructure:"files" json:"files,omitempty"` Author string `mapstructure:"author" json:"author"` Licenses []string `mapstructure:"licenses" json:"licenses"` @@ -9,3 +18,15 @@ type NpmPackageJSONMetadata struct { Description string `mapstructure:"description" json:"description"` URL string `mapstructure:"url" json:"url"` } + +// PackageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec) +func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string { + return packageurl.NewPackageURL( + packageurl.TypeNPM, + "", + p.Name, + p.Version, + nil, + "", + ).ToString() +} diff --git a/syft/pkg/type.go b/syft/pkg/type.go index 572cead5421..4d4f8d4c71f 100644 --- a/syft/pkg/type.go +++ b/syft/pkg/type.go @@ -66,3 +66,35 @@ func (t Type) PackageURLType() string { return "" } } + +func TypeFromPURL(p string) Type { + purl, err := packageurl.FromString(p) + if err != nil { + return UnknownPkg + } + + switch purl.Type { + case packageurl.TypeDebian, "deb": + return DebPkg + case packageurl.TypeRPM: + return RpmPkg + case "alpine": + return ApkPkg + case packageurl.TypeMaven: + return JavaPkg + case packageurl.TypeComposer: + return PhpComposerPkg + case 
packageurl.TypeGolang: + return GoModulePkg + case packageurl.TypeNPM: + return NpmPkg + case packageurl.TypePyPi: + return PythonPkg + case packageurl.TypeGem: + return GemPkg + case "cargo", "crate": + return RustPkg + default: + return UnknownPkg + } +} diff --git a/syft/pkg/type_test.go b/syft/pkg/type_test.go new file mode 100644 index 00000000000..d5f5bc3e40b --- /dev/null +++ b/syft/pkg/type_test.go @@ -0,0 +1,83 @@ +package pkg + +import ( + "github.com/scylladb/go-set/strset" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTypeFromPURL(t *testing.T) { + + tests := []struct { + name string + purl string + expected Type + }{ + { + purl: "pkg:rpm/fedora/util-linux@2.32.1-27.el8-?arch=amd64", + expected: RpmPkg, + }, + { + purl: "pkg:alpine/util-linux@2.32.1", + expected: ApkPkg, + }, + { + purl: "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie", + expected: DebPkg, + }, + { + purl: "pkg:npm/util@2.32", + expected: NpmPkg, + }, + { + purl: "pkg:pypi/util-linux@2.32.1-27.el8", + expected: PythonPkg, + }, + { + purl: "pkg:gem/ruby-advisory-db-check@0.12.4", + expected: GemPkg, + }, + { + purl: "pkg:golang/github.com/gorilla/context@234fd47e07d1004f0aed9c", + expected: GoModulePkg, + }, + { + purl: "pkg:cargo/clap@2.33.0", + expected: RustPkg, + }, + { + purl: "pkg:composer/laravel/laravel@5.5.0", + expected: PhpComposerPkg, + }, + { + purl: "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?type=zip&classifier=dist", + expected: JavaPkg, + }, + } + + var pkgTypes []string + var expectedTypes = strset.New() + for _, ty := range AllPkgs { + expectedTypes.Add(string(ty)) + } + + // testing microsoft packages and jenkins-plugins is not valid for purl at this time + expectedTypes.Remove(string(KbPkg)) + expectedTypes.Remove(string(JenkinsPluginPkg)) + + for _, test := range tests { + t.Run(string(test.expected), func(t *testing.T) { + actual := TypeFromPURL(test.purl) + + if actual != "" { + pkgTypes = append(pkgTypes, string(actual)) + } + + 
assert.Equal(t, test.expected, actual) + }) + } + + assert.ElementsMatch(t, expectedTypes.List(), pkgTypes, "missing one or more package types to test against (maybe a package type was added?)") + +} diff --git a/syft/pkg/url.go b/syft/pkg/url.go index e62157d2b24..1bab84b45b9 100644 --- a/syft/pkg/url.go +++ b/syft/pkg/url.go @@ -18,6 +18,9 @@ const ( // this qualifier is not in the pURL spec, but is used by grype to perform indirect matching based on source information purlUpstreamQualifier = "upstream" + + purlCargoPkgType = "cargo" + purlGradlePkgType = "gradle" ) type urlIdentifier interface {