Skip to content

Commit

Permalink
Extract language and package type from pURLs on SBOM decode (#777)
Browse files Browse the repository at this point in the history
* add language detection from pURLs

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add package type detection from pURLs

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add cargo and npm pURL support

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix npm tests and linting

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
  • Loading branch information
wagoodman committed Jan 27, 2022
1 parent 9f7104d commit d7a23e4
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 21 deletions.
19 changes: 19 additions & 0 deletions syft/pkg/cargo_package_metadata.go
@@ -1,5 +1,12 @@
package pkg

import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)

// compile-time check: the build fails if *CargoPackageMetadata stops being assignable to urlIdentifier
var _ urlIdentifier = (*CargoPackageMetadata)(nil)

type CargoPackageMetadata struct {
Name string `toml:"name" json:"name"`
Version string `toml:"version" json:"version"`
Expand All @@ -19,3 +26,15 @@ func (p CargoPackageMetadata) Pkg() *Package {
Metadata: p,
}
}

// PackageURL builds the package URL (pURL) string identifying this rust crate. The pURL uses
// the "cargo" type with the crate name and version; namespace, qualifiers, and subpath are
// left empty. The linux release argument is not consulted.
// See https://github.com/package-url/purl-spec for the pURL format.
func (p CargoPackageMetadata) PackageURL(_ *linux.Release) string {
	const purlType = "cargo"

	purl := packageurl.NewPackageURL(purlType, "", p.Name, p.Version, nil, "")
	return purl.ToString()
}
42 changes: 22 additions & 20 deletions syft/pkg/cataloger/javascript/parse_package_json.go
Expand Up @@ -21,27 +21,27 @@ import (
// integrity check: the build fails if parsePackageJSON is no longer assignable to common.ParserFn
var _ common.ParserFn = parsePackageJSON

// PackageJSON represents a JavaScript package.json file
type PackageJSON struct {
// packageJSON represents a JavaScript package.json file
type packageJSON struct {
Version string `json:"version"`
Latest []string `json:"latest"`
Author Author `json:"author"`
Author author `json:"author"`
License json.RawMessage `json:"license"`
Licenses []license `json:"licenses"`
Name string `json:"name"`
Homepage string `json:"homepage"`
Description string `json:"description"`
Dependencies map[string]string `json:"dependencies"`
Repository Repository `json:"repository"`
Repository repository `json:"repository"`
}

type Author struct {
type author struct {
Name string `json:"name" mapstruct:"name"`
Email string `json:"email" mapstruct:"email"`
URL string `json:"url" mapstruct:"url"`
}

type Repository struct {
type repository struct {
Type string `json:"type" mapstructure:"type"`
URL string `json:"url" mapstructure:"url"`
}
Expand All @@ -50,10 +50,10 @@ type Repository struct {
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)

func (a *Author) UnmarshalJSON(b []byte) error {
func (a *author) UnmarshalJSON(b []byte) error {
var authorStr string
var fields map[string]string
var author Author
var auth author

if err := json.Unmarshal(b, &authorStr); err != nil {
// string parsing did not work, assume a map was given
Expand All @@ -62,21 +62,21 @@ func (a *Author) UnmarshalJSON(b []byte) error {
return fmt.Errorf("unable to parse package.json author: %w", err)
}
} else {
// parse out "name <email> (url)" into an Author struct
// parse out "name <email> (url)" into an author struct
fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
}

// translate the map into a structure
if err := mapstructure.Decode(fields, &author); err != nil {
if err := mapstructure.Decode(fields, &auth); err != nil {
return fmt.Errorf("unable to decode package.json author: %w", err)
}

*a = author
*a = auth

return nil
}

func (a *Author) AuthorString() string {
func (a *author) AuthorString() string {
result := a.Name
if a.Email != "" {
result += fmt.Sprintf(" <%s>", a.Email)
Expand All @@ -87,10 +87,10 @@ func (a *Author) AuthorString() string {
return result
}

func (r *Repository) UnmarshalJSON(b []byte) error {
func (r *repository) UnmarshalJSON(b []byte) error {
var repositoryStr string
var fields map[string]string
var repository Repository
var repo repository

if err := json.Unmarshal(b, &repositoryStr); err != nil {
// string parsing did not work, assume a map was given
Expand All @@ -99,11 +99,11 @@ func (r *Repository) UnmarshalJSON(b []byte) error {
return fmt.Errorf("unable to parse package.json author: %w", err)
}
// translate the map into a structure
if err := mapstructure.Decode(fields, &repository); err != nil {
if err := mapstructure.Decode(fields, &repo); err != nil {
return fmt.Errorf("unable to decode package.json author: %w", err)
}

*r = repository
*r = repo
} else {
r.URL = repositoryStr
}
Expand Down Expand Up @@ -134,7 +134,7 @@ func licenseFromJSON(b []byte) (string, error) {
return "", errors.New("unable to unmarshal license field as either string or object")
}

func (p PackageJSON) licensesFromJSON() ([]string, error) {
func (p packageJSON) licensesFromJSON() ([]string, error) {
if p.License == nil && p.Licenses == nil {
// This package.json doesn't specify any licenses whatsoever
return []string{}, nil
Expand Down Expand Up @@ -167,7 +167,7 @@ func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact
dec := json.NewDecoder(reader)

for {
var p PackageJSON
var p packageJSON
if err := dec.Decode(&p); err == io.EOF {
break
} else if err != nil {
Expand All @@ -185,7 +185,7 @@ func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact
return packages, nil, nil
}

func newPackageJSONPackage(p PackageJSON) *pkg.Package {
func newPackageJSONPackage(p packageJSON) *pkg.Package {
licenses, err := p.licensesFromJSON()
if err != nil {
log.Warnf("unable to extract licenses from javascript package.json: %+v", err)
Expand All @@ -199,6 +199,8 @@ func newPackageJSONPackage(p PackageJSON) *pkg.Package {
Type: pkg.NpmPkg,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: p.Name,
Version: p.Version,
Author: p.Author.AuthorString(),
Homepage: p.Homepage,
URL: p.Repository.URL,
Expand All @@ -207,7 +209,7 @@ func newPackageJSONPackage(p PackageJSON) *pkg.Package {
}
}

func (p PackageJSON) hasNameAndVersionValues() bool {
func (p packageJSON) hasNameAndVersionValues() bool {
return p.Name != "" && p.Version != ""
}

Expand Down
12 changes: 12 additions & 0 deletions syft/pkg/cataloger/javascript/parse_package_json_test.go
Expand Up @@ -24,6 +24,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Expand All @@ -41,6 +43,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Expand All @@ -58,6 +62,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Expand All @@ -75,6 +81,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Expand All @@ -92,6 +100,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Expand All @@ -109,6 +119,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Name: "function-bind",
Version: "1.1.1",
Author: "Raynos <raynos2@gmail.com>",
Homepage: "https://github.com/Raynos/function-bind",
URL: "git://github.com/Raynos/function-bind.git",
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/java_metadata.go
Expand Up @@ -72,7 +72,7 @@ type JavaManifest struct {
NamedSections map[string]map[string]string `json:"namedSections,omitempty"`
}

// PackageURL returns the PURL for the specific Alpine package (see https://github.com/package-url/purl-spec)
// PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec)
func (m JavaMetadata) PackageURL(_ *linux.Release) string {
if m.PomProperties != nil {
pURL := packageurl.NewPackageURL(
Expand Down
28 changes: 28 additions & 0 deletions syft/pkg/language.go
@@ -1,5 +1,7 @@
package pkg

import "github.com/anchore/packageurl-go"

// Language represents a single programming language.
type Language string

Expand Down Expand Up @@ -30,3 +32,29 @@ var AllLanguages = []Language{
func (l Language) String() string {
return string(l)
}

// LanguageFromPURL reports the programming language implied by the type component of the
// given package URL string. UnknownLanguage is returned when the string cannot be parsed as
// a pURL, or when its type is not one of the types mapped to a language below.
func LanguageFromPURL(p string) Language {
	parsed, parseErr := packageurl.FromString(p)
	if parseErr != nil {
		// without a parsable pURL there is no type to inspect
		return UnknownLanguage
	}

	switch parsed.Type {
	case purlCargoPkgType:
		return Rust
	case packageurl.TypeGem:
		return Ruby
	case packageurl.TypePyPi:
		return Python
	case packageurl.TypeNPM:
		return JavaScript
	case packageurl.TypeGolang:
		return Go
	case packageurl.TypeComposer:
		return PHP
	case purlGradlePkgType, packageurl.TypeMaven:
		// both of these pURL types map to the same language
		return Java
	}

	return UnknownLanguage
}
66 changes: 66 additions & 0 deletions syft/pkg/language_test.go
@@ -0,0 +1,66 @@
package pkg

import (
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
"testing"
)

// TestLanguageFromPURL checks that LanguageFromPURL maps each pURL in the table to the
// expected Language, and then verifies that the table as a whole exercises every entry of
// AllLanguages (so a newly added language without a test case is caught).
func TestLanguageFromPURL(t *testing.T) {

	tests := []struct {
		purl string
		want Language
	}{

		{
			purl: "pkg:npm/util@2.32",
			want: JavaScript,
		},
		{
			purl: "pkg:pypi/util-linux@2.32.1-27.el8",
			want: Python,
		},
		{
			purl: "pkg:gem/ruby-advisory-db-check@0.12.4",
			want: Ruby,
		},
		{
			purl: "pkg:golang/github.com/gorilla/context@234fd47e07d1004f0aed9c",
			want: Go,
		},
		{
			purl: "pkg:cargo/clap@2.33.0",
			want: Rust,
		},
		{
			purl: "pkg:composer/laravel/laravel@5.5.0",
			want: PHP,
		},
		{
			purl: "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?type=zip&classifier=dist",
			want: Java,
		},
	}

	// Detected languages are tracked as a set rather than a slice: several pURL types can
	// resolve to the same language (LanguageFromPURL maps both maven and gradle to Java), and
	// duplicate entries would make the coverage comparison below fail for the wrong reason.
	var languages = strset.New()
	var expectedLanguages = strset.New()
	for _, ty := range AllLanguages {
		expectedLanguages.Add(string(ty))
	}

	for _, tt := range tests {
		t.Run(tt.purl, func(t *testing.T) {
			actual := LanguageFromPURL(tt.purl)

			// empty results are left out of the coverage set
			if actual != "" {
				languages.Add(string(actual))
			}

			assert.Equalf(t, tt.want, actual, "LanguageFromPURL(%v)", tt.purl)
		})
	}

	assert.ElementsMatch(t, expectedLanguages.List(), languages.List(), "missing one or more languages to test against (maybe a package type was added?)")

}
21 changes: 21 additions & 0 deletions syft/pkg/npm_package_json_metadata.go
@@ -1,11 +1,32 @@
package pkg

import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)

// compile-time check: the build fails if *NpmPackageJSONMetadata stops being assignable to urlIdentifier
var _ urlIdentifier = (*NpmPackageJSONMetadata)(nil)

// NpmPackageJSONMetadata holds extra information that is used in pkg.Package
// for packages described by a JavaScript package.json file. Every field carries both a
// mapstructure and a json tag so the struct can be decoded from generic maps and
// encoded to/decoded from JSON.
type NpmPackageJSONMetadata struct {
// Name is the package name; it becomes the name component of the pURL built by PackageURL.
Name string `mapstructure:"name" json:"name"`
// Version is the package version; it becomes the version component of the pURL built by PackageURL.
Version string `mapstructure:"version" json:"version"`
// Files is omitted from JSON output when empty.
// NOTE(review): presumably the file paths belonging to the package — confirm against the catalogers that set it.
Files []string `mapstructure:"files" json:"files,omitempty"`
// Author is the package author as a single string (the package.json cataloger stores the
// rendered author string here).
Author string `mapstructure:"author" json:"author"`
// Licenses lists the licenses associated with the package.
Licenses []string `mapstructure:"licenses" json:"licenses"`
// Homepage is the package homepage as given in package.json.
Homepage string `mapstructure:"homepage" json:"homepage"`
// Description is the package description.
Description string `mapstructure:"description" json:"description"`
// URL is filled from the package.json repository URL by the package.json cataloger.
URL string `mapstructure:"url" json:"url"`
}

// PackageURL builds the package URL (pURL) string identifying this NPM package. The pURL uses
// the npm type with the recorded name and version; namespace, qualifiers, and subpath are
// left empty. The linux release argument is not consulted.
// See https://github.com/package-url/purl-spec for the pURL format.
//
// NOTE(review): the name is passed through whole, so a scoped name such as "@scope/name"
// ends up entirely in the name component rather than the namespace — confirm this is intended.
func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string {
	purl := packageurl.NewPackageURL(packageurl.TypeNPM, "", p.Name, p.Version, nil, "")
	return purl.ToString()
}

0 comments on commit d7a23e4

Please sign in to comment.