diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go
index 1d236cd746db..ae894a079272 100644
--- a/pkg/detectors/aws/aws.go
+++ b/pkg/detectors/aws/aws.go
@@ -17,10 +17,40 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
 )
 
-type Scanner struct{}
+// scanner detects AWS credentials in chunks of data. Access key IDs present in
+// skipIDs are ignored by FromData.
+type scanner struct {
+	skipIDs map[string]struct{}
+}
+
+// New returns a scanner with every option in opts applied in order. With no
+// options, no access key IDs are skipped.
+func New(opts ...func(*scanner)) *scanner {
+	s := &scanner{
+		skipIDs: map[string]struct{}{},
+	}
+	for _, opt := range opts {
+		opt(s)
+	}
+
+	return s
+}
+
+// WithSkipIDs returns an option that replaces the scanner's skip set with the
+// given access key IDs; FromData ignores any ID match found in that set.
+func WithSkipIDs(skipIDs []string) func(*scanner) {
+	return func(s *scanner) {
+		ids := make(map[string]struct{}, len(skipIDs))
+		for _, id := range skipIDs {
+			ids[id] = struct{}{}
+		}
+
+		s.skipIDs = ids
+	}
+}
 
-// Ensure the Scanner satisfies the interface at compile time.
-var _ detectors.Detector = (*Scanner)(nil)
+// Ensure the scanner satisfies the interface at compile time.
+var _ detectors.Detector = (*scanner)(nil)
 
 var (
 	client = common.SaneHttpClient()
@@ -37,7 +67,7 @@ var (
 
 // Keywords are used for efficiently pre-filtering chunks.
 // Use identifiers in the secret preferably, or the provider name.
-func (s Scanner) Keywords() []string {
+func (s scanner) Keywords() []string {
 	return []string{
 		"AKIA",
 		"ABIA",
@@ -60,7 +90,7 @@ func GetHMAC(key []byte, data []byte) []byte {
 }
 
 // FromData will find and optionally verify AWS secrets in a given set of bytes.
-func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
+func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
 	dataStr := string(data)
 
 	idMatches := idPat.FindAllStringSubmatch(dataStr, -1)
@@ -72,6 +102,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
 		}
 		resIDMatch := strings.TrimSpace(idMatch[1])
 
+		// A lookup on a nil map is safe and reports "not found", so no nil guard is needed.
+		if _, skip := s.skipIDs[resIDMatch]; skip {
+			continue
+		}
+
 		for _, secretMatch := range secretMatches {
 			if len(secretMatch) != 2 {
 				continue
diff --git a/pkg/detectors/aws/aws_test.go b/pkg/detectors/aws/aws_test.go
index 354e3fa275a6..f289451b86ee 100644
--- a/pkg/detectors/aws/aws_test.go
+++ b/pkg/detectors/aws/aws_test.go
@@ -39,14 +39,14 @@ func TestAWS_FromChunk(t *testing.T) {
 	}
 	tests := []struct {
 		name    string
-		s       Scanner
+		s       scanner
 		args    args
 		want    []detectors.Result
 		wantErr bool
 	}{
 		{
 			name: "found, verified",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte(fmt.Sprintf("You can find a aws secret %s within aws %s", secret, id)),
@@ -68,7 +68,7 @@ func TestAWS_FromChunk(t *testing.T) {
 		},
 		{
 			name: "found, unverified",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte(fmt.Sprintf("You can find a aws secret %s within aws %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation
@@ -86,7 +86,7 @@ func TestAWS_FromChunk(t *testing.T) {
 		},
 		{
 			name: "not found",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte("You cannot find the secret within"),
@@ -97,7 +97,7 @@ func TestAWS_FromChunk(t *testing.T) {
 		},
 		{
 			name: "found two, one included for every ID found",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte(fmt.Sprintf("The verified ID is %s with a secret of %s, but the unverified ID is %s and this is the secret %s", id, secret, inactiveID, inactiveSecret)),
@@ -124,7 +124,7 @@ func TestAWS_FromChunk(t *testing.T) {
 		},
 		{
 			name: "not found, because unverified secret was a hash",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte(fmt.Sprintf("You can find a aws secret %s within aws %s but not valid", hash, id)), // The secret would satisfy the regex but be filtered out after not passing validation.
@@ -135,7 +135,7 @@ func TestAWS_FromChunk(t *testing.T) {
 		},
 		{
 			name: "found two, returned both because the active secret for one paired with the inactive ID, despite the hash",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte(fmt.Sprintf("The verified ID is %s with a secret of %s, but the unverified ID is %s and the secret is this hash %s", id, secret, inactiveID, hash)),
@@ -162,7 +162,7 @@ func TestAWS_FromChunk(t *testing.T) {
 		},
 		{
 			name: "found, unverified, with leading +",
-			s:    Scanner{},
+			s:    scanner{},
 			args: args{
 				ctx:    context.Background(),
 				data:   []byte(fmt.Sprintf("You can find a aws secret %s within aws %s but not valid", "+HaNv9cTwheDKGJaws/+BMF2GgybQgBWdhcOOdfF", id)), // the secret would satisfy the regex but not pass validation
@@ -177,10 +177,24 @@ func TestAWS_FromChunk(t *testing.T) {
 			},
 			wantErr: false,
 		},
+		{
+			name: "skipped",
+			s: scanner{
+				skipIDs: map[string]struct{}{
+					id: {},
+				},
+			},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a aws secret %s within aws %s but not valid", "+HaNv9cTwheDKGJaws/+BMF2GgybQgBWdhcOOdfF", id)), // the ID is listed in skipIDs, so no result is expected
+				verify: true,
+			},
+			wantErr: false,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			s := Scanner{}
+			s := tt.s
 			got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("AWS.FromData() error = %v, wantErr %v", err, tt.wantErr)
@@ -201,7 +215,7 @@ func TestAWS_FromChunk(t *testing.T) {
 
 func BenchmarkFromData(benchmark *testing.B) {
 	ctx := context.Background()
-	s := Scanner{}
+	s := scanner{}
 	for name, data := range detectors.MustGetBenchmarkData() {
 		benchmark.Run(name, func(b *testing.B) {
 			for n := 0; n < b.N; n++ {
diff --git a/pkg/engine/defaults.go b/pkg/engine/defaults.go
index a6b252ef3126..9cb901bf80fd 100644
--- a/pkg/engine/defaults.go
+++ b/pkg/engine/defaults.go
@@ -737,7 +737,7 @@ func DefaultDetectors() []detectors.Detector {
 		&heroku.Scanner{},
 		&linearapi.Scanner{},
 		&alibaba.Scanner{},
-		&aws.Scanner{},
+		aws.New(),
 		&azure.Scanner{},
 		&slack.Scanner{}, // has 4 secret types
 		&gitlabv2.Scanner{},