-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
custom_detectors.go
226 lines (203 loc) · 6.38 KB
/
custom_detectors.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
package custom_detectors
import (
"bytes"
"context"
"encoding/json"
"net/http"
"regexp"
"strings"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
// The maximum number of matches from one chunk. This const is used when
// permutating each regex match to protect the scanner from doing too much work
// for poorly defined regexps.
const maxTotalMatches = 100
// customRegexWebhook is a CustomRegex with webhook validation that is
// guaranteed to be valid (assuming the data is not changed after
// initialization).
type customRegexWebhook struct {
*custom_detectorspb.CustomRegex
}
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*customRegexWebhook)(nil)
// NewWebhookCustomRegex initializes and validates a customRegexWebhook. An
// unexported type is intentionally returned here to ensure the values have
// been validated.
func NewWebhookCustomRegex(pb *custom_detectorspb.CustomRegex) (*customRegexWebhook, error) {
// TODO: Return all validation errors.
if err := ValidateKeywords(pb.Keywords); err != nil {
return nil, err
}
if err := ValidateRegex(pb.Regex); err != nil {
return nil, err
}
for _, verify := range pb.Verify {
if err := ValidateVerifyEndpoint(verify.Endpoint, verify.Unsafe); err != nil {
return nil, err
}
if err := ValidateVerifyHeaders(verify.Headers); err != nil {
return nil, err
}
}
// TODO: Copy only necessary data out of pb.
return &customRegexWebhook{pb}, nil
}
var httpClient = common.SaneHttpClient()
func (c *customRegexWebhook) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
regexMatches := make(map[string][][]string, len(c.GetRegex()))
// Find all submatches for each regex.
for name, regex := range c.GetRegex() {
regex, err := regexp.Compile(regex)
if err != nil {
// TODO: Log error.
// This should never happen due to validation.
continue
}
regexMatches[name] = regex.FindAllStringSubmatch(dataStr, -1)
}
// Permutate each individual match.
// {
// "foo": [["match1"]]
// "bar": [["match2"], ["match3"]]
// }
// becomes
// [
// {"foo": ["match1"], "bar": ["match2"]},
// {"foo": ["match1"], "bar": ["match3"]},
// ]
matches := permutateMatches(regexMatches)
// Create result object and test for verification.
for _, match := range matches {
if common.IsDone(ctx) {
// TODO: Log we're possibly leaving out results.
return results, nil
}
var raw string
for _, values := range match {
// values[0] contains the entire regex match.
raw += values[0]
}
result := detectors.Result{
DetectorType: detectorspb.DetectorType_CustomRegex,
Raw: []byte(raw),
}
if isKnownFalsePositive(match) {
continue
}
if !verify {
results = append(results, result)
continue
}
// Verify via webhook.
jsonBody, err := json.Marshal(map[string]map[string][]string{
c.GetName(): match,
})
if err != nil {
continue
}
// Try each config until we successfully verify.
for _, verifyConfig := range c.GetVerify() {
if common.IsDone(ctx) {
// TODO: Log we're possibly leaving out results.
return results, nil
}
req, err := http.NewRequestWithContext(ctx, "POST", verifyConfig.GetEndpoint(), bytes.NewReader(jsonBody))
if err != nil {
continue
}
for _, header := range verifyConfig.GetHeaders() {
key, value, found := strings.Cut(header, ":")
if !found {
// Should be unreachable due to validation.
continue
}
req.Header.Add(key, strings.TrimLeft(value, "\t\n\v\f\r "))
}
res, err := httpClient.Do(req)
if err != nil {
continue
}
// TODO: Read response body.
res.Body.Close()
if res.StatusCode == http.StatusOK {
result.Verified = true
break
}
}
results = append(results, result)
}
return results, nil
}
func (c *customRegexWebhook) Keywords() []string {
return c.GetKeywords()
}
// productIndices produces a permutation of indices for each length. Example:
// productIndices(3, 2) -> [[0 0] [1 0] [2 0] [0 1] [1 1] [2 1]]. It returns
// a slice of length no larger than maxTotalMatches.
func productIndices(lengths ...int) [][]int {
count := 1
for _, l := range lengths {
count *= l
}
if count == 0 {
return nil
}
if count > maxTotalMatches {
count = maxTotalMatches
}
results := make([][]int, count)
for i := 0; i < count; i++ {
j := 1
result := make([]int, 0, len(lengths))
for _, l := range lengths {
result = append(result, (i/j)%l)
j *= l
}
results[i] = result
}
return results
}
// permutateMatches converts the list of all regex matches into all possible
// permutations selecting one from each named entry in the map. For example:
// {"foo": [matchA, matchB], "bar": [matchC]} becomes
// [{"foo": matchA, "bar": matchC}, {"foo": matchB, "bar": matchC}]
func permutateMatches(regexMatches map[string][][]string) []map[string][]string {
// Get a consistent order for names and their matching lengths.
// The lengths are used in calculating the permutation so order matters.
names := make([]string, 0, len(regexMatches))
lengths := make([]int, 0, len(regexMatches))
for key, value := range regexMatches {
names = append(names, key)
lengths = append(lengths, len(value))
}
// Permutate all the indices for each match. For example, if "foo" has
// [matchA, matchB] and "bar" has [matchC], we will get indices [0 0] [1 0].
permutationIndices := productIndices(lengths...)
// Build {"foo": matchA, "bar": matchC} and {"foo": matchB, "bar": matchC}
// from the indices.
var matches []map[string][]string
for _, permutation := range permutationIndices {
candidate := make(map[string][]string, len(permutationIndices))
for i, name := range names {
candidate[name] = regexMatches[name][permutation[i]]
}
matches = append(matches, candidate)
}
return matches
}
// This function will check false positives for common test words, but also it
// will make sure the key appears 'random' enough to be a real key.
func isKnownFalsePositive(match map[string][]string) bool {
for _, values := range match {
for _, value := range values {
if detectors.IsKnownFalsePositive(value, detectors.DefaultFalsePositives, true) {
return true
}
}
}
return false
}