forked from anchore/syft
-
Notifications
You must be signed in to change notification settings - Fork 0
/
directory_resolver.go
585 lines (495 loc) · 18 KB
/
directory_resolver.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
package source
import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/event"
"github.com/wagoodman/go-partybus"
"github.com/wagoodman/go-progress"
)
// WindowsOS is the runtime.GOOS value for Windows; this file compares against it to decide whether
// paths need to be translated between Windows and posix form.
const WindowsOS = "windows"
// unixSystemRuntimePrefixes lists the path prefixes that isUnixSystemRuntimePath matches against in order
// to exclude those paths from indexing.
var unixSystemRuntimePrefixes = []string{
"/proc",
"/dev",
"/sys",
}
// compile-time assertion that *directoryResolver satisfies the FileResolver interface.
var _ FileResolver = (*directoryResolver)(nil)
// pathFilterFn reports whether the given path should be excluded from the index (true means "skip it").
// The os.FileInfo may be nil when no file info could be observed for the path.
type pathFilterFn func(string, os.FileInfo) bool
// directoryResolver implements path and content access for the directory data source.
type directoryResolver struct {
// path is the symlink-evaluated root that the resolver was created for.
path string
// currentWdRelativeToRoot is the root expressed relative to the working directory when the root is
// absolute, otherwise the cleaned root itself.
currentWdRelativeToRoot string
// currentWd is the symlink-evaluated working directory captured when the resolver was created.
currentWd string
// fileTree holds every path that has been indexed.
fileTree *filetree.FileTree
// metadata maps the ID of an indexed file reference to the metadata recorded for it.
metadata map[file.ID]FileMetadata
// TODO: wire up to report these paths in the json report
// NOTE(review): the TODO above presumably refers to errPaths rather than pathFilterFns — confirm.
// pathFilterFns are consulted for every visited path; a path is not indexed if any filter returns true.
pathFilterFns []pathFilterFn
// refsByMIMEType groups indexed file references by the MIME type recorded in their metadata.
refsByMIMEType map[string][]file.Reference
// errPaths records the paths that could not be accessed or indexed, along with the error observed.
errPaths map[string]error
}
// newDirectoryResolver creates a resolver for the given root and indexes everything reachable from it.
//
// Both the current working directory and the root are resolved through any symlinks first, so that paths
// requested later can be normalized against the real location of the root. The built-in filters
// (isUnallowableFileType, isUnixSystemRuntimePath) are always applied before any caller-provided pathFilters.
//
// An error is returned if the working directory cannot be determined, if either path cannot be
// symlink-evaluated, if the root cannot be made relative to the working directory, or if indexing fails.
// Note that on an indexing failure the (partially populated) resolver is still returned alongside the error.
func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) {
	currentWD, err := os.Getwd()
	if err != nil {
		return nil, fmt.Errorf("could not get CWD: %w", err)
	}

	// we have to account for the root being accessed through a symlink path and always resolve the real path. Otherwise
	// we will not be able to normalize given paths that fall under the resolver
	cleanCWD, err := filepath.EvalSymlinks(currentWD)
	if err != nil {
		return nil, fmt.Errorf("could not evaluate CWD symlinks: %w", err)
	}

	cleanRoot, err := filepath.EvalSymlinks(root)
	if err != nil {
		return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err)
	}

	// NOTE(review): path.IsAbs only understands slash-separated paths, so a Windows root such as `C:\dir` takes
	// the else branch below. filepath.IsAbs looks like the intended call — confirm before changing, since
	// filepath.Rel can fail across volumes.
	var currentWdRelRoot string
	if path.IsAbs(cleanRoot) {
		currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot)
		if err != nil {
			return nil, fmt.Errorf("could not determine given root path to CWD: %w", err)
		}
	} else {
		currentWdRelRoot = filepath.Clean(cleanRoot)
	}

	resolver := directoryResolver{
		path:                    cleanRoot,
		currentWd:               cleanCWD,
		currentWdRelativeToRoot: currentWdRelRoot,
		fileTree:                filetree.NewFileTree(),
		metadata:                make(map[file.ID]FileMetadata),
		pathFilterFns:           append([]pathFilterFn{isUnallowableFileType, isUnixSystemRuntimePath}, pathFilters...),
		refsByMIMEType:          make(map[string][]file.Reference),
		errPaths:                make(map[string]error),
	}

	return &resolver, indexAllRoots(cleanRoot, resolver.indexTree)
}
// indexTree indexes the filesystem at the given root into the resolver and returns any additional roots
// (as reported by indexPath) that should be indexed afterwards.
//
// The stager's Current field is set to each path as it is visited. Problems accessing individual paths are
// recorded by indexPath rather than returned; the error returned here is whatever filepath.Abs or
// filepath.Walk reports.
func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) {
	log.Debugf("indexing filesystem path=%q", root)

	absRoot, err := filepath.Abs(root)
	if err != nil {
		return nil, err
	}

	var roots []string

	// we want to be able to index single files with the directory resolver. However, we should also allow for attempting
	// to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing).
	// For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root,
	// but continue forth with index regardless if the given root path exists or not.
	//
	// Lstat (not Stat) is used so that a symlink root is described the same way filepath.Walk would describe it,
	// which keeps symlink roots flowing through the symlink indexing path.
	if fi, statErr := os.Lstat(absRoot); statErr == nil && !fi.IsDir() {
		stager.Current = absRoot
		// indexPath records access problems on the resolver itself and only returns fs.SkipDir for directories,
		// so there is no error to act on for a single non-directory root.
		if newRoot, _ := r.indexPath(absRoot, fi, nil); newRoot != "" {
			roots = append(roots, newRoot)
		}
		return roots, nil
	}

	// the walk must finish before roots is read: the closure appends to roots, and the order in which a plain
	// variable and a function call are evaluated within one return statement is not specified by the language.
	err = filepath.Walk(absRoot, func(p string, info os.FileInfo, walkErr error) error {
		stager.Current = p

		newRoot, indexErr := r.indexPath(p, info, walkErr)
		if indexErr != nil {
			return indexErr
		}

		if newRoot != "" {
			roots = append(roots, newRoot)
		}
		return nil
	})

	return roots, err
}
// indexPath adds a single visited path to the index. It returns a non-empty string when indexing the path
// revealed an additional root that should be indexed (see addPathToIndex), and returns fs.SkipDir when a
// filter rejects a directory so that the walk does not descend into it. Access problems never stop
// indexing: they are recorded in errPaths and the path is skipped.
func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) (string, error) {
	// link cycles could cause a revisit; never index the same path twice
	if r.hasBeenIndexed(path) {
		return "", nil
	}

	// a path is ignored as soon as any filter claims it
	for _, excluded := range r.pathFilterFns {
		if excluded == nil || !excluded(path, info) {
			continue
		}
		if info != nil && info.IsDir() {
			return "", fs.SkipDir
		}
		return "", nil
	}

	switch {
	case r.isFileAccessErr(path, err):
		return "", nil
	case info == nil:
		// walk may not be able to provide a FileInfo object; keep track of the path and carry on
		r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path)
		return "", nil
	}

	// paths coming from windows are normalized to posix on the way into the index
	indexedPath := path
	if runtime.GOOS == WindowsOS {
		indexedPath = windowsToPosix(path)
	}

	additionalRoot, addErr := r.addPathToIndex(indexedPath, info)
	if r.isFileAccessErr(indexedPath, addErr) {
		return "", nil
	}

	return additionalRoot, nil
}
// isFileAccessErr reports whether err is non-nil. When it is, the error is logged as a warning and stored
// in errPaths under the given path, so that a failure on one path does not stop indexing.
func (r *directoryResolver) isFileAccessErr(path string, err error) bool {
	if err == nil {
		return false
	}

	log.Warnf("unable to access path=%q: %+v", path, err)
	r.errPaths[path] = err
	return true
}
// addPathToIndex dispatches to the indexing routine matching the file type derived from the info's mode.
// Only symlinks can yield a non-empty first result (see addSymlinkToIndex); any file type other than
// symlink, directory, or regular file results in an error.
func (r directoryResolver) addPathToIndex(p string, info os.FileInfo) (string, error) {
	fileType := newFileTypeFromMode(info.Mode())

	switch fileType {
	case Directory:
		return "", r.addDirectoryToIndex(p, info)
	case RegularFile:
		return "", r.addFileToIndex(p, info)
	case SymbolicLink:
		return r.addSymlinkToIndex(p, info)
	}

	return "", fmt.Errorf("unsupported file type: %s", fileType)
}
// hasBeenIndexed reports whether the given path is in the file tree AND has metadata recorded for it.
// Being in the tree alone is not enough: cases like "/" will be in the tree without having been indexed
// yet, and those must still be treated as new paths to index.
func (r directoryResolver) hasBeenIndexed(p string) bool {
	treePath := file.Path(p)
	if !r.fileTree.HasPath(treePath) {
		return false
	}

	if found, ref, err := r.fileTree.File(treePath); err == nil && found && ref != nil {
		_, indexed := r.metadata[ref.ID()]
		return indexed
	}

	return false
}
// addDirectoryToIndex adds the path to the file tree as a directory and records its metadata.
// Any error from the file tree is returned as-is.
func (r directoryResolver) addDirectoryToIndex(p string, info os.FileInfo) error {
	dirRef, err := r.fileTree.AddDir(file.Path(p))
	if err != nil {
		return err
	}

	inIndex := r.isInIndex(NewLocationFromDirectory(p, *dirRef))
	r.addFileMetadataToIndex(dirRef, fileMetadataFromPath(p, info, inIndex))

	return nil
}
// addFileToIndex adds the path to the file tree as a file and records its metadata.
// Any error from the file tree is returned as-is.
func (r directoryResolver) addFileToIndex(p string, info os.FileInfo) error {
	fileRef, err := r.fileTree.AddFile(file.Path(p))
	if err != nil {
		return err
	}

	inIndex := r.isInIndex(NewLocationFromDirectory(p, *fileRef))
	r.addFileMetadataToIndex(fileRef, fileMetadataFromPath(p, info, inIndex))

	return nil
}
// addSymlinkToIndex adds the symlink at the given path to the file tree, records its metadata (including the
// link destination), and returns the link target so that the caller can index it as an additional root.
// An error is returned if the link cannot be read or cannot be added to the file tree.
func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string, error) {
	destination, err := os.Readlink(p)
	if err != nil {
		return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
	}

	// a relative link (e.g. /dev/stderr -> fd/2) is resolved against the directory holding the link
	// (e.g. to /dev/fd/2)
	if !filepath.IsAbs(destination) {
		destination = filepath.Join(filepath.Dir(p), destination)
	}

	linkRef, err := r.fileTree.AddSymLink(file.Path(p), file.Path(destination))
	if err != nil {
		return "", err
	}

	linkLocation := NewLocationFromDirectory(p, *linkRef)
	linkLocation.VirtualPath = p

	linkMetadata := fileMetadataFromPath(p, info, r.isInIndex(linkLocation))
	linkMetadata.LinkDestination = destination
	r.addFileMetadataToIndex(linkRef, linkMetadata)

	// the destination is what gets reported back as a new root to index
	additionalRoot := destination
	if !filepath.IsAbs(additionalRoot) {
		additionalRoot = filepath.Clean(filepath.Join(path.Dir(p), destination))
	}

	return additionalRoot, nil
}
// addFileMetadataToIndex stores the metadata under the reference's ID and, when the metadata carries a
// MIME type, also files the reference under that MIME type. A nil reference is ignored.
func (r directoryResolver) addFileMetadataToIndex(ref *file.Reference, metadata FileMetadata) {
	if ref == nil {
		return
	}

	if mimeType := metadata.MIMEType; mimeType != "" {
		r.refsByMIMEType[mimeType] = append(r.refsByMIMEType[mimeType], *ref)
	}

	r.metadata[ref.ID()] = metadata
}
// requestPath translates a user-provided path into the absolute path used to query the index.
// Absolute input is joined beneath the resolver root; relative input is joined beneath the root's position
// relative to the working directory. The joined path is then made absolute.
func (r directoryResolver) requestPath(userPath string) (string, error) {
	// relative requests must take into account any difference between the root path and the CWD
	base := r.currentWdRelativeToRoot
	if filepath.IsAbs(userPath) {
		// absolute requests are anchored at the root so that input does not start outside of it
		base = r.path
	}

	absPath, err := filepath.Abs(path.Join(base, userPath))
	if err != nil {
		return "", err
	}

	return absPath, nil
}
// responsePath converts an indexed path into the form handed back to callers: translated to a Windows path
// when running on Windows and, for absolute paths, with the resolver root prefix trimmed so that the result
// is relative to the request path. Non-absolute paths are returned without trimming.
func (r directoryResolver) responsePath(path string) string {
	osPath := path
	if runtime.GOOS == WindowsOS {
		// the index stores posix paths; encode back to Windows
		osPath = posixToWindows(path)
	}

	if !filepath.IsAbs(osPath) {
		return osPath
	}

	// the root is located by combining the process CWD with the root's position relative to it
	rootPrefix := filepath.Clean(filepath.Join(r.currentWd, r.currentWdRelativeToRoot))
	return strings.TrimPrefix(osPath, rootPrefix+string(filepath.Separator))
}
// HasPath reports whether the given path is present in the resolver's file tree. A path that cannot be
// translated into a request path is reported as absent.
func (r *directoryResolver) HasPath(userPath string) bool {
	if requestPath, err := r.requestPath(userPath); err == nil {
		return r.fileTree.HasPath(file.Path(requestPath))
	}
	return false
}
// String implements fmt.Stringer, describing the directory data source as "dir:<root path>".
func (r directoryResolver) String() string {
	return "dir:" + r.path
}
// FilesByPath returns a Location for each given path that resolves to an indexed, non-directory file.
// Paths that cannot be resolved, do not exist, are directories, or are not in the index are skipped;
// the returned error is always nil.
func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) {
	locations := make([]Location, 0)

	for _, userPath := range userPaths {
		requestedPath, err := r.requestPath(userPath)
		if err != nil {
			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
			continue
		}

		// symlinks are resolved here; the path used for access is preserved as the VirtualPath to the real file
		realPath, err := filepath.EvalSymlinks(requestedPath)
		if err != nil {
			log.Debugf("directory resolver unable to evaluate symlink for path=%q : %+v", userPath, err)
			continue
		}

		// TODO: why not use stored metadata?
		info, err := os.Stat(realPath)
		switch {
		case errors.Is(err, os.ErrNotExist):
			// note: there are platform-specific errors other than os.ErrNotExist that hint at the same overall
			// problem (that the path does not exist), such as syscall.ENOTDIR
			// (see https://github.com/golang/go/issues/18974).
			continue
		case err != nil:
			// other syscall failures that hint at non-existence should not be reported as invalid paths, so only
			// errors that are not path errors are logged, to surface IO or permissions related problems.
			var pathErr *os.PathError
			if !errors.As(err, &pathErr) {
				log.Warnf("path is not valid (%s): %+v", realPath, err)
			}
			continue
		case info.IsDir():
			// directories are not considered
			continue
		}

		if runtime.GOOS == WindowsOS {
			requestedPath = windowsToPosix(requestedPath)
		}

		found, ref, err := r.fileTree.File(file.Path(requestedPath), filetree.FollowBasenameLinks)
		if err != nil || !found {
			continue
		}

		locations = append(locations, NewVirtualLocationFromDirectory(
			r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
			r.responsePath(requestedPath),        // the path used to access this file, relative to the resolver root
			*ref,
		))
	}

	return locations, nil
}
// FilesByGlob returns a Location for every file in the resolver's file tree that matches any of the given
// glob patterns. The first pattern that fails to evaluate aborts the search and its error is returned.
func (r directoryResolver) FilesByGlob(patterns ...string) ([]Location, error) {
	locations := make([]Location, 0)

	for _, pattern := range patterns {
		matches, err := r.fileTree.FilesByGlob(pattern, filetree.FollowBasenameLinks)
		if err != nil {
			return nil, err
		}

		for _, match := range matches {
			realPath := r.responsePath(string(match.Reference.RealPath)) // the actual path relative to the resolver root
			accessPath := r.responsePath(string(match.MatchPath))        // the path used to access this file, relative to the resolver root
			locations = append(locations, NewVirtualLocationFromDirectory(realPath, accessPath, match.Reference))
		}
	}

	return locations, nil
}
// RelativeFileByPath fetches a single file at the given path. The Location argument exists to satisfy the
// resolver interface and is ignored: for the directoryResolver this is a plain path lookup via FilesByPath.
// It returns nil when the lookup fails or finds nothing, otherwise the first match.
func (r *directoryResolver) RelativeFileByPath(_ Location, path string) *Location {
	matches, err := r.FilesByPath(path)
	if err != nil || len(matches) == 0 {
		return nil
	}
	return &matches[0]
}
// FileContentsByLocation returns a lazily-opened reader for the file behind the given location.
// An error is returned when the location has no real path or when the path is not in the index.
func (r directoryResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) {
	realPath := location.ref.RealPath

	switch {
	case realPath == "":
		return nil, errors.New("empty path given")
	case !r.isInIndex(location):
		// paths that were explicitly excluded from the tree index must have all content requests DENIED:
		// they have been indicated to be inaccessible (either by preference or because the files are not
		// readable by the current user).
		return nil, fmt.Errorf("file content is inaccessible path=%q", realPath)
	}

	// RealPath is posix, so on windows it must be translated back to its true on-disk path
	onDiskPath := string(realPath)
	if runtime.GOOS == WindowsOS {
		onDiskPath = posixToWindows(onDiskPath)
	}

	return file.NewLazyReadCloser(onDiskPath), nil
}
// isInIndex reports whether the location's real path is non-empty and present in the file tree
// (following basename links).
func (r directoryResolver) isInIndex(location Location) bool {
	realPath := location.ref.RealPath
	return realPath != "" && r.fileTree.HasPath(realPath, filetree.FollowBasenameLinks)
}
// AllLocations streams a Location for every non-directory entry in the file tree over the returned
// channel. The channel is unbuffered and is closed once every entry has been sent, so the caller is
// expected to read it until it is closed.
func (r *directoryResolver) AllLocations() <-chan Location {
	locations := make(chan Location)

	go func() {
		defer close(locations)

		// this should be all non-directory types
		refs := r.fileTree.AllFiles(file.TypeReg, file.TypeSymlink, file.TypeHardLink, file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFifo)
		for _, ref := range refs {
			locations <- NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref)
		}
	}()

	return locations
}
// FileMetadataByLocation returns the metadata recorded for the location's file reference, or an error
// wrapping os.ErrNotExist when nothing has been recorded for it.
func (r *directoryResolver) FileMetadataByLocation(location Location) (FileMetadata, error) {
	if metadata, ok := r.metadata[location.ref.ID()]; ok {
		return metadata, nil
	}
	return FileMetadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
}
// FilesByMIMEType returns a Location for every indexed file recorded under any of the given MIME types,
// in the order the types were given. The result is nil when nothing matches; the error is always nil.
func (r *directoryResolver) FilesByMIMEType(types ...string) ([]Location, error) {
	var matches []Location

	for _, mimeType := range types {
		// a MIME type with no entries yields a nil slice, which is simply not iterated
		for _, ref := range r.refsByMIMEType[mimeType] {
			matches = append(matches, NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref))
		}
	}

	return matches, nil
}
// windowsToPosix converts a Windows path into a rooted, slash-separated path with the volume encoded as the
// first path element, e.g.:
//
//	convert: C:\\some\windows\Place
//	into:    /c/some/windows/Place
//
// The volume is whatever filepath.VolumeName reports for the running platform; it is lowercased and has a
// trailing ":" removed. The result always starts with "/" and is cleaned.
func windowsToPosix(windowsPath string) (posixPath string) {
	volume := filepath.VolumeName(windowsPath)
	remainder := strings.TrimPrefix(windowsPath, volume)

	// the volume ends up at the start of the path (e.g. /c/<path>, where c is the volume)
	volumeElement := strings.ToLower(strings.TrimSuffix(volume, ":"))

	// every backslash becomes a forward slash
	slashed := strings.ReplaceAll(remainder, `\`, "/")

	// always root the result at "/"; path.Clean collapses any duplicate separators introduced here
	return path.Clean("/" + volumeElement + "/" + slashed)
}
// posixToWindows converts a rooted posix path whose first element encodes the volume (as produced by
// windowsToPosix) back into a Windows path, e.g. /c/some/place --> C:\some\place.
//
// A path that does not start with "/" carries no volume element, so it is returned with only its
// separators converted to the platform separator instead of having its first element mistaken for a
// volume (an input without any "/" would otherwise index past the end of the split fields and panic).
func posixToWindows(posixPath string) (windowsPath string) {
	if !strings.HasPrefix(posixPath, "/") {
		return filepath.FromSlash(posixPath)
	}

	// decode the volume (e.g. /c/<path> --> C:\\); the leading "/" guarantees at least two fields
	pathFields := strings.Split(posixPath, "/")
	// note: the raw string holds two backslashes; filepath.Clean collapses repeated separators on Windows
	volumeName := strings.ToUpper(pathFields[1]) + `:\\`

	// translate non-escaped forward slashes into backslashes
	remainingTranslatedPath := strings.Join(pathFields[2:], "\\")

	// combine volume name and backslash components
	return filepath.Clean(volumeName + remainingTranslatedPath)
}
func isUnixSystemRuntimePath(path string, _ os.FileInfo) bool {
return internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...)
}
// isUnallowableFileType is a pathFilterFn that reports whether the file info describes a character device,
// socket, block device, FIFO, or irregular file. The path is not consulted, and a nil info is never
// filtered since the file type cannot be determined.
//
// note: symlinks that point to these files may still get by. That is handled later in processing to help
// prevent against infinite links traversal.
func isUnallowableFileType(_ string, info os.FileInfo) bool {
	if info == nil {
		// we can't filter out by filetype for non-existent files
		return false
	}

	switch newFileTypeFromMode(info.Mode()) {
	case CharacterDevice, Socket, BlockDevice, FIFONode, IrregularFile:
		return true
	default:
		return false
	}
}
// indexAllRoots runs the indexer against the given root and then against every additional root the indexer
// reports, in first-in-first-out order, until none remain. Indexing stops at the first indexer error,
// which is returned wrapped with the offending path.
//
// Why account for multiple roots? A symlink may reference a location above the root path, in which case
// the place the link resolves to must be indexed as well. This is also why the filetree must be relative to
// the root of the filesystem (and not just relative to the given path).
//
// Each additional root is queued at most once. The initial root is not recorded as seen, so an indexer that
// reports it as an additional root causes it to be queued one more time.
func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error {
	stager, prog := indexingProgress(root)
	defer prog.SetCompleted()

	queue := []string{root}
	seen := map[string]struct{}{}

	for len(queue) > 0 {
		current := queue[0]
		queue = queue[1:]

		discovered, err := indexer(current, stager)
		if err != nil {
			return fmt.Errorf("unable to index filesystem path=%q: %w", current, err)
		}

		for _, candidate := range discovered {
			if _, alreadyQueued := seen[candidate]; alreadyQueued {
				continue
			}
			seen[candidate] = struct{}{}
			queue = append(queue, candidate)
		}
	}

	return nil
}
// indexingProgress creates the stage and manual progress trackers for an indexing run and publishes a
// FileIndexingStarted event on the bus carrying both, with the given path as the event source.
// The progress total is set to -1.
func indexingProgress(path string) (*progress.Stage, *progress.Manual) {
	stage := &progress.Stage{}
	prog := &progress.Manual{Total: -1}

	// the event value exposes both trackers through their respective interfaces
	trackers := struct {
		progress.Stager
		progress.Progressable
	}{
		Stager:       progress.Stager(stage),
		Progressable: prog,
	}

	bus.Publish(partybus.Event{
		Type:   event.FileIndexingStarted,
		Source: path,
		Value:  trackers,
	})

	return stage, prog
}