From 4da3e2cfbabc9f751898f250b49f2439785783a1 Mon Sep 17 00:00:00 2001 From: Aaron L Date: Tue, 28 Mar 2017 21:21:07 -0700 Subject: [PATCH] Fix deadlock in Remove (linux/inotify) Several people have reported this issue where if you are using a single goroutine to watch for fs events and you call Remove in that goroutine it can deadlock. The cause for this is that the Remove was made synchronous by PR #73. The reason for this was to try and ensure that maps were no longer leaking. In this PR: IN_IGNORE was used as the event to ensure map cleanup. This worked fine when Remove() was called and the next event was IN_IGNORE, but when a different event was received the main goroutine that's supposed to be reading from the Events channel would be stuck waiting for the sync.Cond, which would never be hit because the select would then block waiting for someone to receive the non-IN_IGNORE event from the channel so it could proceed to process the IN_IGNORE event that was waiting in the queue. Deadlock :) Removing the synchronization then created two nasty races where Remove followed by Remove would error unnecessarily, and one where Remove followed by an Add could result in the maps being cleaned up AFTER the Add call which means the inotify watch is active, but our maps don't have the values anymore. It then becomes impossible to delete the watches via the fsnotify code since it checks it's local data before calling InotifyRemove. This code attempts to use IN_DELETE_SELF as a means to know when a watch was deleted as part of an unlink(). That means that we didn't delete the watch via the fsnotify lib and we should clean up our maps since that watch no longer exists. This allows us to clean up the maps immediately when calling Remove since we no longer try to synchronize cleanup using IN_IGNORE as the sync point. - Fix #195 - Fix #123 - Fix #115 --- inotify.go | 37 +++++++++++++++++++------------------ inotify_test.go | 18 ++++++++---------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/inotify.go b/inotify.go index 15a48932..d9fd1b88 100644 --- a/inotify.go +++ b/inotify.go @@ -24,7 +24,6 @@ type Watcher struct { Events chan Event Errors chan error mu sync.Mutex // Map access - cv *sync.Cond // sync removing on rm_watch with IN_IGNORE fd int poller *fdPoller watches map[string]*watch // Map of inotify watches (key: path) @@ -56,7 +55,6 @@ func NewWatcher() (*Watcher, error) { done: make(chan struct{}), doneResp: make(chan struct{}), } - w.cv = sync.NewCond(&w.mu) go w.readEvents() return w, nil @@ -137,6 +135,13 @@ func (w *Watcher) Remove(name string) error { if !ok { return fmt.Errorf("can't remove non-existent inotify watch for: %s", name) } + + // We successfully removed the watch if InotifyRmWatch doesn't return an + // error, we need to clean up our internal state to ensure it matches + // inotify's kernel state. + delete(w.paths, int(watch.wd)) + delete(w.watches, name) + // inotify_rm_watch will return EINVAL if the file has been deleted; // the inotify will already have been removed. // watches and pathes are deleted in ignoreLinux() implicitly and asynchronously @@ -154,13 +159,6 @@ func (w *Watcher) Remove(name string) error { return errno } - // wait until ignoreLinux() deleting maps - exists := true - for exists { - w.cv.Wait() - _, exists = w.watches[name] - } - return nil } @@ -261,8 +259,17 @@ func (w *Watcher) readEvents() { // the "Name" field with a valid filename. We retrieve the path of the watch from // the "paths" map. w.mu.Lock() - name := w.paths[int(raw.Wd)] + name, ok := w.paths[int(raw.Wd)] + // IN_DELETE_SELF occurs when the file/directory being watched is removed. + // This is a sign to clean up the maps, otherwise we are no longer in sync + // with the inotify kernel state which has already deleted the watch + // automatically. + if ok && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF { + delete(w.paths, int(raw.Wd)) + delete(w.watches, name) + } w.mu.Unlock() + if nameLen > 0 { // Point "bytes" at the first byte of the filename bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent])) @@ -273,7 +280,7 @@ func (w *Watcher) readEvents() { event := newEvent(name, mask) // Send the events that are not ignored on the events channel - if !event.ignoreLinux(w, raw.Wd, mask) { + if !event.ignoreLinux(mask) { select { case w.Events <- event: case <-w.done: @@ -290,15 +297,9 @@ func (w *Watcher) readEvents() { // Certain types of events can be "ignored" and not sent over the Events // channel. Such as events marked ignore by the kernel, or MODIFY events // against files that do not exist. -func (e *Event) ignoreLinux(w *Watcher, wd int32, mask uint32) bool { +func (e *Event) ignoreLinux(mask uint32) bool { // Ignore anything the inotify API says to ignore if mask&unix.IN_IGNORED == unix.IN_IGNORED { - w.mu.Lock() - defer w.mu.Unlock() - name := w.paths[int(wd)] - delete(w.paths, int(wd)) - delete(w.watches, name) - w.cv.Broadcast() return true } diff --git a/inotify_test.go b/inotify_test.go index 6771a1c6..2cc6d93d 100644 --- a/inotify_test.go +++ b/inotify_test.go @@ -293,25 +293,23 @@ func TestInotifyRemoveTwice(t *testing.T) { t.Fatalf("Failed to add testFile: %v", err) } - err = os.Remove(testFile) + err = w.Remove(testFile) if err != nil { - t.Fatalf("Failed to remove testFile: %v", err) + t.Fatalf("wanted successful remove but got:", err) } err = w.Remove(testFile) if err == nil { t.Fatalf("no error on removing invalid file") } - s1 := fmt.Sprintf("%s", err) - err = w.Remove(testFile) - if err == nil { - t.Fatalf("no error on removing invalid file") + w.mu.Lock() + defer w.mu.Unlock() + if len(w.watches) != 0 { + t.Fatalf("Expected watches len is 0, but got: %d, %v", len(w.watches), w.watches) } - s2 := fmt.Sprintf("%s", err) - - if s1 != s2 { - t.Fatalf("receive different error - %s / %s", s1, s2) + if len(w.paths) != 0 { + t.Fatalf("Expected paths len is 0, but got: %d, %v", len(w.paths), w.paths) } }