Skip to content

Commit

Permalink
Fix deadlock in Remove (linux/inotify)
Browse files Browse the repository at this point in the history
Several people have reported this issue where if you are using a
single goroutine to watch for fs events and you call Remove in
that goroutine it can deadlock. The cause for this is that the Remove
was made synchronous by PR fsnotify#73. The reason for this was to try and
ensure that maps were no longer leaking.

In this PR: IN_IGNORE was used as the event to ensure map cleanup.
This worked fine when Remove() was called and the next event was
IN_IGNORE, but when a different event was received the main goroutine
that's supposed to be reading from the Events channel would be stuck
waiting for the sync.Cond, which would never be hit because the select
would then block waiting for someone to receive the non-IN_IGNORE event
from the channel so it could proceed to process the IN_IGNORE event that
was waiting in the queue. Deadlock :)

Removing the synchronization then created two nasty races where Remove
followed by Remove would error unnecessarily, and one where Remove
followed by an Add could result in the maps being cleaned up AFTER the
Add call which means the inotify watch is active, but our maps don't
have the values anymore. It then becomes impossible to delete the
watches via the fsnotify code since it checks it's local data before
calling InotifyRemove.

This code attempts to use IN_DELETE_SELF as a means to know when a watch
was deleted as part of an unlink(). That means that we didn't delete the
watch via the fsnotify lib and we should clean up our maps since that
watch no longer exists. This allows us to clean up the maps immediately
when calling Remove since we no longer try to synchronize cleanup
using IN_IGNORE as the sync point.

- Fix fsnotify#195
- Fix fsnotify#123
- Fix fsnotify#115
  • Loading branch information
aarondl authored and markbates committed Mar 29, 2017
1 parent ff7bc41 commit 4da3e2c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 28 deletions.
37 changes: 19 additions & 18 deletions inotify.go
Expand Up @@ -24,7 +24,6 @@ type Watcher struct {
Events chan Event
Errors chan error
mu sync.Mutex // Map access
cv *sync.Cond // sync removing on rm_watch with IN_IGNORE
fd int
poller *fdPoller
watches map[string]*watch // Map of inotify watches (key: path)
Expand Down Expand Up @@ -56,7 +55,6 @@ func NewWatcher() (*Watcher, error) {
done: make(chan struct{}),
doneResp: make(chan struct{}),
}
w.cv = sync.NewCond(&w.mu)

go w.readEvents()
return w, nil
Expand Down Expand Up @@ -137,6 +135,13 @@ func (w *Watcher) Remove(name string) error {
if !ok {
return fmt.Errorf("can't remove non-existent inotify watch for: %s", name)
}

// We successfully removed the watch if InotifyRmWatch doesn't return an
// error, we need to clean up our internal state to ensure it matches
// inotify's kernel state.
delete(w.paths, int(watch.wd))
delete(w.watches, name)

// inotify_rm_watch will return EINVAL if the file has been deleted;
// the inotify will already have been removed.
// watches and pathes are deleted in ignoreLinux() implicitly and asynchronously
Expand All @@ -154,13 +159,6 @@ func (w *Watcher) Remove(name string) error {
return errno
}

// wait until ignoreLinux() deleting maps
exists := true
for exists {
w.cv.Wait()
_, exists = w.watches[name]
}

return nil
}

Expand Down Expand Up @@ -261,8 +259,17 @@ func (w *Watcher) readEvents() {
// the "Name" field with a valid filename. We retrieve the path of the watch from
// the "paths" map.
w.mu.Lock()
name := w.paths[int(raw.Wd)]
name, ok := w.paths[int(raw.Wd)]
// IN_DELETE_SELF occurs when the file/directory being watched is removed.
// This is a sign to clean up the maps, otherwise we are no longer in sync
// with the inotify kernel state which has already deleted the watch
// automatically.
if ok && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF {
delete(w.paths, int(raw.Wd))
delete(w.watches, name)
}
w.mu.Unlock()

if nameLen > 0 {
// Point "bytes" at the first byte of the filename
bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))
Expand All @@ -273,7 +280,7 @@ func (w *Watcher) readEvents() {
event := newEvent(name, mask)

// Send the events that are not ignored on the events channel
if !event.ignoreLinux(w, raw.Wd, mask) {
if !event.ignoreLinux(mask) {
select {
case w.Events <- event:
case <-w.done:
Expand All @@ -290,15 +297,9 @@ func (w *Watcher) readEvents() {
// Certain types of events can be "ignored" and not sent over the Events
// channel. Such as events marked ignore by the kernel, or MODIFY events
// against files that do not exist.
func (e *Event) ignoreLinux(w *Watcher, wd int32, mask uint32) bool {
func (e *Event) ignoreLinux(mask uint32) bool {
// Ignore anything the inotify API says to ignore
if mask&unix.IN_IGNORED == unix.IN_IGNORED {
w.mu.Lock()
defer w.mu.Unlock()
name := w.paths[int(wd)]
delete(w.paths, int(wd))
delete(w.watches, name)
w.cv.Broadcast()
return true
}

Expand Down
18 changes: 8 additions & 10 deletions inotify_test.go
Expand Up @@ -293,25 +293,23 @@ func TestInotifyRemoveTwice(t *testing.T) {
t.Fatalf("Failed to add testFile: %v", err)
}

err = os.Remove(testFile)
err = w.Remove(testFile)
if err != nil {
t.Fatalf("Failed to remove testFile: %v", err)
t.Fatalf("wanted successful remove but got:", err)
}

err = w.Remove(testFile)
if err == nil {
t.Fatalf("no error on removing invalid file")
}
s1 := fmt.Sprintf("%s", err)

err = w.Remove(testFile)
if err == nil {
t.Fatalf("no error on removing invalid file")
w.mu.Lock()
defer w.mu.Unlock()
if len(w.watches) != 0 {
t.Fatalf("Expected watches len is 0, but got: %d, %v", len(w.watches), w.watches)
}
s2 := fmt.Sprintf("%s", err)

if s1 != s2 {
t.Fatalf("receive different error - %s / %s", s1, s2)
if len(w.paths) != 0 {
t.Fatalf("Expected paths len is 0, but got: %d, %v", len(w.paths), w.paths)
}
}

Expand Down

0 comments on commit 4da3e2c

Please sign in to comment.