Skip to content

Commit

Permalink
overhaul how we load module info to format Go files
Browse files Browse the repository at this point in the history
We would call "go mod edit -json" for each Go file we formatted,
as each file may be in a different directory,
and thus inside a different module.

However, the first mistake is that we always ran the command in the
directory where gofumpt is run, not the directory containing each Go
file. Our tests weren't strict enough to catch this; now
octal-literal.txt is, via its run of gofmt before it calls cd.

The second mistake, and a pretty embarrassing one, is that since v0.3.0
made gofumpt concurrent, it has been racy in how it writes to globals
like langVersion from multiple goroutines. octal-literals.txt now tests
for this by adding a nested Go module.

Finally, we could also run into open file limits,
because spawning a child process and grabbing its output opens files of
its own such as named pipes.
The added test shows this with a limit of 256 and 10k tiny Go files:

	--- FAIL: TestWithLowOpenFileLimit (0.30s)
		ulimit_unix_test.go:82: writing 10000 tiny Go files
		ulimit_unix_test.go:104: running with 1 paths
		ulimit_unix_test.go:104: running with 10000 paths
		ulimit_unix_test.go:112:
			error:
			  got non-nil error
			comment:
			  stderr:
			  open /tmp/TestWithLowOpenFileLimit2753748366/001/p003/014.go: too many open files
			  open /tmp/TestWithLowOpenFileLimit2753748366/001/p003/017.go: too many open files
			  open /tmp/TestWithLowOpenFileLimit2753748366/001/p004/000.go: too many open files
			  open /tmp/TestWithLowOpenFileLimit2753748366/001/p004/019.go: too many open files

Instead, only call "go mod edit -json" once per directory,
and do it in the main thread to reduce its parallelism.
Also make it grab fdSem as well, for good measure.

This may not be a complete fix, as we're not sure how many files are
open by an exec.Command.Output call. However, we are no longer able to
reproduce a failure, so leave that as a TODO.

Fixes #208.
  • Loading branch information
mvdan committed Mar 18, 2022
1 parent 9eee203 commit 62e7ea0
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 44 deletions.
70 changes: 54 additions & 16 deletions gofmt.go
Expand Up @@ -23,6 +23,7 @@ import (
"runtime"
"runtime/pprof"
"strings"
"sync"

"golang.org/x/sync/semaphore"
exec "golang.org/x/sys/execabs"
Expand Down Expand Up @@ -287,21 +288,18 @@ func processFile(filename string, info fs.FileInfo, in io.Reader, r *reporter, e
// Apply gofumpt's changes before we print the code in gofumpt's format.

// If either -lang or -modpath aren't set, fetch them from go.mod.
if *langVersion == "" || *modulePath == "" {
out, err := exec.Command("go", "mod", "edit", "-json").Output()
if err == nil && len(out) > 0 {
var mod struct {
Go string
Module struct {
Path string
}
}
_ = json.Unmarshal(out, &mod)
if *langVersion == "" {
*langVersion = mod.Go
lang := *langVersion
modpath := *modulePath
if lang == "" || modpath == "" {
dir := filepath.Dir(filename)
mod, ok := moduleCacheByDir.Load(dir)
if ok && mod != nil {
mod := mod.(*module)
if lang == "" {
lang = mod.Go
}
if *modulePath == "" {
*modulePath = mod.Module.Path
if modpath == "" {
modpath = mod.Module.Path
}
}
}
Expand All @@ -311,8 +309,8 @@ func processFile(filename string, info fs.FileInfo, in io.Reader, r *reporter, e
// We also skip walking vendor directories entirely, but that happens elsewhere.
if explicit || !isGenerated(file) {
gformat.File(fileSet, file, gformat.Options{
LangVersion: *langVersion,
ModulePath: *modulePath,
LangVersion: lang,
ModulePath: modpath,
ExtraRules: *extraRules,
})
}
Expand Down Expand Up @@ -532,7 +530,47 @@ func gofmtMain(s *sequencer) {
}
}

type module struct {
Go string
Module struct {
Path string
}
}

func loadModuleInfo(dir string) interface{} {
cmd := exec.Command("go", "mod", "edit", "-json")
cmd.Dir = dir

// Spawning "go mod edit" will open files by design,
// such as the named pipe to obtain stdout.
// TODO(mvdan): if we run into "too many open files" errors again in the
// future, we probably need to turn fdSem into a weighted semaphore so this
// operation can acquire a weight larger than 1.
fdSem <- true
out, err := cmd.Output()
defer func() { <-fdSem }()

if err != nil || len(out) == 0 {
return nil
}
mod := new(module)
if err := json.Unmarshal(out, mod); err != nil {
return nil
}
return mod
}

// Written to by fileWeight, read from fileWeight and processFile.
// A present but nil value means that loading the module info failed.
// Note that we don't require the keys to be absolute directories,
// so duplicates are possible. The same can happen with symlinks.
var moduleCacheByDir sync.Map // map[dirString]*module

func fileWeight(path string, info fs.FileInfo) int64 {
dir := filepath.Dir(path)
if _, ok := moduleCacheByDir.Load(dir); !ok {
moduleCacheByDir.Store(dir, loadModuleInfo(dir))
}
if info == nil {
return exclusive
}
Expand Down
43 changes: 29 additions & 14 deletions testdata/scripts/octal-literals.txt
@@ -1,31 +1,33 @@
cd module
cp foo.go foo.go.orig

# Initially, the Go language version is too low.
gofumpt foo.go
cmp stdout foo.go.orig
gofumpt -l .
! stdout .

# We can give an explicitly newer version.
gofumpt -lang=1.13 foo.go
cmp stdout foo.go.golden
gofumpt -lang=1.13 -l .
stdout -count=1 'foo\.go'
stdout -count=1 'nested[/\\]nested\.go'

# If we bump the version in go.mod, it should be picked up.
exec go mod edit -go=1.13
gofumpt -l .
stdout -count=1 'foo\.go'
! stdout 'nested'

# Ensure we produce the output we expect, and that it's stable.
gofumpt foo.go
cmp stdout foo.go.golden

gofumpt -d foo.go.golden
! stdout .

# We can give an explicitly older version.
gofumpt -lang=v1 foo.go
cmp stdout foo.go.orig
# We can give an explicitly older version, too
gofumpt -lang=v1 -l .
! stdout .

-- module/go.mod --
-- go.mod --
module test

go 1.12
-- module/foo.go --
-- foo.go --
package p

const (
Expand All @@ -34,7 +36,7 @@ const (
k = 0o_7_5_5
l = 1022
)
-- module/foo.go.golden --
-- foo.go.golden --
package p

const (
Expand All @@ -43,3 +45,16 @@ const (
k = 0o_7_5_5
l = 1022
)
-- nested/go.mod --
module nested

go 1.11
-- nested/nested.go --
package p

const (
i = 0
j = 022
k = 0o_7_5_5
l = 1022
)
30 changes: 16 additions & 14 deletions testdata/scripts/workspaces.txt
@@ -1,22 +1,16 @@
[!go1.18] skip

# Octal literal prefixes were introduced in Go 1.13. If we are outside of the
# module, language version should not be set.
gofumpt a/go112
cmp stdout a/go112
# Whether we run gofumpt from inside or outside a module,
# we should always use the information from its go.mod.
# We also test that we don't get confused by the presence of go.work.

cd a
gofumpt go112
cmp stdout go113

-- a/go112 --
package main
gofumpt a/go112.go
cmp stdout a/go113.go

const x = 0777
-- a/go113 --
package main
cd a
gofumpt go112.go
cmp stdout go113.go

const x = 0o777
-- go.work --
go 1.18
use ./a
Expand All @@ -26,6 +20,14 @@ module a
go 1.18
-- a/a.go --
package a
-- a/go112.go --
package main

const x = 0777
-- a/go113.go --
package main

const x = 0o777
-- b/go.mod --
module b
go 1.18
Expand Down
94 changes: 94 additions & 0 deletions ulimit_unix_test.go
@@ -0,0 +1,94 @@
// Copyright (c) 2019, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

// TODO: replace with the unix build tag once we require Go 1.19 or later
//go:build linux

package main

import (
"bytes"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"testing"

qt "github.com/frankban/quicktest"
"golang.org/x/sys/unix"
)

func init() {
// Here rather than in TestMain, to reuse the unix build tag.
if limit := os.Getenv("TEST_WITH_FILE_LIMIT"); limit != "" {
n, err := strconv.ParseUint(limit, 10, 64)
if err != nil {
panic(err)
}
rlimit := unix.Rlimit{Cur: n, Max: n}
if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil {
panic(err)
}
os.Exit(main1())
}
}

func TestWithLowOpenFileLimit(t *testing.T) {
// Safe to run in parallel, as we only change the limit for child processes.
t.Parallel()

tempDir := t.TempDir()
testBinary, err := os.Executable()
qt.Assert(t, err, qt.IsNil)

const (
// Enough directories to run into the ulimit.
// Enough number of files in total to run into the ulimit.
numberDirs = 500
numberFilesPerDir = 20
numberFilesTotal = numberDirs * numberFilesPerDir
)
t.Logf("writing %d tiny Go files", numberFilesTotal)
var allGoFiles []string
for i := 0; i < numberDirs; i++ {
// Prefix "p", so the package name is a valid identifier.
// Add one go.mod file per directory as well,
// which will help catch data races when loading module info.
dirName := fmt.Sprintf("p%03d", i)
dirPath := filepath.Join(tempDir, dirName)
err := os.MkdirAll(dirPath, 0o777)
qt.Assert(t, err, qt.IsNil)

err = os.WriteFile(filepath.Join(dirPath, "go.mod"),
[]byte(fmt.Sprintf("module %s\n\ngo 1.16", dirName)), 0o666)
qt.Assert(t, err, qt.IsNil)

for j := 0; j < numberFilesPerDir; j++ {
filePath := filepath.Join(dirPath, fmt.Sprintf("%03d.go", j))
err := os.WriteFile(filePath,
// Extra newlines so that "-l" prints all paths.
[]byte(fmt.Sprintf("package %s\n\n\n", dirName)), 0o666)
qt.Assert(t, err, qt.IsNil)
allGoFiles = append(allGoFiles, filePath)
}
}
if len(allGoFiles) != numberFilesTotal {
panic("allGoFiles doesn't have the expected number of files?")
}
runGofmt := func(paths ...string) {
t.Logf("running with %d paths", len(paths))
cmd := exec.Command(testBinary, append([]string{"-l"}, paths...)...)
// 256 is a relatively common low limit, e.g. on Mac.
cmd.Env = append(os.Environ(), "TEST_WITH_FILE_LIMIT=256")
out, err := cmd.Output()
var stderr []byte
if err, _ := err.(*exec.ExitError); err != nil {
stderr = err.Stderr
}
qt.Assert(t, err, qt.IsNil, qt.Commentf("stderr:\n%s", stderr))
qt.Assert(t, bytes.Count(out, []byte("\n")), qt.Equals, len(allGoFiles))
}
runGofmt(tempDir)
runGofmt(allGoFiles...)
}

0 comments on commit 62e7ea0

Please sign in to comment.