diff --git a/fixtures/utf16be_bom.env b/fixtures/utf16be_bom.env new file mode 100644 index 0000000..9d65b84 Binary files /dev/null and b/fixtures/utf16be_bom.env differ diff --git a/fixtures/utf16le_bom.env b/fixtures/utf16le_bom.env new file mode 100644 index 0000000..d55ea15 Binary files /dev/null and b/fixtures/utf16le_bom.env differ diff --git a/fixtures/bom.env b/fixtures/utf8_bom.env similarity index 100% rename from fixtures/bom.env rename to fixtures/utf8_bom.env diff --git a/go.mod b/go.mod index 42fbf18..3b45da7 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,10 @@ module github.com/subosito/gotenv go 1.18 -require github.com/stretchr/testify v1.7.5 +require ( + github.com/stretchr/testify v1.7.5 + golang.org/x/text v0.12.0 +) require ( github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/go.sum b/go.sum index f59e5c0..01c94b0 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.5 h1:s5PTfem8p8EbKQOctVV53k6jCJt3UX4IEJzwh+C324Q= github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= +golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/gotenv.go b/gotenv.go index eddad3a..7fe03ff 100644 --- a/gotenv.go +++ b/gotenv.go @@ -12,6 +12,9 @@ import ( "sort" "strconv" "strings" + + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" ) const ( @@ -20,11 +23,13 @@ const ( // Pattern for 
detecting valid variable within a value variablePattern = `(\\)?(\$)(\{?([A-Z0-9_]+)?\}?)` - - // Byte order mark character - bom = "\xef\xbb\xbf" ) +// Byte order mark character +var bomUTF8 = []byte("\xEF\xBB\xBF") +var bomUTF16LE = []byte("\xFF\xFE") +var bomUTF16BE = []byte("\xFE\xFF") + // Env holds key/value pair of valid environment variable type Env map[string]string @@ -48,12 +53,12 @@ func Must(fn func(filenames ...string) error, filenames ...string) { } // Apply is a function to load an io Reader then export the valid variables into environment variables if they do not exist. -func Apply(r io.Reader) error { +func Apply(r Reader) error { return parset(r, false) } // OverApply is a function to load an io Reader then export and override the valid variables into environment variables. -func OverApply(r io.Reader) error { +func OverApply(r Reader) error { return parset(r, true) } @@ -79,7 +84,7 @@ func loadenv(override bool, filenames ...string) error { } // parse and set :) -func parset(r io.Reader, override bool) error { +func parset(r Reader, override bool) error { env, err := strictParse(r, override) if err != nil { return err @@ -105,7 +110,7 @@ func setenv(key, val string, override bool) { // Parse is a function to parse line by line any io.Reader supplied and returns the valid Env key/value pair of valid variables. // It expands the value of a variable from the environment variable but does not set the value to the environment itself. // This function is skipping any invalid lines and only processing the valid one. -func Parse(r io.Reader) Env { +func Parse(r Reader) Env { env, _ := strictParse(r, false) return env } @@ -113,7 +118,7 @@ func Parse(r io.Reader) Env { // StrictParse is a function to parse line by line any io.Reader supplied and returns the valid Env key/value pair of valid variables. // It expands the value of a variable from the environment variable but does not set the value to the environment itself. 
// This function is returning an error if there are any invalid lines. -func StrictParse(r io.Reader) (Env, error) { +func StrictParse(r Reader) (Env, error) { return strictParse(r, false) } @@ -201,12 +206,34 @@ func splitLines(data []byte, atEOF bool) (advance int, token []byte, err error) return eol, data[:idx], nil } -func strictParse(r io.Reader, override bool) (Env, error) { +type Reader interface { + io.Reader + io.ReaderAt +} + +func strictParse(r Reader, override bool) (Env, error) { env := make(Env) - scanner := bufio.NewScanner(r) - scanner.Split(splitLines) - firstLine := true + // We choose a different scanner depending on file encoding. + var scanner *bufio.Scanner + + // There can be a maximum of 3 BOM bytes. + bomByteBuffer := make([]byte, 3) + if _, err := r.ReadAt(bomByteBuffer, 0); err != nil { + return env, err + } + + if bytes.HasPrefix(bomByteBuffer, bomUTF8) { + scanner = bufio.NewScanner(transform.NewReader(r, unicode.UTF8BOM.NewDecoder())) + } else if bytes.HasPrefix(bomByteBuffer, bomUTF16LE) { + scanner = bufio.NewScanner(transform.NewReader(r, unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM).NewDecoder())) + } else if bytes.HasPrefix(bomByteBuffer, bomUTF16BE) { + scanner = bufio.NewScanner(transform.NewReader(r, unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM).NewDecoder())) + } else { + scanner = bufio.NewScanner(r) + } + + scanner.Split(splitLines) for scanner.Scan() { if err := scanner.Err(); err != nil { @@ -214,12 +241,6 @@ func strictParse(r io.Reader, override bool) (Env, error) { } line := strings.TrimSpace(scanner.Text()) - - if firstLine { - line = strings.TrimPrefix(line, bom) - firstLine = false - } - if line == "" || line[0] == '#' { continue } diff --git a/gotenv_test.go b/gotenv_test.go index b566b38..1a4d041 100644 --- a/gotenv_test.go +++ b/gotenv_test.go @@ -3,7 +3,6 @@ package gotenv_test import ( "bufio" "errors" - "io" "os" "strings" "testing" @@ -243,26 +242,34 @@ func TestStrictParse(t *testing.T) { }
type failingReader struct { - io.Reader + gotenv.Reader } func (fr failingReader) Read(p []byte) (n int, err error) { return 0, errors.New("you shall not read") } +func (fr failingReader) ReadAt(p []byte, off int64) (n int, err error) { + return 0, errors.New("you shall not read") +} + func TestStrictParse_PassThroughErrors(t *testing.T) { _, err := gotenv.StrictParse(&failingReader{}) assert.Error(t, err) } type infiniteReader struct { - io.Reader + gotenv.Reader } func (er infiniteReader) Read(p []byte) (n int, err error) { return len(p), nil } +func (er infiniteReader) ReadAt(p []byte, off int64) (n int, err error) { + return len(p), nil +} + func TestStrictParse_NoTokenPassThroughErrors(t *testing.T) { _, err := gotenv.StrictParse(&infiniteReader{}) assert.Error(t, err) @@ -346,7 +353,7 @@ func TestLoad_nonExist(t *testing.T) { } func TestLoad_unicodeBOMFixture(t *testing.T) { - file := "fixtures/bom.env" + file := "fixtures/utf8_bom.env" f, err := os.Open(file) assert.Nil(t, err) @@ -364,13 +371,34 @@ func TestLoad_unicodeBOMFixture(t *testing.T) { } } -func TestLoad_unicodeBOM(t *testing.T) { - file := "fixtures/bom.env" +func TestLoad_BOM_UTF8(t *testing.T) { + defer os.Clearenv() - err := gotenv.Load(file) - assert.Nil(t, err) - assert.Equal(t, "UTF-8", os.Getenv("BOM")) - os.Clearenv() + file := "fixtures/utf8_bom.env" + + if err := gotenv.Load(file); assert.Nil(t, err) { + assert.Equal(t, "UTF-8", os.Getenv("BOM")) + } +} + +func TestLoad_BOM_UTF16_LE(t *testing.T) { + defer os.Clearenv() + + file := "fixtures/utf16le_bom.env" + + if err := gotenv.Load(file); assert.Nil(t, err) { + assert.Equal(t, "UTF-16 LE", os.Getenv("BOM")) + } +} + +func TestLoad_BOM_UTF16_BE(t *testing.T) { + defer os.Clearenv() + + file := "fixtures/utf16be_bom.env" + + if err := gotenv.Load(file); assert.Nil(t, err) { + assert.Equal(t, "UTF-16 BE", os.Getenv("BOM")) + } } func TestMust_Load(t *testing.T) {