Skip to content

Commit

Permalink
feat: support reading UTF16 files (#26)
Browse files Browse the repository at this point in the history
To allow this, we need to use a different scanner that skips whichever
BOM bytes may occur. This means \xFFFE and \xFEFF, since UTF16 can be
either little endian or big endian.

By default, Windows uses UTF16 little endian.

Fixes #25
  • Loading branch information
Bios-Marcel committed Aug 14, 2023
1 parent 266f68a commit 0a22e48
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 29 deletions.
Binary file added fixtures/utf16be_bom.env
Binary file not shown.
Binary file added fixtures/utf16le_bom.env
Binary file not shown.
File renamed without changes.
5 changes: 4 additions & 1 deletion go.mod
Expand Up @@ -2,7 +2,10 @@ module github.com/subosito/gotenv

go 1.18

require github.com/stretchr/testify v1.7.5
require (
github.com/stretchr/testify v1.7.5
golang.org/x/text v0.12.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -8,6 +8,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.5 h1:s5PTfem8p8EbKQOctVV53k6jCJt3UX4IEJzwh+C324Q=
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc=
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Expand Down
57 changes: 39 additions & 18 deletions gotenv.go
Expand Up @@ -12,6 +12,9 @@ import (
"sort"
"strconv"
"strings"

"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)

const (
Expand All @@ -20,11 +23,13 @@ const (

// Pattern for detecting valid variable within a value
variablePattern = `(\\)?(\$)(\{?([A-Z0-9_]+)?\}?)`

// Byte order mark character
bom = "\xef\xbb\xbf"
)

// Byte order marks (BOM) that may prefix an input, one per supported encoding.
var (
	// bomUTF8 is the three-byte UTF-8 byte order mark.
	bomUTF8 = []byte{0xEF, 0xBB, 0xBF}
	// bomUTF16LE is the two-byte UTF-16 little endian byte order mark.
	bomUTF16LE = []byte{0xFF, 0xFE}
	// bomUTF16BE is the two-byte UTF-16 big endian byte order mark.
	bomUTF16BE = []byte{0xFE, 0xFF}
)

// Env holds key/value pair of valid environment variable
type Env map[string]string

Expand All @@ -48,12 +53,12 @@ func Must(fn func(filenames ...string) error, filenames ...string) {
}

// Apply is a function to load an io Reader then export the valid variables into environment variables if they do not exist.
func Apply(r io.Reader) error {
func Apply(r Reader) error {
return parset(r, false)
}

// OverApply is a function to load an io Reader then export and override the valid variables into environment variables.
func OverApply(r io.Reader) error {
func OverApply(r Reader) error {
return parset(r, true)
}

Expand All @@ -79,7 +84,7 @@ func loadenv(override bool, filenames ...string) error {
}

// parse and set :)
func parset(r io.Reader, override bool) error {
func parset(r Reader, override bool) error {
env, err := strictParse(r, override)
if err != nil {
return err
Expand All @@ -105,15 +110,15 @@ func setenv(key, val string, override bool) {
// Parse is a function to parse line by line any io.Reader supplied and returns the valid Env key/value pair of valid variables.
// It expands the value of a variable from the environment variable but does not set the value to the environment itself.
// This function is skipping any invalid lines and only processing the valid one.
func Parse(r io.Reader) Env {
func Parse(r Reader) Env {
env, _ := strictParse(r, false)
return env
}

// StrictParse is a function to parse line by line any io.Reader supplied and returns the valid Env key/value pair of valid variables.
// It expands the value of a variable from the environment variable but does not set the value to the environment itself.
// This function is returning an error if there are any invalid lines.
func StrictParse(r io.Reader) (Env, error) {
func StrictParse(r Reader) (Env, error) {
return strictParse(r, false)
}

Expand Down Expand Up @@ -201,25 +206,41 @@ func splitLines(data []byte, atEOF bool) (advance int, token []byte, err error)
return eol, data[:idx], nil
}

func strictParse(r io.Reader, override bool) (Env, error) {
// Reader is the input type accepted by the parsing functions in this package.
// It combines io.Reader with io.ReaderAt: strictParse calls ReadAt at offset 0
// to inspect the leading bytes for a byte order mark, and then scans the
// content through the io.Reader side.
type Reader interface {
io.Reader
io.ReaderAt
}

func strictParse(r Reader, override bool) (Env, error) {
env := make(Env)
scanner := bufio.NewScanner(r)
scanner.Split(splitLines)

firstLine := true
// We choose a different scanner depending on the file encoding.
var scanner *bufio.Scanner

// There can be a maximum of 3 BOM bytes.
bomByteBuffer := make([]byte, 3)
if _, err := r.ReadAt(bomByteBuffer, 0); err != nil {
return env, err
}

if bytes.HasPrefix(bomByteBuffer, bomUTF8) {
scanner = bufio.NewScanner(transform.NewReader(r, unicode.UTF8BOM.NewDecoder()))
} else if bytes.HasPrefix(bomByteBuffer, bomUTF16LE) {
scanner = bufio.NewScanner(transform.NewReader(r, unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM).NewDecoder()))
} else if bytes.HasPrefix(bomByteBuffer, bomUTF16BE) {
scanner = bufio.NewScanner(transform.NewReader(r, unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM).NewDecoder()))
} else {
scanner = bufio.NewScanner(r)
}

scanner.Split(splitLines)

for scanner.Scan() {
if err := scanner.Err(); err != nil {
return env, err
}

line := strings.TrimSpace(scanner.Text())

if firstLine {
line = strings.TrimPrefix(line, bom)
firstLine = false
}

if line == "" || line[0] == '#' {
continue
}
Expand Down
48 changes: 38 additions & 10 deletions gotenv_test.go
Expand Up @@ -3,7 +3,6 @@ package gotenv_test
import (
"bufio"
"errors"
"io"
"os"
"strings"
"testing"
Expand Down Expand Up @@ -243,26 +242,34 @@ func TestStrictParse(t *testing.T) {
}

type failingReader struct {
io.Reader
gotenv.Reader
}

// Read never reads anything: it reports zero bytes and a fixed error on
// every call.
func (fr failingReader) Read(p []byte) (n int, err error) {
	err = errors.New("you shall not read")
	return 0, err
}

// ReadAt ignores both the buffer and the offset; it reports zero bytes and a
// fixed error on every call.
func (fr failingReader) ReadAt(p []byte, off int64) (n int, err error) {
	readErr := errors.New("you shall not read")
	return 0, readErr
}

func TestStrictParse_PassThroughErrors(t *testing.T) {
_, err := gotenv.StrictParse(&failingReader{})
assert.Error(t, err)
}

type infiniteReader struct {
io.Reader
gotenv.Reader
}

// Read claims to have filled p completely on every call and never reports an
// error; the contents of p are left untouched.
func (er infiniteReader) Read(p []byte) (n int, err error) {
	n = len(p)
	return n, nil
}

// ReadAt ignores the offset, claims to have filled p completely, and never
// reports an error; the contents of p are left untouched.
func (er infiniteReader) ReadAt(p []byte, off int64) (n int, err error) {
	filled := len(p)
	return filled, nil
}

func TestStrictParse_NoTokenPassThroughErrors(t *testing.T) {
_, err := gotenv.StrictParse(&infiniteReader{})
assert.Error(t, err)
Expand Down Expand Up @@ -346,7 +353,7 @@ func TestLoad_nonExist(t *testing.T) {
}

func TestLoad_unicodeBOMFixture(t *testing.T) {
file := "fixtures/bom.env"
file := "fixtures/utf8_bom.env"

f, err := os.Open(file)
assert.Nil(t, err)
Expand All @@ -364,13 +371,34 @@ func TestLoad_unicodeBOMFixture(t *testing.T) {
}
}

func TestLoad_unicodeBOM(t *testing.T) {
file := "fixtures/bom.env"
func TestLoad_BOM_UTF8(t *testing.T) {
defer os.Clearenv()

err := gotenv.Load(file)
assert.Nil(t, err)
assert.Equal(t, "UTF-8", os.Getenv("BOM"))
os.Clearenv()
file := "fixtures/utf8_bom.env"

if err := gotenv.Load(file); assert.Nil(t, err) {
assert.Equal(t, "UTF-8", os.Getenv("BOM"))
}
}

// TestLoad_BOM_UTF16_LE loads the UTF-16 little endian fixture and checks
// that the BOM variable it defines reaches the process environment.
func TestLoad_BOM_UTF16_LE(t *testing.T) {
	const fixture = "fixtures/utf16le_bom.env"

	// Wipe the environment afterwards so loaded variables do not leak out.
	defer os.Clearenv()

	loadErr := gotenv.Load(fixture)
	if !assert.Nil(t, loadErr) {
		return
	}

	assert.Equal(t, "UTF-16 LE", os.Getenv("BOM"))
}

// TestLoad_BOM_UTF16_BE loads the UTF-16 big endian fixture and checks that
// the BOM variable it defines reaches the process environment.
func TestLoad_BOM_UTF16_BE(t *testing.T) {
	const fixture = "fixtures/utf16be_bom.env"

	// Wipe the environment afterwards so loaded variables do not leak out.
	defer os.Clearenv()

	loadErr := gotenv.Load(fixture)
	if !assert.Nil(t, loadErr) {
		return
	}

	assert.Equal(t, "UTF-16 BE", os.Getenv("BOM"))
}

func TestMust_Load(t *testing.T) {
Expand Down

0 comments on commit 0a22e48

Please sign in to comment.