Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug where internal unquoted whitespace truncates values #205

Merged
merged 10 commits into from Feb 5, 2023
3 changes: 2 additions & 1 deletion fixtures/plain.env
Expand Up @@ -4,4 +4,5 @@ OPTION_C= 3
OPTION_D =4
OPTION_E = 5
OPTION_F =
OPTION_G=
OPTION_G=
OPTION_H=1 2
126 changes: 0 additions & 126 deletions godotenv.go
Expand Up @@ -15,12 +15,10 @@ package godotenv

import (
"bytes"
"errors"
"fmt"
"io"
"os"
"os/exec"
"regexp"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -215,130 +213,6 @@ func readFile(filename string) (envMap map[string]string, err error) {
return Parse(file)
}

var exportRegex = regexp.MustCompile(`^\s*(?:export\s+)?(.*?)\s*$`)

func parseLine(line string, envMap map[string]string) (key string, value string, err error) {
if len(line) == 0 {
err = errors.New("zero length string")
return
}

// ditch the comments (but keep quoted hashes)
if strings.Contains(line, "#") {
segmentsBetweenHashes := strings.Split(line, "#")
quotesAreOpen := false
var segmentsToKeep []string
for _, segment := range segmentsBetweenHashes {
if strings.Count(segment, "\"") == 1 || strings.Count(segment, "'") == 1 {
if quotesAreOpen {
quotesAreOpen = false
segmentsToKeep = append(segmentsToKeep, segment)
} else {
quotesAreOpen = true
}
}

if len(segmentsToKeep) == 0 || quotesAreOpen {
segmentsToKeep = append(segmentsToKeep, segment)
}
}

line = strings.Join(segmentsToKeep, "#")
}

firstEquals := strings.Index(line, "=")
firstColon := strings.Index(line, ":")
splitString := strings.SplitN(line, "=", 2)
if firstColon != -1 && (firstColon < firstEquals || firstEquals == -1) {
//this is a yaml-style line
splitString = strings.SplitN(line, ":", 2)
}

if len(splitString) != 2 {
err = errors.New("can't separate key from value")
return
}

// Parse the key
key = splitString[0]

key = strings.TrimPrefix(key, "export")

key = strings.TrimSpace(key)

key = exportRegex.ReplaceAllString(splitString[0], "$1")

// Parse the value
value = parseValue(splitString[1], envMap)
return
}

var (
singleQuotesRegex = regexp.MustCompile(`\A'(.*)'\z`)
doubleQuotesRegex = regexp.MustCompile(`\A"(.*)"\z`)
escapeRegex = regexp.MustCompile(`\\.`)
unescapeCharsRegex = regexp.MustCompile(`\\([^$])`)
)

func parseValue(value string, envMap map[string]string) string {

// trim
value = strings.Trim(value, " ")

// check if we've got quoted values or possible escapes
if len(value) > 1 {
singleQuotes := singleQuotesRegex.FindStringSubmatch(value)

doubleQuotes := doubleQuotesRegex.FindStringSubmatch(value)

if singleQuotes != nil || doubleQuotes != nil {
// pull the quotes off the edges
value = value[1 : len(value)-1]
}

if doubleQuotes != nil {
// expand newlines
value = escapeRegex.ReplaceAllStringFunc(value, func(match string) string {
c := strings.TrimPrefix(match, `\`)
switch c {
case "n":
return "\n"
case "r":
return "\r"
default:
return match
}
})
// unescape characters
value = unescapeCharsRegex.ReplaceAllString(value, "$1")
}

if singleQuotes == nil {
value = expandVariables(value, envMap)
}
}

return value
}

var expandVarRegex = regexp.MustCompile(`(\\)?(\$)(\()?\{?([A-Z0-9_]+)?\}?`)

func expandVariables(v string, m map[string]string) string {
return expandVarRegex.ReplaceAllStringFunc(v, func(s string) string {
submatch := expandVarRegex.FindStringSubmatch(s)

if submatch == nil {
return s
}
if submatch[1] == "\\" || submatch[2] == "(" {
return submatch[0][1:]
} else if submatch[4] != "" {
return m[submatch[4]]
}
return s
})
}

func doubleQuoteEscape(line string) string {
for _, c := range doubleQuoteSpecialChars {
toReplace := "\\" + string(c)
Expand Down
69 changes: 61 additions & 8 deletions godotenv_test.go
Expand Up @@ -12,9 +12,14 @@ import (
var noopPresets = make(map[string]string)

func parseAndCompare(t *testing.T, rawEnvLine string, expectedKey string, expectedValue string) {
key, value, _ := parseLine(rawEnvLine, noopPresets)
if key != expectedKey || value != expectedValue {
t.Errorf("Expected '%v' to parse as '%v' => '%v', got '%v' => '%v' instead", rawEnvLine, expectedKey, expectedValue, key, value)
result, err := Unmarshal(rawEnvLine)

if err != nil {
t.Errorf("Expected %q to parse as %q: %q, errored %q", rawEnvLine, expectedKey, expectedValue, err)
return
}
if result[expectedKey] != expectedValue {
t.Errorf("Expected '%v' to parse as '%v' => '%v', got %q instead", rawEnvLine, expectedKey, expectedValue, result)
}
}

Expand Down Expand Up @@ -80,6 +85,7 @@ func TestReadPlainEnv(t *testing.T) {
"OPTION_E": "5",
"OPTION_F": "",
"OPTION_G": "",
"OPTION_H": "1 2",
}

envMap, err := Read(envFileName)
Expand Down Expand Up @@ -153,6 +159,7 @@ func TestLoadPlainEnv(t *testing.T) {
"OPTION_C": "3",
"OPTION_D": "4",
"OPTION_E": "5",
"OPTION_H": "1 2",
}

loadEnvAndCompareValues(t, Load, envFileName, expectedValues, noopPresets)
Expand Down Expand Up @@ -272,7 +279,6 @@ func TestExpanding(t *testing.T) {
}
})
}

}

func TestVariableStringValueSeparator(t *testing.T) {
Expand Down Expand Up @@ -369,6 +375,9 @@ func TestParsing(t *testing.T) {
// expect(env('foo=bar ')).to eql('foo' => 'bar') # not 'bar '
parseAndCompare(t, "FOO=bar ", "FOO", "bar")

// unquoted internal whitespace is preserved
parseAndCompare(t, `KEY=value value`, "KEY", "value value")

// it 'ignores inline comments' do
// expect(env("foo=bar # this is foo")).to eql('foo' => 'bar')
parseAndCompare(t, "FOO=bar # this is foo", "FOO", "bar")
Expand All @@ -391,18 +400,16 @@ func TestParsing(t *testing.T) {
parseAndCompare(t, `FOO="bar\\r\ b\az"`, "FOO", "bar\\r baz")

parseAndCompare(t, `="value"`, "", "value")
parseAndCompare(t, `KEY="`, "KEY", "\"")
parseAndCompare(t, `KEY="value`, "KEY", "\"value")

// leading whitespace should be ignored
// unquoted whitespace around keys should be ignored
parseAndCompare(t, " KEY =value", "KEY", "value")
parseAndCompare(t, " KEY=value", "KEY", "value")
parseAndCompare(t, "\tKEY=value", "KEY", "value")

// it 'throws an error if line format is incorrect' do
// expect{env('lol$wut')}.to raise_error(Dotenv::FormatError)
badlyFormattedLine := "lol$wut"
_, _, err := parseLine(badlyFormattedLine, noopPresets)
_, err := Unmarshal(badlyFormattedLine)
if err == nil {
t.Errorf("Expected \"%v\" to return error, but it didn't", badlyFormattedLine)
}
Expand Down Expand Up @@ -520,3 +527,49 @@ func TestRoundtrip(t *testing.T) {

}
}

func TestTrailingNewlines(t *testing.T) {
cases := map[string]struct {
input string
key string
value string
}{
"Simple value without trailing newline": {
input: "KEY=value",
key: "KEY",
value: "value",
},
"Value with internal whitespace without trailing newline": {
input: "KEY=value value",
key: "KEY",
value: "value value",
},
"Value with internal whitespace with trailing newline": {
input: "KEY=value value\n",
key: "KEY",
value: "value value",
},
"YAML style - value with internal whitespace without trailing newline": {
input: "KEY: value value",
key: "KEY",
value: "value value",
},
"YAML style - value with internal whitespace with trailing newline": {
input: "KEY: value value\n",
key: "KEY",
value: "value value",
},
}

for n, c := range cases {
t.Run(n, func(t *testing.T) {
result, err := Unmarshal(c.input)
if err != nil {
t.Errorf("Input: %q Unexpected error:\t%q", c.input, err)
}
if result[c.key] != c.value {
t.Errorf("Input %q Expected:\t %q/%q\nGot:\t %q", c.input, c.key, c.value, result)
}
})
}
}
80 changes: 72 additions & 8 deletions parser.go
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"fmt"
"regexp"
"strings"
"unicode"
)
Expand Down Expand Up @@ -69,7 +70,13 @@ func getStatementStart(src []byte) []byte {
// locateKeyName locates and parses key name and returns rest of slice
func locateKeyName(src []byte) (key string, cutset []byte, err error) {
// trim "export" and space at beginning
src = bytes.TrimLeftFunc(bytes.TrimPrefix(src, []byte(exportPrefix)), isSpace)
src = bytes.TrimLeftFunc(src, isSpace)
if bytes.HasPrefix(src, []byte(exportPrefix)) {
trimmed := bytes.TrimPrefix(src, []byte(exportPrefix))
if bytes.IndexFunc(trimmed, isSpace) == 0 {
src = bytes.TrimLeftFunc(trimmed, isSpace)
}
}

// locate key name end and validate it in single loop
offset := 0
Expand All @@ -88,8 +95,8 @@ loop:
break loop
case '_':
default:
// variable name should match [A-Za-z0-9_]
if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) {
// variable name should match [A-Za-z0-9_.]
if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) || rchar == '.' {
continue
}

Expand All @@ -113,13 +120,41 @@ loop:
func extractVarValue(src []byte, vars map[string]string) (value string, rest []byte, err error) {
quote, hasPrefix := hasQuotePrefix(src)
if !hasPrefix {
// unquoted value - read until whitespace
end := bytes.IndexFunc(src, unicode.IsSpace)
if end == -1 {
return expandVariables(string(src), vars), nil, nil
// unquoted value - read until end of line
endOfLine := bytes.IndexFunc(src, isLineEnd)

// Hit EOF without a trailing newline
if endOfLine == -1 {
endOfLine = len(src)

if endOfLine == 0 {
return "", nil, nil
}
}

return expandVariables(string(src[0:end]), vars), src[end:], nil
// Convert line to rune away to do accurate countback of runes
line := []rune(string(src[0:endOfLine]))

// Assume end of line is end of var
endOfVar := len(line)
if endOfVar == 0 {
return "", src[endOfLine:], nil
}

// Work backwards to check if the line ends in whitespace then
// a comment (ie asdasd # some comment)
for i := endOfVar - 1; i >= 0; i-- {
if line[i] == charComment && i > 0 {
if isSpace(line[i-1]) {
endOfVar = i
break
}
}
}

trimmed := strings.TrimFunc(string(line[0:endOfVar]), isSpace)

return expandVariables(trimmed, vars), src[endOfLine:], nil
}

// lookup quoted string terminator
Expand Down Expand Up @@ -205,3 +240,32 @@ func isSpace(r rune) bool {
}
return false
}

func isLineEnd(r rune) bool {
if r == '\n' || r == '\r' {
return true
}
return false
}

var (
escapeRegex = regexp.MustCompile(`\\.`)
expandVarRegex = regexp.MustCompile(`(\\)?(\$)(\()?\{?([A-Z0-9_]+)?\}?`)
unescapeCharsRegex = regexp.MustCompile(`\\([^$])`)
)

func expandVariables(v string, m map[string]string) string {
return expandVarRegex.ReplaceAllStringFunc(v, func(s string) string {
submatch := expandVarRegex.FindStringSubmatch(s)

if submatch == nil {
return s
}
if submatch[1] == "\\" || submatch[2] == "(" {
return submatch[0][1:]
} else if submatch[4] != "" {
return m[submatch[4]]
}
return s
})
}