From 3fc4292b58a67b78e1dbb6e47b4879a6cc602ec4 Mon Sep 17 00:00:00 2001 From: John Barton Date: Mon, 6 Feb 2023 08:47:38 +1100 Subject: [PATCH] Fix bug where internal unquoted whitespace truncates values (#205) * Add tests to cover the regression reported in #204 * Add a comment on regex for clarity * Remove some old code that wasn't doing anything * Push _all_ parse code into the parser and get tests calling live code * Add some newline specific tests * Add some YAML tests for the newline/space split bug * Fix incorrect terminating of lines on whitespace * Fix most of the parser regressions * Bring back FOO.BAR names * remove some commented out code --- fixtures/plain.env | 3 +- godotenv.go | 126 --------------------------------------------- godotenv_test.go | 69 ++++++++++++++++++++++--- parser.go | 80 +++++++++++++++++++++++++--- 4 files changed, 135 insertions(+), 143 deletions(-) diff --git a/fixtures/plain.env b/fixtures/plain.env index 43f7e44..e033366 100644 --- a/fixtures/plain.env +++ b/fixtures/plain.env @@ -4,4 +4,5 @@ OPTION_C= 3 OPTION_D =4 OPTION_E = 5 OPTION_F = -OPTION_G= \ No newline at end of file +OPTION_G= +OPTION_H=1 2 \ No newline at end of file diff --git a/godotenv.go b/godotenv.go index 3d312f3..61b0ebb 100644 --- a/godotenv.go +++ b/godotenv.go @@ -15,12 +15,10 @@ package godotenv import ( "bytes" - "errors" "fmt" "io" "os" "os/exec" - "regexp" "sort" "strconv" "strings" @@ -215,130 +213,6 @@ func readFile(filename string) (envMap map[string]string, err error) { return Parse(file) } -var exportRegex = regexp.MustCompile(`^\s*(?:export\s+)?(.*?)\s*$`) - -func parseLine(line string, envMap map[string]string) (key string, value string, err error) { - if len(line) == 0 { - err = errors.New("zero length string") - return - } - - // ditch the comments (but keep quoted hashes) - if strings.Contains(line, "#") { - segmentsBetweenHashes := strings.Split(line, "#") - quotesAreOpen := false - var segmentsToKeep []string - for _, segment := range segmentsBetweenHashes { - if strings.Count(segment, "\"") == 1 || strings.Count(segment, "'") == 1 { - if quotesAreOpen { - quotesAreOpen = false - segmentsToKeep = append(segmentsToKeep, segment) - } else { - quotesAreOpen = true - } - } - - if len(segmentsToKeep) == 0 || quotesAreOpen { - segmentsToKeep = append(segmentsToKeep, segment) - } - } - - line = strings.Join(segmentsToKeep, "#") - } - - firstEquals := strings.Index(line, "=") - firstColon := strings.Index(line, ":") - splitString := strings.SplitN(line, "=", 2) - if firstColon != -1 && (firstColon < firstEquals || firstEquals == -1) { - //this is a yaml-style line - splitString = strings.SplitN(line, ":", 2) - } - - if len(splitString) != 2 { - err = errors.New("can't separate key from value") - return - } - - // Parse the key - key = splitString[0] - - key = strings.TrimPrefix(key, "export") - - key = strings.TrimSpace(key) - - key = exportRegex.ReplaceAllString(splitString[0], "$1") - - // Parse the value - value = parseValue(splitString[1], envMap) - return -} - -var ( - singleQuotesRegex = regexp.MustCompile(`\A'(.*)'\z`) - doubleQuotesRegex = regexp.MustCompile(`\A"(.*)"\z`) - escapeRegex = regexp.MustCompile(`\\.`) - unescapeCharsRegex = regexp.MustCompile(`\\([^$])`) -) - -func parseValue(value string, envMap map[string]string) string { - - // trim - value = strings.Trim(value, " ") - - // check if we've got quoted values or possible escapes - if len(value) > 1 { - singleQuotes := singleQuotesRegex.FindStringSubmatch(value) - - doubleQuotes := doubleQuotesRegex.FindStringSubmatch(value) - - if singleQuotes != nil || doubleQuotes != nil { - // pull the quotes off the edges - value = value[1 : len(value)-1] - } - - if doubleQuotes != nil { - // expand newlines - value = escapeRegex.ReplaceAllStringFunc(value, func(match string) string { - c := strings.TrimPrefix(match, `\`) - switch c { - case "n": - return "\n" - case "r": - return "\r" - default: - return match - } - }) - // unescape characters - value = unescapeCharsRegex.ReplaceAllString(value, "$1") - } - - if singleQuotes == nil { - value = expandVariables(value, envMap) - } - } - - return value -} - -var expandVarRegex = regexp.MustCompile(`(\\)?(\$)(\()?\{?([A-Z0-9_]+)?\}?`) - -func expandVariables(v string, m map[string]string) string { - return expandVarRegex.ReplaceAllStringFunc(v, func(s string) string { - submatch := expandVarRegex.FindStringSubmatch(s) - - if submatch == nil { - return s - } - if submatch[1] == "\\" || submatch[2] == "(" { - return submatch[0][1:] - } else if submatch[4] != "" { - return m[submatch[4]] - } - return s - }) -} - func doubleQuoteEscape(line string) string { for _, c := range doubleQuoteSpecialChars { toReplace := "\\" + string(c) diff --git a/godotenv_test.go b/godotenv_test.go index 017d6b6..edb5781 100644 --- a/godotenv_test.go +++ b/godotenv_test.go @@ -12,9 +12,14 @@ import ( var noopPresets = make(map[string]string) func parseAndCompare(t *testing.T, rawEnvLine string, expectedKey string, expectedValue string) { - key, value, _ := parseLine(rawEnvLine, noopPresets) - if key != expectedKey || value != expectedValue { - t.Errorf("Expected '%v' to parse as '%v' => '%v', got '%v' => '%v' instead", rawEnvLine, expectedKey, expectedValue, key, value) + result, err := Unmarshal(rawEnvLine) + + if err != nil { + t.Errorf("Expected %q to parse as %q: %q, errored %q", rawEnvLine, expectedKey, expectedValue, err) + return + } + if result[expectedKey] != expectedValue { + t.Errorf("Expected '%v' to parse as '%v' => '%v', got %q instead", rawEnvLine, expectedKey, expectedValue, result) } } @@ -80,6 +85,7 @@ func TestReadPlainEnv(t *testing.T) { "OPTION_E": "5", "OPTION_F": "", "OPTION_G": "", + "OPTION_H": "1 2", } envMap, err := Read(envFileName) @@ -153,6 +159,7 @@ func TestLoadPlainEnv(t *testing.T) { "OPTION_C": "3", "OPTION_D": "4", "OPTION_E": "5", + "OPTION_H": "1 2", } loadEnvAndCompareValues(t, Load, envFileName, expectedValues, noopPresets) @@ -272,7 +279,6 @@ func TestExpanding(t *testing.T) { } }) } - } func TestVariableStringValueSeparator(t *testing.T) { @@ -369,6 +375,9 @@ func TestParsing(t *testing.T) { // expect(env('foo=bar ')).to eql('foo' => 'bar') # not 'bar ' parseAndCompare(t, "FOO=bar ", "FOO", "bar") + // unquoted internal whitespace is preserved + parseAndCompare(t, `KEY=value value`, "KEY", "value value") + // it 'ignores inline comments' do // expect(env("foo=bar # this is foo")).to eql('foo' => 'bar') parseAndCompare(t, "FOO=bar # this is foo", "FOO", "bar") @@ -391,10 +400,8 @@ func TestParsing(t *testing.T) { parseAndCompare(t, `FOO="bar\\r\ b\az"`, "FOO", "bar\\r baz") parseAndCompare(t, `="value"`, "", "value") - parseAndCompare(t, `KEY="`, "KEY", "\"") - parseAndCompare(t, `KEY="value`, "KEY", "\"value") - // leading whitespace should be ignored + // unquoted whitespace around keys should be ignored parseAndCompare(t, " KEY =value", "KEY", "value") parseAndCompare(t, " KEY=value", "KEY", "value") parseAndCompare(t, "\tKEY=value", "KEY", "value") @@ -402,7 +409,7 @@ func TestParsing(t *testing.T) { // it 'throws an error if line format is incorrect' do // expect{env('lol$wut')}.to raise_error(Dotenv::FormatError) badlyFormattedLine := "lol$wut" - _, _, err := parseLine(badlyFormattedLine, noopPresets) + _, err := Unmarshal(badlyFormattedLine) if err == nil { t.Errorf("Expected \"%v\" to return error, but it didn't", badlyFormattedLine) } @@ -520,3 +527,49 @@ func TestRoundtrip(t *testing.T) { } } + +func TestTrailingNewlines(t *testing.T) { + cases := map[string]struct { + input string + key string + value string + }{ + "Simple value without trailing newline": { + input: "KEY=value", + key: "KEY", + value: "value", + }, + "Value with internal whitespace without trailing newline": { + input: "KEY=value value", + key: "KEY", + value: "value value", + }, + "Value with internal whitespace with trailing newline": { + input: "KEY=value value\n", + key: "KEY", + value: "value value", + }, + "YAML style - value with internal whitespace without trailing newline": { + input: "KEY: value value", + key: "KEY", + value: "value value", + }, + "YAML style - value with internal whitespace with trailing newline": { + input: "KEY: value value\n", + key: "KEY", + value: "value value", + }, + } + + for n, c := range cases { + t.Run(n, func(t *testing.T) { + result, err := Unmarshal(c.input) + if err != nil { + t.Errorf("Input: %q Unexpected error:\t%q", c.input, err) + } + if result[c.key] != c.value { + t.Errorf("Input %q Expected:\t %q/%q\nGot:\t %q", c.input, c.key, c.value, result) + } + }) + } +} diff --git a/parser.go b/parser.go index 196418c..cc709af 100644 --- a/parser.go +++ b/parser.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "regexp" "strings" "unicode" ) @@ -69,7 +70,13 @@ func getStatementStart(src []byte) []byte { // locateKeyName locates and parses key name and returns rest of slice func locateKeyName(src []byte) (key string, cutset []byte, err error) { // trim "export" and space at beginning - src = bytes.TrimLeftFunc(bytes.TrimPrefix(src, []byte(exportPrefix)), isSpace) + src = bytes.TrimLeftFunc(src, isSpace) + if bytes.HasPrefix(src, []byte(exportPrefix)) { + trimmed := bytes.TrimPrefix(src, []byte(exportPrefix)) + if bytes.IndexFunc(trimmed, isSpace) == 0 { + src = bytes.TrimLeftFunc(trimmed, isSpace) + } + } // locate key name end and validate it in single loop offset := 0 @@ -88,8 +95,8 @@ loop: break loop case '_': default: - // variable name should match [A-Za-z0-9_] - if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) { + // variable name should match [A-Za-z0-9_.] + if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) || rchar == '.' { continue } @@ -113,13 +120,41 @@ loop: func extractVarValue(src []byte, vars map[string]string) (value string, rest []byte, err error) { quote, hasPrefix := hasQuotePrefix(src) if !hasPrefix { - // unquoted value - read until whitespace - end := bytes.IndexFunc(src, unicode.IsSpace) - if end == -1 { - return expandVariables(string(src), vars), nil, nil + // unquoted value - read until end of line + endOfLine := bytes.IndexFunc(src, isLineEnd) + + // Hit EOF without a trailing newline + if endOfLine == -1 { + endOfLine = len(src) + + if endOfLine == 0 { + return "", nil, nil + } } - return expandVariables(string(src[0:end]), vars), src[end:], nil + // Convert line to rune away to do accurate countback of runes + line := []rune(string(src[0:endOfLine])) + + // Assume end of line is end of var + endOfVar := len(line) + if endOfVar == 0 { + return "", src[endOfLine:], nil + } + + // Work backwards to check if the line ends in whitespace then + // a comment (ie asdasd # some comment) + for i := endOfVar - 1; i >= 0; i-- { + if line[i] == charComment && i > 0 { + if isSpace(line[i-1]) { + endOfVar = i + break + } + } + } + + trimmed := strings.TrimFunc(string(line[0:endOfVar]), isSpace) + + return expandVariables(trimmed, vars), src[endOfLine:], nil } // lookup quoted string terminator @@ -205,3 +240,32 @@ func isSpace(r rune) bool { } return false } + +func isLineEnd(r rune) bool { + if r == '\n' || r == '\r' { + return true + } + return false +} + +var ( + escapeRegex = regexp.MustCompile(`\\.`) + expandVarRegex = regexp.MustCompile(`(\\)?(\$)(\()?\{?([A-Z0-9_]+)?\}?`) + unescapeCharsRegex = regexp.MustCompile(`\\([^$])`) +) + +func expandVariables(v string, m map[string]string) string { + return expandVarRegex.ReplaceAllStringFunc(v, func(s string) string { + submatch := expandVarRegex.FindStringSubmatch(s) + + if submatch == nil { + return s + } + if submatch[1] == "\\" || submatch[2] == "(" { + return submatch[0][1:] + } else if submatch[4] != "" { + return m[submatch[4]] + } + return s + }) +}