Skip to content

Commit

Permalink
more consistent line splitting
Browse files Browse the repository at this point in the history
change the scanner to split lines according to the expected
behavior, i.e. split lines on LF, CR or CRLF

remove the need for an extra function to guess that after the fact

Co-authored-by: Luis Davim <luis.davim@gmail.com>
  • Loading branch information
LeGEC and luisdavim committed Jan 11, 2023
1 parent d94fdbe commit 65c8557
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 18 deletions.
50 changes: 32 additions & 18 deletions gotenv.go
Expand Up @@ -3,6 +3,7 @@ package gotenv

import (
"bufio"
"bytes"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -174,9 +175,38 @@ func Write(env Env, filename string) error {
return file.Sync()
}

func strictParse(r io.Reader, override bool) (Env, error) {
env := make(Env)
// splitLines is a valid SplitFunc for a bufio.Scanner. It splits lines on
// CR ('\r'), LF ('\n') or CRLF ("\r\n"). If a CR is immediately followed by a
// LF, the pair is treated as one single line break. The returned token never
// contains the line terminator.
//
// When called at EOF with no remaining data it returns a final, empty (but
// non-nil) token, so input that ends with a line break yields one trailing
// empty line.
func splitLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		// The token must be non-nil: since Go 1.22 a Scanner stops without
		// emitting anything when ErrFinalToken comes with a nil token, which
		// would drop the trailing empty line on newer toolchains.
		return 0, []byte{}, bufio.ErrFinalToken
	}

	idx := bytes.IndexAny(data, "\r\n")
	switch {
	case atEOF && idx < 0:
		// Last line of the input, not terminated by a line break.
		return len(data), data, bufio.ErrFinalToken

	case idx < 0:
		// No line break in the buffer yet: request more data.
		return 0, nil, nil

	case !atEOF && data[idx] == '\r' && idx == len(data)-1:
		// A CR in the last position of the buffer may be the first half of a
		// CRLF whose LF has not been read yet. Request more data instead of
		// consuming the CR alone, otherwise the LF would later be reported as
		// an extra empty line.
		return 0, nil, nil
	}

	// Consume the CR or LF.
	eol := idx + 1
	// Detect CRLF and consume the LF as part of the same line break.
	if data[idx] == '\r' && eol < len(data) && data[eol] == '\n' {
		eol++
	}

	return eol, data[:idx], nil
}

func strictParse(r io.Reader, env Env, override bool) (Env, error) {
if env == nil {
env = make(Env)
}
scanner := bufio.NewScanner(r)
scanner.Split(splitLines)

firstLine := true

Expand Down Expand Up @@ -283,7 +313,6 @@ func parseLine(s string, env Env, override bool) error {
return varReplacement(s, hsq, env, override)
}
val = varRgx.ReplaceAllStringFunc(val, fv)
val = parseVal(val, env, hdq, override)
}

env[key] = val
Expand Down Expand Up @@ -352,18 +381,3 @@ func checkFormat(s string, env Env) error {

return fmt.Errorf("line `%s` doesn't match format", s)
}

// parseVal handles a value that still embeds CR-separated lines. When val
// contains an "=" and ignoreNewlines is false, val is split on '\r': the first
// segment is returned as the value and every following segment is passed to
// parseLine with the same env and override. Errors returned by parseLine for
// those segments are discarded. In every other case val is returned unchanged.
func parseVal(val string, env Env, ignoreNewlines bool, override bool) string {
	if ignoreNewlines || !strings.Contains(val, "=") {
		return val
	}

	segments := strings.Split(val, "\r")
	if len(segments) < 2 {
		// No CR in the value: nothing to split off.
		return val
	}

	for _, line := range segments[1:] {
		// Best effort: a failure on an embedded line is ignored.
		_ = parseLine(line, env, override)
	}

	return segments[0]
}
78 changes: 78 additions & 0 deletions scanner_test.go
@@ -0,0 +1,78 @@
package gotenv

import (
"bufio"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// TestScanner runs a bufio.Scanner configured with splitLines over inputs
// using LF, CR, CRLF and mixed line endings, with and without a trailing line
// break, and checks both the content and the number of the lines produced.
func TestScanner(t *testing.T) {
	type testCase struct {
		name string
		in   string
		exp  []string
	}

	testCases := []testCase{
		{
			name: "regular LF split with trailing LF",
			in:   "aa\nbb\ncc\n",
			exp:  []string{"aa", "bb", "cc", ""},
		},
		{
			name: "regular LF split with no trailing LF",
			in:   "aa\nbb\ncc",
			exp:  []string{"aa", "bb", "cc"},
		},
		{
			name: "regular CR split with trailing CR",
			in:   "aa\rbb\rcc\r",
			exp:  []string{"aa", "bb", "cc", ""},
		},
		{
			name: "regular CR split with no trailing CR",
			in:   "aa\rbb\rcc",
			exp:  []string{"aa", "bb", "cc"},
		},
		{
			name: "regular CRLF split with trailing CRLF",
			in:   "aa\r\nbb\r\ncc\r\n",
			exp:  []string{"aa", "bb", "cc", ""},
		},
		{
			name: "regular CRLF split with no trailing CRLF",
			in:   "aa\r\nbb\r\ncc",
			exp:  []string{"aa", "bb", "cc"},
		},
		{
			name: "mix of possible line endings",
			in:   "aa\r\nbb\ncc\rdd",
			exp:  []string{"aa", "bb", "cc", "dd"},
		},
	}

	for _, tc := range testCases {
		scanner := bufio.NewScanner(strings.NewReader(tc.in))
		scanner.Split(splitLines)

		// count is the number of lines the scanner has produced so far.
		count := 0
		for ; scanner.Scan(); count++ {
			if count < len(tc.exp) {
				line := scanner.Text()
				assert.Equal(t, tc.exp[count], line, "testCase: %s - line %d", tc.name, count)
				continue
			}
			// More lines than expected: report each extra one.
			assert.Fail(t, "unexpected line", "testCase: %s - got extra line: %q", tc.name, scanner.Text())
		}

		assert.NoError(t, scanner.Err(), "testCase: %s", tc.name)
		assert.Equal(t, len(tc.exp), count, "testCase: %s - expected to have the correct line count", tc.name)
	}
}

0 comments on commit 65c8557

Please sign in to comment.