From e4ef594946b6b32be31cecd5b8c84be3cad21405 Mon Sep 17 00:00:00 2001 From: Janik Rabe Date: Wed, 17 Apr 2024 20:34:40 +0100 Subject: [PATCH] Fix counting of escape sequences when splitting TXT strings (#1540) `endingToTxtSlice`, used by TXT, SPF and a few other types, parses a string such as `"hello world"` from an RR's content in a zone file. These strings are limited to 255 characters, and `endingToTxtSlice` automatically splits them if they're longer than that. However, it didn't count the length correctly: escape sequences such as `\\` or `\123` were counted as multiple characters (2 and 4 respectively in these examples), but they should only count as one character because they represent a single byte in wire format (which is where this 255 character limit comes from). This commit fixes that. --- parse_test.go | 39 +++++++++++++++++++++++++++------- scan_rr.go | 59 ++++++++++++++++++++++++++++++++++++--------------- scan_test.go | 28 ++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 25 deletions(-) diff --git a/parse_test.go b/parse_test.go index 1b8f5db47..da94cc38e 100644 --- a/parse_test.go +++ b/parse_test.go @@ -1098,18 +1098,41 @@ func TestTXT(t *testing.T) { } } - // Test TXT record with chunk larger than 255 bytes, they should be split up, by the parser - s := "" - for i := 0; i < 255; i++ { - s += "a" + // Test TXT record with string larger than 255 bytes that should be split + // up by the parser. Add some escape sequences too to ensure their length + // is counted correctly. + s := `"\;\\\120` + strings.Repeat("a", 255) + `b"` + rr, err = NewRR(`test.local. 60 IN TXT ` + s) + if err != nil { + t.Error("failed to parse empty-string TXT record", err) + } + if rr.(*TXT).Txt[1] != "aaab" { + t.Errorf("Txt should have two strings, last one must be 'aaab', but is %s", rr.(*TXT).Txt[1]) } - s += "b" - rr, err = NewRR(`test.local. 60 IN TXT "` + s + `"`) + rrContent := strings.Replace(rr.String(), rr.Header().String(), "", 1) + expectedRRContent := `";\\x` + strings.Repeat("a", 252) + `" "aaab"` + if expectedRRContent != rrContent { + t.Errorf("Expected TXT RR content to be %#q but got %#q", expectedRRContent, rrContent) + } + + // Test TXT record that is already split up into strings of len <= 255. + s = fmt.Sprintf( + "%q %q %q %q %q %q", + strings.Repeat(`a`, 255), + strings.Repeat("b", 255), + strings.Repeat("c", 255), + strings.Repeat("d", 0), + strings.Repeat("e", 1), + strings.Repeat("f", 123), + ) + rr, err = NewRR(`test.local. 60 IN TXT ` + s) if err != nil { t.Error("failed to parse empty-string TXT record", err) } - if rr.(*TXT).Txt[1] != "b" { - t.Errorf("Txt should have two chunk, last one my be 'b', but is %s", rr.(*TXT).Txt[1]) + rrContent = strings.Replace(rr.String(), rr.Header().String(), "", 1) + expectedRRContent = s // same as input + if expectedRRContent != rrContent { + t.Errorf("Expected TXT RR content to be %#q but got %#q", expectedRRContent, rrContent) } } diff --git a/scan_rr.go b/scan_rr.go index 1a90c61f8..7d1ade7d8 100644 --- a/scan_rr.go +++ b/scan_rr.go @@ -51,25 +51,21 @@ func endingToTxtSlice(c *zlexer, errstr string) ([]string, *ParseError) { switch l.value { case zString: empty = false - if len(l.token) > 255 { - // split up tokens that are larger than 255 into 255-chunks - sx := []string{} - p, i := 0, 255 - for { - if i <= len(l.token) { - sx = append(sx, l.token[p:i]) - } else { - sx = append(sx, l.token[p:]) - break - - } - p, i = p+255, i+255 + // split up tokens that are larger than 255 into 255-chunks + sx := []string{} + p := 0 + for { + i := escapedStringOffset(l.token[p:], 255) + if i != -1 && p+i != len(l.token) { + sx = append(sx, l.token[p:p+i]) + } else { + sx = append(sx, l.token[p:]) + break + } - s = append(s, sx...) - break + p += i } - - s = append(s, l.token) + s = append(s, sx...) case zBlank: if quote { // zBlank can only be seen in between txt parts. @@ -1920,3 +1916,32 @@ func (rr *APL) parse(c *zlexer, o string) *ParseError { rr.Prefixes = prefixes return nil } + +// escapedStringOffset finds the offset within a string (which may contain escape +// sequences) that corresponds to a certain byte offset. If the input offset is +// out of bounds, -1 is returned. +func escapedStringOffset(s string, byteOffset int) int { + if byteOffset == 0 { + return 0 + } + + offset := 0 + for i := 0; i < len(s); i++ { + offset += 1 + + // Skip escape sequences + if s[i] != '\\' { + // Not an escape sequence; nothing to do. + } else if isDDD(s[i+1:]) { + i += 3 + } else { + i++ + } + + if offset >= byteOffset { + return i + 1 + } + } + + return -1 +} diff --git a/scan_test.go b/scan_test.go index 207748b64..580236ea2 100644 --- a/scan_test.go +++ b/scan_test.go @@ -427,3 +427,31 @@ func BenchmarkZoneParser(b *testing.B) { } } } + +func TestEscapedStringOffset(t *testing.T) { + var cases = []struct { + input string + inputOffset int + expectedOffset int + }{ + {"simple string with no escape sequences", 20, 20}, + {"simple string with no escape sequences", 500, -1}, + {`\;\088\\\;\120\\`, 0, 0}, + {`\;\088\\\;\120\\`, 1, 2}, + {`\;\088\\\;\120\\`, 2, 6}, + {`\;\088\\\;\120\\`, 3, 8}, + {`\;\088\\\;\120\\`, 4, 10}, + {`\;\088\\\;\120\\`, 5, 14}, + {`\;\088\\\;\120\\`, 6, 16}, + {`\;\088\\\;\120\\`, 7, -1}, + } + for i, test := range cases { + outputOffset := escapedStringOffset(test.input, test.inputOffset) + if outputOffset != test.expectedOffset { + t.Errorf( + "Test %d (input %#q offset %d) returned offset %d but expected %d", + i, test.input, test.inputOffset, outputOffset, test.expectedOffset, + ) + } + } +}