From ef9bf34da0c67ab7958b46eddcf203956db844d2 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Fri, 30 Apr 2021 14:40:46 -0700 Subject: [PATCH] Avoid diffing by lines if inefficient Avoid diffing by lines if it turns out to be significantly less efficient than diffing by bytes. Before this change: ( """ - d5c14bdf6bac81c27afc5429500ed750 - 25483503b557c606dad4f144d27ae10b - 90bdbcdbb6ea7156068e3dcfb7459244 - 978f480a6e3cced51e297fbff9a506b7 + Xd5c14bdf6bac81c27afc5429500ed750 + X25483503b557c606dad4f144d27ae10b + X90bdbcdbb6ea7156068e3dcfb7459244 + X978f480a6e3cced51e297fbff9a506b7 """ ) After this change: strings.Join({ + "X", "d5c14bdf6bac81c27afc5429500ed750\n", + "X", "25483503b557c606dad4f144d27ae10b\n", + "X", "90bdbcdbb6ea7156068e3dcfb7459244\n", + "X", "978f480a6e3cced51e297fbff9a506b7\n", }, "") --- cmp/compare_test.go | 5 +++++ cmp/report_slices.go | 19 +++++++++++++++++-- cmp/testdata/diffs | 12 ++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/cmp/compare_test.go b/cmp/compare_test.go index 7eac029..baae591 100644 --- a/cmp/compare_test.go +++ b/cmp/compare_test.go @@ -1307,6 +1307,11 @@ using the AllowUnexported option.`, "\n"), x: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa,#=_value _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb,#=_value _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc,#=_value _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd,#=_value _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c,#=_value _value=4 41\t", y: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c _value=4 41\t", reason: "leading/trailing equal spans should not appear in diff lines", + }, { + label: label + "/AllLinesDiffer", + x: "d5c14bdf6bac81c27afc5429500ed750\n25483503b557c606dad4f144d27ae10b\n90bdbcdbb6ea7156068e3dcfb7459244\n978f480a6e3cced51e297fbff9a506b7\n", + y: "Xd5c14bdf6bac81c27afc5429500ed750\nX25483503b557c606dad4f144d27ae10b\nX90bdbcdbb6ea7156068e3dcfb7459244\nX978f480a6e3cced51e297fbff9a506b7\n", + reason: "all lines are different, so diffing based on lines is pointless", }} } diff --git a/cmp/report_slices.go b/cmp/report_slices.go index f985cc9..bd8ca15 100644 --- a/cmp/report_slices.go +++ b/cmp/report_slices.go @@ -98,6 +98,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { // Auto-detect the type of the data. var isLinedText, isText, isBinary bool var sx, sy string + var ssx, ssy []string switch { case t.Kind() == reflect.String: sx, sy = vx.String(), vy.String() @@ -130,6 +131,22 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { } isText = !isBinary isLinedText = isText && numLines >= 4 && maxLineLen <= 1024 + + // Avoid diffing by lines if it produces a significantly more complex + // edit script than diffing by bytes. + if isLinedText { + ssx = strings.Split(sx, "\n") + ssy = strings.Split(sy, "\n") + esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result { + return diff.BoolResult(ssx[ix] == ssy[iy]) + }) + esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result { + return diff.BoolResult(sx[ix] == sy[iy]) + }) + efficiencyLines := float64(esLines.Dist()) / float64(len(esLines)) + efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes)) + isLinedText = efficiencyLines < 4*efficiencyBytes + } } // Format the string into printable records. @@ -139,8 +156,6 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { // If the text appears to be multi-lined text, // then perform differencing across individual lines. case isLinedText: - ssx := strings.Split(sx, "\n") - ssy := strings.Split(sy, "\n") list = opts.formatDiffSlice( reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", func(v reflect.Value, d diffMode) textRecord { diff --git a/cmp/testdata/diffs b/cmp/testdata/diffs index 2d9f9ab..c02df29 100644 --- a/cmp/testdata/diffs +++ b/cmp/testdata/diffs @@ -1065,6 +1065,18 @@ ` _value=4 41 `, }, "") >>> TestDiff/Reporter/SurroundingEqualElements +<<< TestDiff/Reporter/AllLinesDiffer + strings.Join({ ++ "X", + "d5c14bdf6bac81c27afc5429500ed750\n", ++ "X", + "25483503b557c606dad4f144d27ae10b\n", ++ "X", + "90bdbcdbb6ea7156068e3dcfb7459244\n", ++ "X", + "978f480a6e3cced51e297fbff9a506b7\n", + }, "") +>>> TestDiff/Reporter/AllLinesDiffer <<< TestDiff/EmbeddedStruct/ParentStructA/Inequal teststructs.ParentStructA{ privateStruct: teststructs.privateStruct{