Fix improperly placed chunks

Resolve #65 - Also add even more tests for checking `ldiff` results against `diff` results. - Fix issues with diff/ldiff output highlighted by the above tests. - Add a parameter to indicate that the hunk being processed is the _last_ hunk; this results in correct counting of the hunk size. - The misplaced chunks were happening because of an improper `.abs` on `#diff_size`, when the `.abs` needed to be on the finding of the maximum diff size.
halostatue · Jul 1, 2020 · dbb36e6 · dbb36e6
1 parent 99f65fd
commit dbb36e6
Show file tree

Hide file tree

Showing 19 changed files with 330 additions and 100 deletions.
diff --git a/lib/diff/lcs/block.rb b/lib/diff/lcs/block.rb
@@ -19,7 +19,7 @@ def initialize(chunk)
   end
 
   def diff_size
-    (@insert.size - @remove.size).abs
+    @insert.size - @remove.size
   end
 
   def op

diff --git a/lib/diff/lcs/hunk.rb b/lib/diff/lcs/hunk.rb
@@ -20,7 +20,7 @@ def initialize(data_old, data_new, piece, flag_context, file_length_difference)
     before = after = file_length_difference
     after += @blocks[0].diff_size
     @file_length_difference = after # The caller must get this manually
-    @max_diff_size = @blocks.map { |e| e.diff_size }.max
+    @max_diff_size = @blocks.map { |e| e.diff_size.abs }.max
 
     # Save the start & end of each array. If the array doesn't exist (e.g.,
     # we're only adding items in this block), then figure out the line
@@ -101,26 +101,26 @@ def overlaps?(hunk)
   end
 
   # Returns a diff string based on a format.
-  def diff(format)
+  def diff(format, last = false)
     case format
     when :old
-      old_diff
+      old_diff(last)
     when :unified
-      unified_diff
+      unified_diff(last)
     when :context
-      context_diff
+      context_diff(last)
     when :ed
       self
     when :reverse_ed, :ed_finish
-      ed_diff(format)
+      ed_diff(format, last)
     else
       fail "Unknown diff format #{format}."
     end
   end
 
   # Note that an old diff can't have any context. Therefore, we know that
   # there's only one block in the hunk.
-  def old_diff
+  def old_diff(last = false)
     warn 'Expecting only one block in an old diff hunk!' if @blocks.size > 1
     op_act = { '+' => 'a', '-' => 'd', '!' => 'c' }
 
@@ -129,26 +129,26 @@ def old_diff
     # Calculate item number range. Old diff range is just like a context
     # diff range, except the ranges are on one line with the action between
     # them.
-    s = encode("#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n")
+    s = encode("#{context_range(:old, ',')}#{op_act[block.op]}#{context_range(:new, ',')}\n")
     # If removing anything, just print out all the remove lines in the hunk
     # which is just all the remove lines in the block.
     unless block.remove.empty?
-      @data_old[@start_old..@end_old].each { |e| s << encode('< ') + e + encode("\n") }
+      @data_old[@start_old..@end_old].each { |e| s << encode('< ') + e.chomp + encode("\n") }
     end
 
     s << encode("---\n") if block.op == '!'
 
     unless block.insert.empty?
-      @data_new[@start_new..@end_new].each { |e| s << encode('> ') + e + encode("\n") }
+      @data_new[@start_new..@end_new].each { |e| s << encode('> ') + e.chomp + encode("\n") }
     end
 
     s
   end
   private :old_diff
 
-  def unified_diff
+  def unified_diff(last = false)
     # Calculate item number range.
-    s = encode("@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n")
+    s = encode("@@ -#{unified_range(:old, last)} +#{unified_range(:new, last)} @@\n")
 
     # Outlist starts containing the hunk of the old file. Removing an item
     # just means putting a '-' in front of it. Inserting an item requires
@@ -161,7 +161,14 @@ def unified_diff
     # file -- don't take removed items into account.
     lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0
 
-    outlist = @data_old[lo..hi].map { |e| e.insert(0, encode(' ')) }
+    outlist = @data_old[lo..hi].map { |e| String.new("#{encode(' ')}#{e.chomp}") }
+
+    last_block = blocks[-1]
+
+    if last
+      old_missing_newline = missing_last_newline?(@data_old)
+      new_missing_newline = missing_last_newline?(@data_new)
+    end
 
     @blocks.each do |block|
       block.remove.each do |item|
@@ -170,67 +177,102 @@ def unified_diff
         outlist[offset][0, 1] = encode(op)
         num_removed += 1
       end
+
+      if last && block == last_block && old_missing_newline && !new_missing_newline
+        outlist << encode("\\ No newline at end of file")
+        num_removed += 1
+      end
+
       block.insert.each do |item|
         op     = item.action.to_s # +
         offset = item.position - @start_new + num_removed
-        outlist[offset, 0] = encode(op) + @data_new[item.position]
+        outlist[offset, 0] = encode(op) + @data_new[item.position].chomp
         num_added += 1
       end
     end
 
+    if last && new_missing_newline
+      outlist << encode("\\ No newline at end of file")
+    end
+
     s << outlist.join(encode("\n"))
+
+    s
   end
   private :unified_diff
 
-  def context_diff
+  def context_diff(last = false)
     s = encode("***************\n")
-    s << encode("*** #{context_range(:old)} ****\n")
-    r = context_range(:new)
+    s << encode("*** #{context_range(:old, ',', true)} ****\n")
+    r = context_range(:new, ',', true)
+
+    if last
+      old_missing_newline = missing_last_newline?(@data_old)
+      new_missing_newline = missing_last_newline?(@data_new)
+    end
 
     # Print out file 1 part for each block in context diff format if there
     # are any blocks that remove items
     lo, hi = @start_old, @end_old
     removes = @blocks.reject { |e| e.remove.empty? }
-    if removes
-      outlist = @data_old[lo..hi].map { |e| e.insert(0, encode(' ')) }
+    if !removes.empty?
+      outlist = @data_old[lo..hi].map { |e| String.new("#{encode('  ')}#{e.chomp}") }
+
+      last_block = removes[-1]
 
       removes.each do |block|
         block.remove.each do |item|
-          outlist[item.position - lo].insert(0, encode(block.op)) # - or !
+          outlist[item.position - lo][0, 1] = encode(block.op) # - or !
+        end
+
+        if last && block == last_block && old_missing_newline
+          outlist << encode("\\ No newline at end of file")
         end
       end
+
       s << outlist.join("\n")
     end
 
     s << encode("\n--- #{r} ----\n")
     lo, hi = @start_new, @end_new
     inserts = @blocks.reject { |e| e.insert.empty? }
-    if inserts
-      outlist = @data_new[lo..hi].collect { |e| e.insert(0, encode(' ')) }
+
+    if !inserts.empty?
+      outlist = @data_new[lo..hi].map { |e| String.new("#{encode('  ')}#{e.chomp}") }
+
+      last_block = inserts[-1]
+
       inserts.each do |block|
         block.insert.each do |item|
-          outlist[item.position - lo].insert(0, encode(block.op)) # - or !
+          outlist[item.position - lo][0, 1] = encode(block.op) # + or !
+        end
+
+        if last && block == last_block && new_missing_newline
+          outlist << encode("\\ No newline at end of file")
         end
       end
       s << outlist.join("\n")
     end
+
     s
   end
   private :context_diff
 
-  def ed_diff(format)
+  def ed_diff(format, last = false)
     op_act = { '+' => 'a', '-' => 'd', '!' => 'c' }
     warn 'Expecting only one block in an old diff hunk!' if @blocks.size > 1
 
     s =
       if format == :reverse_ed
-        encode("#{op_act[@blocks[0].op]}#{context_range(:old)}\n")
+        encode("#{op_act[@blocks[0].op]}#{context_range(:old, ',')}\n")
       else
         encode("#{context_range(:old, ' ')}#{op_act[@blocks[0].op]}\n")
       end
 
     unless @blocks[0].insert.empty?
-      @data_new[@start_new..@end_new].each do |e| s << e + encode("\n") end
+      @data_new[@start_new..@end_new].each do |e|
+        s << e.chomp + encode("\n")
+      end
       s << encode(".\n")
     end
     s
@@ -239,35 +281,51 @@ def ed_diff(format)
 
   # Generate a range of item numbers to print. Only print 1 number if the
   # range has only one item in it. Otherwise, it's 'start,end'
-  def context_range(mode, op = ',') # rubocop:disable Naming/UncommunicativeMethodParamName
+  def context_range(mode, op, last = false)
     case mode
     when :old
       s, e = (@start_old + 1), (@end_old + 1)
     when :new
       s, e = (@start_new + 1), (@end_new + 1)
     end
 
+    e = e - 1 if last
+    e = 1 if e.zero?
+
     s < e ? "#{s}#{op}#{e}" : e.to_s
   end
   private :context_range
 
   # Generate a range of item numbers to print for unified diff. Print number
   # where block starts, followed by number of lines in the block
   # (don't print number of lines if it's 1)
-  def unified_range(mode)
+  def unified_range(mode, last)
     case mode
     when :old
       s, e = (@start_old + 1), (@end_old + 1)
     when :new
       s, e = (@start_new + 1), (@end_new + 1)
     end
 
-    length = e - s + 1
+    length = e - s + (last ? 0 : 1)
+
     first = length < 2 ? e : s # "strange, but correct"
-    length == 1 ? first.to_s : "#{first},#{length}"
+    length <= 1 ? first.to_s : "#{first},#{length}"
   end
   private :unified_range
 
+  def missing_last_newline?(data)
+    newline = encode("\n")
+
+    if data[-2]
+      data[-2].end_with?(newline) && !data[-1].end_with?(newline)
+    elsif data[-1]
+      !data[-1].end_with?(newline)
+    else
+      true
+    end
+  end
+
   if String.method_defined?(:encoding)
     def encode(literal, target_encoding = @preferred_data_encoding)
       literal.encode target_encoding

diff --git a/lib/diff/lcs/ldiff.rb b/lib/diff/lcs/ldiff.rb
@@ -98,24 +98,19 @@ def run(args, _input = $stdin, output = $stdout, error = $stderr) #:nodoc:
     # items we've read from each file will differ by FLD (could be 0).
     file_length_difference = 0
 
-    if @binary.nil? or @binary
-      data_old = IO.read(file_old)
-      data_new = IO.read(file_new)
-
-      # Test binary status
-      if @binary.nil?
-        old_txt = data_old[0, 4096].scan(/\0/).empty?
-        new_txt = data_new[0, 4096].scan(/\0/).empty?
-        @binary = !old_txt or !new_txt
-      end
+    data_old = IO.read(file_old)
+    data_new = IO.read(file_new)
+
+    # Test binary status
+    if @binary.nil?
+      old_txt = data_old[0, 4096].scan(/\0/).empty?
+      new_txt = data_new[0, 4096].scan(/\0/).empty?
+      @binary = !old_txt or !new_txt
+    end
 
-      unless @binary
-        data_old = data_old.split($/).map { |e| e.chomp }
-        data_new = data_new.split($/).map { |e| e.chomp }
-      end
-    else
-      data_old = IO.readlines(file_old).map { |e| e.chomp }
-      data_new = IO.readlines(file_new).map { |e| e.chomp }
+    unless @binary
+      data_old = data_old.lines.to_a
+      data_new = data_new.lines.to_a
     end
 
     # diff yields lots of pieces, each of which is basically a Block object
@@ -163,13 +158,13 @@ def run(args, _input = $stdin, output = $stdout, error = $stderr) #:nodoc:
       end
     end
 
-    last = oldhunk.diff(@format)
+    last = oldhunk.diff(@format, true)
     last << "\n" if last.respond_to?(:end_with?) && !last.end_with?("\n")
 
     output << last
 
     output.reverse_each { |e| real_output << e.diff(:ed_finish) } if @format == :ed
 
-    1
+    return 1
   end
 end
diff --git a/spec/fixtures/ldiff/output.diff-c b/spec/fixtures/ldiff/output.diff-c
@@ -1,5 +1,5 @@
-*** spec/fixtures/aX	2019-02-01 22:29:34.000000000 -0500
---- spec/fixtures/bXaX	2019-02-01 22:29:43.000000000 -0500
+*** spec/fixtures/aX	2020-06-23 11:15:32.000000000 -0400
+--- spec/fixtures/bXaX	2020-06-23 11:15:32.000000000 -0400
 ***************
 *** 1 ****
 ! aX

diff --git a/spec/fixtures/ldiff/output.diff-u b/spec/fixtures/ldiff/output.diff-u
@@ -1,5 +1,5 @@
---- spec/fixtures/aX	2019-02-01 22:29:34.000000000 -0500
-+++ spec/fixtures/bXaX	2019-02-01 22:29:43.000000000 -0500
+--- spec/fixtures/aX	2020-06-23 11:15:32.000000000 -0400
++++ spec/fixtures/bXaX	2020-06-23 11:15:32.000000000 -0400
 @@ -1 +1 @@
 -aX
 +bXaX
diff --git a/spec/fixtures/ldiff/output.diff.chef b/spec/fixtures/ldiff/output.diff.chef
@@ -0,0 +1,4 @@
+3c3
+<   "description": "hi"
+---
+>   "description": "lo"
diff --git a/spec/fixtures/ldiff/output.diff.chef-c b/spec/fixtures/ldiff/output.diff.chef-c
@@ -0,0 +1,15 @@
+*** spec/fixtures/old-chef	2020-06-23 23:18:20.000000000 -0400
+--- spec/fixtures/new-chef	2020-06-23 23:18:20.000000000 -0400
+***************
+*** 1,4 ****
+  {
+    "name": "x",
+!   "description": "hi"
+  }
+\ No newline at end of file
+--- 1,4 ----
+  {
+    "name": "x",
+!   "description": "lo"
+  }
+\ No newline at end of file
diff --git a/spec/fixtures/ldiff/output.diff.chef-e b/spec/fixtures/ldiff/output.diff.chef-e
@@ -0,0 +1,3 @@
+3c
+  "description": "lo"
+.
diff --git a/spec/fixtures/ldiff/output.diff.chef-f b/spec/fixtures/ldiff/output.diff.chef-f
@@ -0,0 +1,3 @@
+c3
+  "description": "lo"
+.
diff --git a/spec/fixtures/ldiff/output.diff.chef-u b/spec/fixtures/ldiff/output.diff.chef-u
@@ -1,8 +1,9 @@
---- spec/fixtures/old-chef	2019-02-01 21:57:15.000000000 -0400
-+++ spec/fixtures/new-chef	2019-02-01 21:57:29.000000000 -0400
-@@ -1,5 +1,5 @@
+--- spec/fixtures/old-chef	2020-06-23 23:18:20.000000000 -0400
++++ spec/fixtures/new-chef	2020-06-23 23:18:20.000000000 -0400
+@@ -1,4 +1,4 @@
  {
    "name": "x",
 -  "description": "hi"
 +  "description": "lo"
  }
+\ No newline at end of file