Skip to content

Commit

Permalink
Fix improperly placed chunks
Browse files Browse the repository at this point in the history
Resolve #65

- Also add even more tests for checking `ldiff` results against `diff` results.
- Fix issues with diff/ldiff output highlighted by the above tests.
- Add a parameter to indicate that the hunk being processed is the _last_ hunk;
  this results in correct counting of the hunk size.
- The misplaced chunks were happening because of an improper `.abs` on
  `#diff_size`, when the `.abs` needed to be on the finding of the maximum diff
  size.
  • Loading branch information
halostatue committed Jul 1, 2020
1 parent 99f65fd commit dbb36e6
Show file tree
Hide file tree
Showing 19 changed files with 330 additions and 100 deletions.
2 changes: 1 addition & 1 deletion lib/diff/lcs/block.rb
Expand Up @@ -19,7 +19,7 @@ def initialize(chunk)
end

def diff_size
(@insert.size - @remove.size).abs
@insert.size - @remove.size
end

def op
Expand Down
118 changes: 88 additions & 30 deletions lib/diff/lcs/hunk.rb
Expand Up @@ -20,7 +20,7 @@ def initialize(data_old, data_new, piece, flag_context, file_length_difference)
before = after = file_length_difference
after += @blocks[0].diff_size
@file_length_difference = after # The caller must get this manually
@max_diff_size = @blocks.map { |e| e.diff_size }.max
@max_diff_size = @blocks.map { |e| e.diff_size.abs }.max

# Save the start & end of each array. If the array doesn't exist (e.g.,
# we're only adding items in this block), then figure out the line
Expand Down Expand Up @@ -101,26 +101,26 @@ def overlaps?(hunk)
end

# Returns a diff string based on a format.
def diff(format)
def diff(format, last = false)
case format
when :old
old_diff
old_diff(last)
when :unified
unified_diff
unified_diff(last)
when :context
context_diff
context_diff(last)
when :ed
self
when :reverse_ed, :ed_finish
ed_diff(format)
ed_diff(format, last)
else
fail "Unknown diff format #{format}."
end
end

# Note that an old diff can't have any context. Therefore, we know that
# there's only one block in the hunk.
def old_diff
def old_diff(last = false)
warn 'Expecting only one block in an old diff hunk!' if @blocks.size > 1
op_act = { '+' => 'a', '-' => 'd', '!' => 'c' }

Expand All @@ -129,26 +129,26 @@ def old_diff
# Calculate item number range. Old diff range is just like a context
# diff range, except the ranges are on one line with the action between
# them.
s = encode("#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n")
s = encode("#{context_range(:old, ',')}#{op_act[block.op]}#{context_range(:new, ',')}\n")
# If removing anything, just print out all the remove lines in the hunk
# which is just all the remove lines in the block.
unless block.remove.empty?
@data_old[@start_old..@end_old].each { |e| s << encode('< ') + e + encode("\n") }
@data_old[@start_old..@end_old].each { |e| s << encode('< ') + e.chomp + encode("\n") }
end

s << encode("---\n") if block.op == '!'

unless block.insert.empty?
@data_new[@start_new..@end_new].each { |e| s << encode('> ') + e + encode("\n") }
@data_new[@start_new..@end_new].each { |e| s << encode('> ') + e.chomp + encode("\n") }
end

s
end
private :old_diff

def unified_diff
def unified_diff(last = false)
# Calculate item number range.
s = encode("@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n")
s = encode("@@ -#{unified_range(:old, last)} +#{unified_range(:new, last)} @@\n")

# Outlist starts containing the hunk of the old file. Removing an item
# just means putting a '-' in front of it. Inserting an item requires
Expand All @@ -161,7 +161,14 @@ def unified_diff
# file -- don't take removed items into account.
lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0

outlist = @data_old[lo..hi].map { |e| e.insert(0, encode(' ')) }
outlist = @data_old[lo..hi].map { |e| String.new("#{encode(' ')}#{e.chomp}") }

last_block = blocks[-1]

if last
old_missing_newline = missing_last_newline?(@data_old)
new_missing_newline = missing_last_newline?(@data_new)
end

@blocks.each do |block|
block.remove.each do |item|
Expand All @@ -170,67 +177,102 @@ def unified_diff
outlist[offset][0, 1] = encode(op)
num_removed += 1
end

if last && block == last_block && old_missing_newline && !new_missing_newline
outlist << encode("\\ No newline at end of file")
num_removed += 1
end

block.insert.each do |item|
op = item.action.to_s # +
offset = item.position - @start_new + num_removed
outlist[offset, 0] = encode(op) + @data_new[item.position]
outlist[offset, 0] = encode(op) + @data_new[item.position].chomp
num_added += 1
end
end

if last && new_missing_newline
outlist << encode("\\ No newline at end of file")
end

s << outlist.join(encode("\n"))

s
end
private :unified_diff

def context_diff
def context_diff(last = false)
s = encode("***************\n")
s << encode("*** #{context_range(:old)} ****\n")
r = context_range(:new)
s << encode("*** #{context_range(:old, ',', true)} ****\n")
r = context_range(:new, ',', true)

if last
old_missing_newline = missing_last_newline?(@data_old)
new_missing_newline = missing_last_newline?(@data_new)
end

# Print out file 1 part for each block in context diff format if there
# are any blocks that remove items
lo, hi = @start_old, @end_old
removes = @blocks.reject { |e| e.remove.empty? }
if removes
outlist = @data_old[lo..hi].map { |e| e.insert(0, encode(' ')) }
if !removes.empty?
outlist = @data_old[lo..hi].map { |e| String.new("#{encode(' ')}#{e.chomp}") }

last_block = removes[-1]

removes.each do |block|
block.remove.each do |item|
outlist[item.position - lo].insert(0, encode(block.op)) # - or !
outlist[item.position - lo][0, 1] = encode(block.op) # - or !
end

if last && block == last_block && old_missing_newline
outlist << encode("\\ No newline at end of file")
end
end

s << outlist.join("\n")
end

s << encode("\n--- #{r} ----\n")
lo, hi = @start_new, @end_new
inserts = @blocks.reject { |e| e.insert.empty? }
if inserts
outlist = @data_new[lo..hi].collect { |e| e.insert(0, encode(' ')) }

if !inserts.empty?
outlist = @data_new[lo..hi].map { |e| String.new("#{encode(' ')}#{e.chomp}") }

last_block = inserts[-1]

inserts.each do |block|
block.insert.each do |item|
outlist[item.position - lo].insert(0, encode(block.op)) # - or !
outlist[item.position - lo][0, 1] = encode(block.op) # + or !
end

if last && block == last_block && new_missing_newline
outlist << encode("\\ No newline at end of file")
end
end
s << outlist.join("\n")
end

s
end
private :context_diff

def ed_diff(format)
def ed_diff(format, last = false)
op_act = { '+' => 'a', '-' => 'd', '!' => 'c' }
warn 'Expecting only one block in an old diff hunk!' if @blocks.size > 1

s =
if format == :reverse_ed
encode("#{op_act[@blocks[0].op]}#{context_range(:old)}\n")
encode("#{op_act[@blocks[0].op]}#{context_range(:old, ',')}\n")
else
encode("#{context_range(:old, ' ')}#{op_act[@blocks[0].op]}\n")
end

unless @blocks[0].insert.empty?
@data_new[@start_new..@end_new].each do |e| s << e + encode("\n") end
@data_new[@start_new..@end_new].each do |e|
s << e.chomp + encode("\n")
end
s << encode(".\n")
end
s
Expand All @@ -239,35 +281,51 @@ def ed_diff(format)

# Generate a range of item numbers to print. Only print 1 number if the
# range has only one item in it. Otherwise, it's 'start,end'
def context_range(mode, op = ',') # rubocop:disable Naming/UncommunicativeMethodParamName
def context_range(mode, op, last = false)
case mode
when :old
s, e = (@start_old + 1), (@end_old + 1)
when :new
s, e = (@start_new + 1), (@end_new + 1)
end

e = e - 1 if last
e = 1 if e.zero?

s < e ? "#{s}#{op}#{e}" : e.to_s
end
private :context_range

# Generate a range of item numbers to print for unified diff. Print number
# where block starts, followed by number of lines in the block
# (don't print number of lines if it's 1)
def unified_range(mode)
def unified_range(mode, last)
case mode
when :old
s, e = (@start_old + 1), (@end_old + 1)
when :new
s, e = (@start_new + 1), (@end_new + 1)
end

length = e - s + 1
length = e - s + (last ? 0 : 1)

first = length < 2 ? e : s # "strange, but correct"
length == 1 ? first.to_s : "#{first},#{length}"
length <= 1 ? first.to_s : "#{first},#{length}"
end
private :unified_range

def missing_last_newline?(data)
newline = encode("\n")

if data[-2]
data[-2].end_with?(newline) && !data[-1].end_with?(newline)
elsif data[-1]
!data[-1].end_with?(newline)
else
true
end
end

if String.method_defined?(:encoding)
def encode(literal, target_encoding = @preferred_data_encoding)
literal.encode target_encoding
Expand Down
33 changes: 14 additions & 19 deletions lib/diff/lcs/ldiff.rb
Expand Up @@ -98,24 +98,19 @@ def run(args, _input = $stdin, output = $stdout, error = $stderr) #:nodoc:
# items we've read from each file will differ by FLD (could be 0).
file_length_difference = 0

if @binary.nil? or @binary
data_old = IO.read(file_old)
data_new = IO.read(file_new)

# Test binary status
if @binary.nil?
old_txt = data_old[0, 4096].scan(/\0/).empty?
new_txt = data_new[0, 4096].scan(/\0/).empty?
@binary = !old_txt or !new_txt
end
data_old = IO.read(file_old)
data_new = IO.read(file_new)

# Test binary status
if @binary.nil?
old_txt = data_old[0, 4096].scan(/\0/).empty?
new_txt = data_new[0, 4096].scan(/\0/).empty?
@binary = !old_txt or !new_txt
end

unless @binary
data_old = data_old.split($/).map { |e| e.chomp }
data_new = data_new.split($/).map { |e| e.chomp }
end
else
data_old = IO.readlines(file_old).map { |e| e.chomp }
data_new = IO.readlines(file_new).map { |e| e.chomp }
unless @binary
data_old = data_old.lines.to_a
data_new = data_new.lines.to_a
end

# diff yields lots of pieces, each of which is basically a Block object
Expand Down Expand Up @@ -163,13 +158,13 @@ def run(args, _input = $stdin, output = $stdout, error = $stderr) #:nodoc:
end
end

last = oldhunk.diff(@format)
last = oldhunk.diff(@format, true)
last << "\n" if last.respond_to?(:end_with?) && !last.end_with?("\n")

output << last

output.reverse_each { |e| real_output << e.diff(:ed_finish) } if @format == :ed

1
return 1
end
end
4 changes: 2 additions & 2 deletions spec/fixtures/ldiff/output.diff-c
@@ -1,5 +1,5 @@
*** spec/fixtures/aX 2019-02-01 22:29:34.000000000 -0500
--- spec/fixtures/bXaX 2019-02-01 22:29:43.000000000 -0500
*** spec/fixtures/aX 2020-06-23 11:15:32.000000000 -0400
--- spec/fixtures/bXaX 2020-06-23 11:15:32.000000000 -0400
***************
*** 1 ****
! aX
Expand Down
4 changes: 2 additions & 2 deletions spec/fixtures/ldiff/output.diff-u
@@ -1,5 +1,5 @@
--- spec/fixtures/aX 2019-02-01 22:29:34.000000000 -0500
+++ spec/fixtures/bXaX 2019-02-01 22:29:43.000000000 -0500
--- spec/fixtures/aX 2020-06-23 11:15:32.000000000 -0400
+++ spec/fixtures/bXaX 2020-06-23 11:15:32.000000000 -0400
@@ -1 +1 @@
-aX
+bXaX
4 changes: 4 additions & 0 deletions spec/fixtures/ldiff/output.diff.chef
@@ -0,0 +1,4 @@
3c3
< "description": "hi"
---
> "description": "lo"
15 changes: 15 additions & 0 deletions spec/fixtures/ldiff/output.diff.chef-c
@@ -0,0 +1,15 @@
*** spec/fixtures/old-chef 2020-06-23 23:18:20.000000000 -0400
--- spec/fixtures/new-chef 2020-06-23 23:18:20.000000000 -0400
***************
*** 1,4 ****
{
"name": "x",
! "description": "hi"
}
\ No newline at end of file
--- 1,4 ----
{
"name": "x",
! "description": "lo"
}
\ No newline at end of file
3 changes: 3 additions & 0 deletions spec/fixtures/ldiff/output.diff.chef-e
@@ -0,0 +1,3 @@
3c
"description": "lo"
.
3 changes: 3 additions & 0 deletions spec/fixtures/ldiff/output.diff.chef-f
@@ -0,0 +1,3 @@
c3
"description": "lo"
.
7 changes: 4 additions & 3 deletions spec/fixtures/ldiff/output.diff.chef-u
@@ -1,8 +1,9 @@
--- spec/fixtures/old-chef 2019-02-01 21:57:15.000000000 -0400
+++ spec/fixtures/new-chef 2019-02-01 21:57:29.000000000 -0400
@@ -1,5 +1,5 @@
--- spec/fixtures/old-chef 2020-06-23 23:18:20.000000000 -0400
+++ spec/fixtures/new-chef 2020-06-23 23:18:20.000000000 -0400
@@ -1,4 +1,4 @@
{
"name": "x",
- "description": "hi"
+ "description": "lo"
}
\ No newline at end of file

0 comments on commit dbb36e6

Please sign in to comment.