Skip to content

Commit

Permalink
Make whitespace (' ', \t, \r, \n) always visible for "changed" lines
Browse files Browse the repository at this point in the history
context lines, added-only lines, and removed-only lines are shown as usual in the diffs.

fixes diffplug#465
  • Loading branch information
vlsi committed Nov 10, 2019
1 parent 04d5c7d commit 914d90d
Show file tree
Hide file tree
Showing 3 changed files with 439 additions and 191 deletions.
Expand Up @@ -18,19 +18,18 @@
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.List;
import java.util.ListIterator;
import java.util.Objects;

import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.diff.EditList;
import org.eclipse.jgit.diff.MyersDiff;
import org.eclipse.jgit.diff.HistogramDiff;
import org.eclipse.jgit.diff.RawText;
import org.eclipse.jgit.diff.RawTextComparator;

import com.diffplug.common.base.CharMatcher;
import com.diffplug.common.base.Errors;
import com.diffplug.common.base.Preconditions;
import com.diffplug.common.base.Splitter;
Expand Down Expand Up @@ -168,7 +167,9 @@ private void addIntendedLine(String indent, String line) {
* sequence (\n, \r, \r\n).
*/
private static String diff(Builder builder, File file) throws IOException {
String raw = new String(Files.readAllBytes(file.toPath()), builder.formatter.getEncoding());
byte[] rawBytes = Files.readAllBytes(file.toPath());
Charset encoding = builder.formatter.getEncoding();
String raw = new String(rawBytes, encoding);
String rawUnix = LineEnding.toUnix(raw);
String formattedUnix;
if (builder.isPaddedCell) {
Expand All @@ -177,61 +178,29 @@ private static String diff(Builder builder, File file) throws IOException {
formattedUnix = builder.formatter.compute(rawUnix, file);
}

if (rawUnix.equals(formattedUnix)) {
// the formatting is fine, so it's a line-ending issue
String formatted = builder.formatter.computeLineEndings(formattedUnix, file);
return diffWhitespaceLineEndings(raw, formatted, false, true);
} else {
return diffWhitespaceLineEndings(rawUnix, formattedUnix, true, false);
String formatted = builder.formatter.computeLineEndings(formattedUnix, file);
// Assume the conversion to UTF-8 is always lossless
// UTF-8 simplifies the implementation of WriteSpaceAwareDiffFormatter, so it could easily
// match for ' ', '\n' and other characters at the byte level.
// jgit diff algorithms are implemented for byte[], so we have to use bytes as well
if (!encoding.equals(StandardCharsets.UTF_8)) {
rawBytes = raw.getBytes(StandardCharsets.UTF_8);
}
byte[] formattedBytes = formatted.getBytes(StandardCharsets.UTF_8);
return visualizeDiff(rawBytes, formattedBytes);
}

/**
* Returns a git-style diff between the two unix strings.
*
* Output has no trailing newlines.
*
* Boolean args determine whether whitespace or line endings will be visible.
*/
private static String diffWhitespaceLineEndings(String dirty, String clean, boolean whitespace, boolean lineEndings) throws IOException {
dirty = visibleWhitespaceLineEndings(dirty, whitespace, lineEndings);
clean = visibleWhitespaceLineEndings(clean, whitespace, lineEndings);

RawText a = new RawText(dirty.getBytes(StandardCharsets.UTF_8));
RawText b = new RawText(clean.getBytes(StandardCharsets.UTF_8));
EditList edits = new EditList();
edits.addAll(MyersDiff.INSTANCE.diff(RawTextComparator.DEFAULT, a, b));

private static String visualizeDiff(byte[] rawBytes, byte[] formattedBytes) throws IOException {
RawText a = new RawText(rawBytes);
RawText b = new RawText(formattedBytes);
EditList edits = new HistogramDiff().diff(RawTextComparator.DEFAULT, a, b);
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (DiffFormatter formatter = new DiffFormatter(out)) {
formatter.format(edits, a, b);
}
String formatted = out.toString(StandardCharsets.UTF_8.name());

// we don't need the diff to show this, since we display newlines ourselves
formatted = formatted.replace("\\ No newline at end of file\n", "");
return NEWLINE_MATCHER.trimTrailingFrom(formatted);
}

private static final CharMatcher NEWLINE_MATCHER = CharMatcher.is('\n');

/**
* Makes the whitespace and/or the lineEndings visible.
*
* MyersDiff wants inputs with only unix line endings. So this ensures that that is the case.
*/
private static String visibleWhitespaceLineEndings(String input, boolean whitespace, boolean lineEndings) {
if (whitespace) {
input = input.replace(' ', MIDDLE_DOT).replace("\t", "\\t");
}
if (lineEndings) {
input = input.replace("\n", "\\n\n").replace("\r", "\\r");
} else {
// we want only \n, so if we didn't replace them above, we'll replace them here.
input = input.replace("\r", "");
}
return input;
// defaultCharset is here so the formatter could select "fancy" or "simple"
// characters for whitespace visualization based on the capabilities of the console
// For instance, if the app is running with file.encoding=ISO-8859-1, then
// the console can't encode fancy whitespace characters, and the formatter would
// resort to simple \r, \n, and so on
new WriteSpaceAwareDiffFormatter(out, Charset.defaultCharset()).format(edits, a, b);
return new String(out.toByteArray(), StandardCharsets.UTF_8);
}

private static final char MIDDLE_DOT = '\u00b7';
}
@@ -0,0 +1,206 @@
/*
* Copyright 2016 DiffPlug
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.diffplug.spotless.extra.integration;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;

import org.eclipse.jgit.diff.Edit;
import org.eclipse.jgit.diff.EditList;
import org.eclipse.jgit.diff.RawText;
import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.RawParseUtils;

/**
* Formats the diff in Git-like style, however it makes whitespace visible for
* edit-like diffs (when one fragment is replaced with another).
*/
class WriteSpaceAwareDiffFormatter {
private static final int CONTEXT_LINES = 3;
private static final String MIDDLE_DOT = "\u00b7";
private static final String CR = "\u240d";
private static final String LF = "\u240a";
private static final String TAB = "\u21e5";
private static final byte[] MIDDLE_DOT_UTF8 = MIDDLE_DOT.getBytes(StandardCharsets.UTF_8);
private static final byte[] CR_UTF8 = CR.getBytes(StandardCharsets.UTF_8);
private static final byte[] LF_UTF8 = LF.getBytes(StandardCharsets.UTF_8);
private static final byte[] TAB_UTF8 = TAB.getBytes(StandardCharsets.UTF_8);
private static final byte[] SPACE_SIMPLE = new byte[]{' '};
private static final byte[] CR_SIMPLE = new byte[]{'\\', 'r'};
private static final byte[] LF_SIMPLE = new byte[]{'\\', 'n'};
private static final byte[] TAB_SIMPLE = new byte[]{'\\', 't'};

private final ByteArrayOutputStream out;
private final byte[] middleDot;
private final byte[] cr;
private final byte[] lf;
private final byte[] tab;

/**
* Creates the formatter.
* @param out output stream for the resulting diff. The diff would have \n line endings
* @param charset the charset that will be used when printing the results for the end user
*/
public WriteSpaceAwareDiffFormatter(ByteArrayOutputStream out, Charset charset) {
this.out = out;
CharsetEncoder charsetEncoder = charset.newEncoder();
this.middleDot = replacementFor(charsetEncoder, MIDDLE_DOT, MIDDLE_DOT_UTF8, SPACE_SIMPLE);
this.cr = replacementFor(charsetEncoder, CR, CR_UTF8, CR_SIMPLE);
this.lf = replacementFor(charsetEncoder, LF, LF_UTF8, LF_SIMPLE);
this.tab = replacementFor(charsetEncoder, TAB, TAB_UTF8, TAB_SIMPLE);
}

private static byte[] replacementFor(CharsetEncoder charsetEncoder, String value, byte[] fancy, byte[] simple) {
return charsetEncoder.canEncode(value) ? fancy : simple;
}

/**
* Formats the diff.
* @param edits the list of edits to format
* @param a input text a, with \n line endings, with UTF-8 encoding
* @param b input text b, with \n line endings, with UTF-8 encoding
* @throws IOException if formatting fails
*/
public void format(EditList edits, RawText a, RawText b) throws IOException {
IntList linesA = RawParseUtils.lineMap(a.getRawContent(), 0, a.getRawContent().length);
IntList linesB = RawParseUtils.lineMap(b.getRawContent(), 0, b.getRawContent().length);
boolean firstLine = true;
for (int i = 0; i < edits.size(); i++) {
Edit edit = edits.get(i);
int lineA = Math.max(0, edit.getBeginA() - CONTEXT_LINES);
int lineB = Math.max(0, edit.getBeginB() - CONTEXT_LINES);

final int endIdx = findCombinedEnd(edits, i);
final Edit endEdit = edits.get(endIdx);

int endA = Math.min(a.size(), endEdit.getEndA() + CONTEXT_LINES);
int endB = Math.min(b.size(), endEdit.getEndB() + CONTEXT_LINES);

if (firstLine) {
firstLine = false;
} else {
out.write('\n');
}
header(lineA, endA, lineB, endB);

boolean showWhitespace = edit.getType() == Edit.Type.REPLACE;

while (lineA < endA || lineB < endB) {
if (lineA < edit.getBeginA()) {
// Common part before the diff
line(' ', a, lineA, linesA, false);
lineA++;
lineB++;
} else if (lineA < edit.getEndA()) {
line('-', a, lineA, linesA, showWhitespace);
lineA++;
} else if (lineB < edit.getEndB()) {
line('+', b, lineB, linesB, showWhitespace);
lineB++;
} else {
// Common part after the diff
line(' ', a, lineA, linesA, false);
lineA++;
lineB++;
}

if (lineA == edit.getEndA() && lineB == edit.getEndB() && i < endIdx) {
i++;
edit = edits.get(i);
showWhitespace = edit.getType() == Edit.Type.REPLACE;
}
}
}
}

/**
* There might be multiple adjacent diffs, so we need to figure out the latest one in the group.
* @param edits list of edits
* @param i starting edit
* @return the index of the latest edit in the group
*/
private int findCombinedEnd(EditList edits, int i) {
for (; i < edits.size() - 1; i++) {
Edit current = edits.get(i);
Edit next = edits.get(i + 1);
if (current.getEndA() - next.getBeginA() > 2 * CONTEXT_LINES &&
current.getEndB() - next.getBeginB() > 2 * CONTEXT_LINES) {
break;
}
}
return i;
}

private void header(int lineA, int endA, int lineB, int endB) {
out.write('@');
out.write('@');
range('-', lineA + 1, endA - lineA);
range('+', lineB + 1, endB - lineB);
out.write(' ');
out.write('@');
out.write('@');
}

private void range(char prefix, int begin, int length) {
out.write(' ');
out.write(prefix);
if (length == 0) {
writeInt(begin - 1);
out.write(',');
out.write('0');
} else {
writeInt(begin);
if (length > 1) {
out.write(',');
writeInt(length);
}
}
}

private void writeInt(int num) {
String str = Integer.toString(num);
for (int i = 0, len = str.length(); i < len; i++) {
out.write(str.charAt(i));
}
}

private void line(char prefix, RawText a, int lineA, IntList lines, boolean showWhitespace) throws IOException {
out.write('\n');
out.write(prefix);
if (!showWhitespace) {
a.writeLine(out, lineA);
return;
}
byte[] bytes = a.getRawContent();
for (int i = lines.get(lineA + 1), end = lines.get(lineA + 2); i < end; i++) {
byte b = bytes[i];
if (b == ' ') {
out.write(middleDot);
} else if (b == '\t') {
out.write(tab);
} else if (b == '\r') {
out.write(cr);
} else if (b == '\n') {
out.write(lf);
} else {
out.write(b);
}
}
}
}

0 comments on commit 914d90d

Please sign in to comment.