diff --git a/csv/pom.xml b/csv/pom.xml index 0ed667d3..5cf19f04 100644 --- a/csv/pom.xml +++ b/csv/pom.xml @@ -40,7 +40,7 @@ abstractions. com.google.guava guava - 18.0 + 25.0-jre test diff --git a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java index 641a64cf..be60f157 100644 --- a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java +++ b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java @@ -73,13 +73,14 @@ public enum Feature IGNORE_TRAILING_UNMAPPABLE(false), /** - * Feature that allows skipping input lines that are completely empty, instead + * Feature that allows skipping input lines that are completely empty or blank + * (composed only of whitespace), instead * of being decoded as lines of just a single column with empty String value (or, * depending on binding, `null`). *

* Feature is disabled by default. * - * @since 2.9 + * @since 2.10.1 */ SKIP_EMPTY_LINES(false), @@ -787,19 +788,19 @@ protected void _readHeaderLine() throws IOException { */ protected JsonToken _handleStartDoc() throws IOException { - // also, if comments enabled, may need to skip leading ones - _reader.skipLeadingComments(); + // also, if comments enabled, or skip empty lines, may need to skip leading ones + _reader.skipLinesWhenNeeded(); // First things first: are we expecting header line? If so, read, process if (_schema.usesHeader()) { _readHeaderLine(); - _reader.skipLeadingComments(); + _reader.skipLinesWhenNeeded(); } // and if we are to skip the first data line, skip it if (_schema.skipsFirstDataRow()) { _reader.skipLine(); - _reader.skipLeadingComments(); + _reader.skipLinesWhenNeeded(); } - + // Only one real complication, actually; empty documents (zero bytes). // Those have no entries. Should be easy enough to detect like so: final boolean wrapAsArray = Feature.WRAP_AS_ARRAY.enabledIn(_formatFeatures); diff --git a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java index 7037edaf..82929133 100644 --- a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java +++ b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java @@ -65,7 +65,12 @@ public class CsvDecoder protected boolean _trimSpaces; protected boolean _allowComments; - + + /** + * @since 2.10.1 + */ + protected boolean _skipBlankLines; // NOTE: can be final in 3.0, not before + /** * Maximum of quote character, linefeeds (\r and \n), escape character. */ @@ -111,14 +116,14 @@ public class CsvDecoder * needs to be handled (indicates end-of-record). */ protected int _pendingLF = 0; - + /** * Flag that indicates whether parser is closed or not. Gets * set when parser is either closed by explicit call * ({@link #close}) or when end-of-input is reached. 
*/ protected boolean _closed; - + /* /********************************************************************** /* Current input location information @@ -152,7 +157,7 @@ public class CsvDecoder * For big (gigabyte-sized) sizes are possible, needs to be long, * unlike pointers and sizes related to in-memory buffers. */ - protected long _tokenInputTotal = 0; + protected long _tokenInputTotal = 0; /** * Input row on which current token starts, 1-based @@ -202,8 +207,7 @@ public class CsvDecoder final static double MIN_INT_D = Integer.MIN_VALUE; final static double MAX_INT_D = Integer.MAX_VALUE; - - + // Digits, numeric final protected static int INT_0 = '0'; final protected static int INT_1 = '1'; @@ -254,8 +258,8 @@ public class CsvDecoder /********************************************************************** */ - @SuppressWarnings("deprecation") - public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, TextBuffer textBuffer, + public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, + TextBuffer textBuffer, int stdFeatures, int csvFeatures) { _owner = owner; @@ -266,6 +270,7 @@ public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, T final boolean legacy = JsonParser.Feature.ALLOW_YAML_COMMENTS.enabledIn(stdFeatures); _allowComments = legacy | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures); _trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures); + _skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures); _inputBuffer = ctxt.allocTokenBuffer(); _bufferRecyclable = true; // since we allocated it _inputSource = r; @@ -292,6 +297,7 @@ public void setSchema(CsvSchema schema) */ public void overrideFormatFeatures(int csvFeatures) { _trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures); + _skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures); } /* @@ -482,39 +488,53 @@ public boolean startNewLine() throws IOException } _handleLF(); } 
- /* For now, we will only require that there is SOME data - * following linefeed -- even spaces will do. - * In future we may want to use better heuristics to possibly - * skip trailing empty line? - */ - if ((_inputPtr >= _inputEnd) && !loadMore()) { - return false; - } - - if (_allowComments && _inputBuffer[_inputPtr] == '#') { - int i = _skipCommentLines(); - // end-of-input? - if (i < 0) { - return false; - } - // otherwise push last read char back - --_inputPtr; - } - return true; + return skipLinesWhenNeeded(); } - public void skipLeadingComments() throws IOException - { - if (_allowComments) { - if ((_inputPtr < _inputEnd) || loadMore()) { - if (_inputBuffer[_inputPtr] == '#') { - _skipCommentLines(); - --_inputPtr; + /** + * Skips leading lines that must not be parsed: once comments or blank-line skipping is enabled, lines that are empty or hold only spaces are skipped, and (only if comments are allowed) lines whose first non-space character is '#' as well; with neither feature enabled it just reports whether more input is available. + * @return false if the end of input was reached, true if input remains to be parsed + * @throws IOException + * @since 2.10.1 + */ + public boolean skipLinesWhenNeeded() throws IOException { + if (!(_allowComments || _skipBlankLines)) { + return hasMoreInput(); + } + int firstCharacterPtr = _inputPtr; + while (hasMoreInput()) { + char ch = _inputBuffer[_inputPtr++]; + if (ch == '\r' || ch == '\n') { + _pendingLF = ch; + _handleLF(); + // track the start of the new line + firstCharacterPtr = _inputPtr; + continue; + } + if (ch == ' ') { + // skip all blanks (in both comments/blanks skip mode) + continue; + } + if (_allowComments) { + if (_inputBuffer[firstCharacterPtr] == '#') { + // on a commented line, skip everything + continue; + } + if (ch == '#') { + // we reach this point when whitespace precedes the hash character + // move the firstCharacterPtr to the '#' location in order to skip the line completely + firstCharacterPtr = _inputPtr-1; + continue; } } + // we reached a non skippable character, this line needs to be parsed + // rollback the input pointer to the beginning of the line + _inputPtr = firstCharacterPtr; + return true; // processing can go on } + 
return false; // end of input } - + protected int _skipCommentLines() throws IOException { while ((_inputPtr < _inputEnd) || loadMore()) { diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java index d4bd2918..ae40d753 100644 --- a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java +++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java @@ -8,7 +8,7 @@ // Tests for [csv#56] public class CommentsTest extends ModuleTestBase { - final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n# another...\n"; + final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n # another...\n"; public void testWithoutComments() throws Exception { diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java new file mode 100644 index 00000000..7e8de0fe --- /dev/null +++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java @@ -0,0 +1,159 @@ +package com.fasterxml.jackson.dataformat.csv.deser; + +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; + +import static org.junit.Assert.assertArrayEquals; + +// for [dataformats-text#15]: Allow skipping of empty lines +public class SkipBlankLines15Test extends ModuleTestBase { + + private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n"; + private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n"; + private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n #comment\n\ntrue,\n"; + private static final String CSV_WITH_FIRST_BLANK_LINE = "\n1,\"xyz\"\ntrue,\n"; + private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n \n\n"; + + 
public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception { + String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE); + // First, verify default behavior: + assertArrayEquals(expected( + row("1", "xyz"), + row(""), + row("true", "") + ), rows); + } + + public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .readValue(CSV_WITH_EMPTY_LINE); + // empty line is skipped + assertArrayEquals(expected( + row("1", "xyz"), + row("true", "") + ), rows); + } + + + public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .readValue(CSV_WITH_BLANK_LINE); + // First, verify default behavior: + assertArrayEquals(expected( + row("1", "xyz"), + row(" "), + row("true", "") + ), rows); + } + + public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .readValue(CSV_WITH_BLANK_LINE); + // blank line is skipped + assertArrayEquals(expected( + row("1", "xyz"), + row("true", "") + ), rows); + } + + public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT); + // First, verify default behavior: + assertArrayEquals(expected( + row("1", "xyz"), + row(" "), + row(" #comment"), + row(""), + row("true", "") + ), rows); + } + + public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT); + // blank/empty lines are skipped + assertArrayEquals(expected( + row("1", "xyz"), + row(" #comment"), + row("true", "") + ), rows); + } + + public void 
testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .with(CsvParser.Feature.ALLOW_COMMENTS) + .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT); + // blank/empty/comment lines are skipped + assertArrayEquals(expected( + row("1", "xyz"), + row("true", "") + ), rows); + } + + public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .readValue(CSV_WITH_FIRST_BLANK_LINE); + // First, verify default behavior: + assertArrayEquals(expected( + row(""), + row("1", "xyz"), + row("true", "") + ), rows); + } + + public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .readValue(CSV_WITH_FIRST_BLANK_LINE); + // blank line is skipped + assertArrayEquals(expected( + row("1", "xyz"), + row("true", "") + ), rows); + } + + + public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .readValue(CSV_WITH_TRAILING_BLANK_LINES); + // First, verify default behavior: + assertArrayEquals(expected( + row("1", "xyz"), + row("true", ""), + row(" "), + row("") + ), rows); + } + + public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .readValue(CSV_WITH_TRAILING_BLANK_LINES); + // trailing blank/empty lines are skipped, so only the two data rows remain + assertArrayEquals(expected( + row("1", "xyz"), + row("true", "") + ), rows); + } + + private ObjectReader mapperForCsvAsArray() { + // reader targets String[][]: with WRAP_AS_ARRAY the whole document binds to an array of String[] rows + return mapperForCsv() + .readerFor(String[][].class) + .with(CsvParser.Feature.WRAP_AS_ARRAY); + } + + private String[][] expected(String[]... 
rowInputs) { + return rowInputs; + } + + private String[] row(String... cellInputs) { + return cellInputs; + } +} diff --git a/pom.xml b/pom.xml index b72f8b7f..84565230 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.fasterxml.jackson jackson-base - 2.10.0 + 2.10.1-SNAPSHOT com.fasterxml.jackson.dataformat jackson-dataformats-text diff --git a/release-notes/CREDITS-2.x b/release-notes/CREDITS-2.x index 83463a4d..92bf50f4 100644 --- a/release-notes/CREDITS-2.x +++ b/release-notes/CREDITS-2.x @@ -79,3 +79,8 @@ Matti Bickel (wundrian@github) Maarten Winkels (mwinkels@github) * Contributed fix for #83: Update index of sequence context (2.10.0) + +Vincent Boulaye (vboulaye@github) +* Implemented #15: Add a `CsvParser.Feature.SKIP_EMPTY_LINES` to allow + skipping empty rows + (2.10.1) diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index e8a766c5..dece3d10 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -8,6 +8,11 @@ Modules: === Releases === ------------------------------------------------------------------------ +2.10.1 (not yet released) + +#15: Add a `CsvParser.Feature.SKIP_EMPTY_LINES` to allow skipping empty rows + (implemented by Vincent B) + 2.10.0 (26-Sep-2019) #50: (yaml) Empty string serialized without quotes if MINIMIZE_QUOTES is enabled