diff --git a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java index c9fcf705..c3924b3f 100644 --- a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java +++ b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java @@ -505,44 +505,68 @@ public boolean startNewLine() throws IOException * @since 2.10.1 */ public boolean skipLinesWhenNeeded() throws IOException { - if (!(_allowComments || _skipBlankLines)) { + if (_allowComments) { + return _skipCommentLines(); + } + if (!_skipBlankLines) { return hasMoreInput(); } - int firstCharacterPtr = _inputPtr; + + // only need to skip fully empty lines while (hasMoreInput()) { - char ch = _inputBuffer[_inputPtr++]; + char ch = _inputBuffer[_inputPtr]; if (ch == '\r' || ch == '\n') { + ++_inputPtr; _pendingLF = ch; _handleLF(); - // track the start of the new line - firstCharacterPtr = _inputPtr; continue; } - if (ch == ' ') { + if (ch != ' ') { + return true; // processing can go on + } + ++_inputPtr; + } + return false; // end of input + } + + public boolean _skipCommentLines() throws IOException + { + while ((_inputPtr < _inputEnd) || loadMore()) { + char ch = _inputBuffer[_inputPtr]; + switch (ch) { + case '#': + ++_inputPtr; + _skipCommentContents(); + continue; + case '\r': + case '\n': + ++_inputPtr; + _pendingLF = ch; + _handleLF(); + continue; + case ' ': // skip all blanks (in both comments/blanks skip mode) + ++_inputPtr; continue; + default: + return true; } - if (_allowComments) { - if (_inputBuffer[firstCharacterPtr] == '#') { - // on a commented line, skip everything - continue; - } - if (ch == '#') { - // we reach this point when whitespaces precedes the hash character - // move the firstCharacterPtr to the '#' location in order to skip the line completely - firstCharacterPtr = _inputPtr-1; - continue; - } - } - // we reached a non skippable character, this line 
needs to be parsed - // rollback the input pointer to the beginning of the line - _inputPtr = firstCharacterPtr; - return true; // processing can go on } return false; // end of input } - // 12-Apr-2020, tatu: Not used any more (probably replaced by above?) + private void _skipCommentContents() throws IOException + { + while ((_inputPtr < _inputEnd) || loadMore()) { + char ch = _inputBuffer[_inputPtr++]; + if (ch == '\r' || ch == '\n') { + _pendingLF = ch; + _handleLF(); + break; + } + } + } + /* private final static int INT_HASH = '#'; @@ -559,7 +583,8 @@ protected int _skipCommentLines() throws IOException // Ok, skipped the end of the line. Check next one... int i = _nextChar(); if (i != INT_HASH) { - return i; + --_inputPtr; + return true; } } return -1; // end of input diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines15Test.java similarity index 95% rename from csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java rename to csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines15Test.java index 8777f343..a105c235 100644 --- a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java +++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines15Test.java @@ -9,7 +9,7 @@ import static org.junit.Assert.assertArrayEquals; // for [dataformats-text#15]: Allow skipping of empty lines -public class SkipBlankLines15Test extends ModuleTestBase { +public class SkipEmptyLines15Test extends ModuleTestBase { private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n"; private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n"; @@ -129,6 +129,8 @@ public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws ), rows); } + // 14-Apr-2020, tatu: Due to [dataformats-text#191], can not retain leading 
spaces + // when trimming empty lines and/or comments, so test changed for 2.11 public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception { String[][] rows = mapperForCsvAsArray() .with(CsvParser.Feature.SKIP_EMPTY_LINES) @@ -136,7 +138,9 @@ public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws // blank/empty lines are skipped assertArrayEquals(expected( row("1", "xyz"), - row(" #comment"), + // As per: [dataformats-text#191] +// row(" #comment"), + row("#comment"), row("true", "") ), rows); } diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/failing/ParserSkipEmpty191Test.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines191Test.java similarity index 94% rename from csv/src/test/java/com/fasterxml/jackson/dataformat/csv/failing/ParserSkipEmpty191Test.java rename to csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines191Test.java index 5c4eba78..f0b4786b 100644 --- a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/failing/ParserSkipEmpty191Test.java +++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines191Test.java @@ -1,4 +1,4 @@ -package com.fasterxml.jackson.dataformat.csv.failing; +package com.fasterxml.jackson.dataformat.csv.deser; import java.io.Reader; import java.io.StringReader; @@ -10,7 +10,7 @@ import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; // [dataformats-text#191] -public class ParserSkipEmpty191Test extends ModuleTestBase { +public class SkipEmptyLines191Test extends ModuleTestBase { private static String COL_1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; private static String COL_2 = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index f790df11..77c3157b 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -18,6 +18,8 @@ Modules: #180: (yaml) YAMLGenerator serializes string 
with special chars unquoted when using `MINIMIZE_QUOTES` mode (reported, fix contributed by Timo R) +#191: (csv) `ArrayIndexOutOfBoundsException` when skipping empty lines and/or comments + (reported by f-julian@github) 2.10.4 (not yet released)