diff --git a/csv/README.md b/csv/README.md index 85e419c9..c63f3b84 100644 --- a/csv/README.md +++ b/csv/README.md @@ -236,8 +236,8 @@ Jackson supports the following extension or variations: * Linefeed character: when generating content, the default linefeed String used is "`\n`" but this may be changed * Null value: by default, null values are serialized as empty Strings (""), but any other String value be configured to be used instead (for example, "null", "N/A" etc) * Use of first row as a set of column names: as explained earlier, it is possible to configure `CsvSchema` to indicate that the contents of the first (non-comment) document row is taken to mean the set of column names to use -* Comments - * When enabled (via `CsvSchema`, or enabling `JsonParser.Feature.ALLOW_YAML_COMMENTS`), if a row starts with a `#` character, it will be considered a comment and skipped +* Comments: when enabled (via `CsvSchema`, or enabling `CsvParser.Feature.ALLOW_COMMENTS`), if a row starts with a `#` character, it will be considered a comment and skipped +* Blank lines: when enabled (using `CsvParser.Feature.SKIP_BLANK_LINES`), rows that are empty or composed only of whitespace are skipped # Limitations diff --git a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java index c0d078ad..6a6d2de9 100644 --- a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java +++ b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java @@ -73,13 +73,13 @@ public enum Feature IGNORE_TRAILING_UNMAPPABLE(false), /** - * Feature that allows skipping input lines that are completely empty, instead - * of being decoded as lines of just a single column with empty String value (or, + * Feature that allows skipping input lines that are completely empty or blank (composed only of whitespace), + * instead of being decoded as lines of just a single column with an empty/blank String value
(or, * depending on binding, `null`). *

* Feature is disabled by default. */ - SKIP_EMPTY_LINES(false), + SKIP_BLANK_LINES(false), /** * Feature that allows there to be a trailing single extraneous data diff --git a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java index f9b83615..ae319e45 100644 --- a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java +++ b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java @@ -70,7 +70,7 @@ public class CsvDecoder { protected boolean _allowComments; - protected boolean _skipEmptyLines; + protected boolean _skipBlankLines; /** * Maximum of quote character, linefeeds (\r and \n), escape character. @@ -269,11 +269,10 @@ public CsvDecoder(IOContext ctxt, CsvParser owner, Reader r, _textBuffer = textBuffer; _autoCloseInput = StreamReadFeature.AUTO_CLOSE_SOURCE.enabledIn(stdFeatures); _allowComments = CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures); - _skipEmptyLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures); + _skipBlankLines = CsvParser.Feature.SKIP_BLANK_LINES.enabledIn(csvFeatures); _trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures); _inputBuffer = ctxt.allocTokenBuffer(); _bufferRecyclable = true; // since we allocated it - _inputSource = r; _tokenInputRow = -1; _tokenInputCol = -1; setSchema(schema); @@ -480,7 +479,7 @@ public boolean startNewLine() throws IOException { } public boolean skipLinesWhenNeeded() throws IOException { - if (!(_allowComments || _skipEmptyLines)) { + if (!(_allowComments || _skipBlankLines)) { return hasMoreInput(); } int firstCharacterPtr = _inputPtr; @@ -493,15 +492,22 @@ public boolean skipLinesWhenNeeded() throws IOException { firstCharacterPtr = _inputPtr; continue; } - if (_skipEmptyLines && ch == ' ') { - // skip all blanks + if (ch == ' ') { + // skip all blanks (in both comments/blanks skip mode) continue; } - if (_allowComments && 
_inputBuffer[firstCharacterPtr] == '#') { - // this line is commented, skip everything - continue; + if (_allowComments) { + if (_inputBuffer[firstCharacterPtr] == '#') { + // on a commented line, skip everything + continue; + } + if (ch == '#') { + // we reach this point when whitespace precedes the hash character + // move the firstCharacterPtr to the '#' location in order to skip the line completely + firstCharacterPtr = _inputPtr-1; + continue; + } } - // we reached a non skippable character, this line needs to be parsed // rollback the input pointer to the beginning of the line _inputPtr = firstCharacterPtr; diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java index b21a8d9f..f084d380 100644 --- a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java +++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java @@ -1,14 +1,16 @@ package com.fasterxml.jackson.dataformat.csv.deser; -import java.util.Map; - import com.fasterxml.jackson.databind.MappingIterator; -import com.fasterxml.jackson.dataformat.csv.*; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; + +import java.util.Map; // Tests for [csv#56] public class CommentsTest extends ModuleTestBase { - final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n# another...\n"; + final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n # another...\n"; public void testWithoutComments() throws Exception { diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines15Test.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java similarity index 66% rename from csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines15Test.java rename to
csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java index a64d6ae1..aa5608c2 100644 --- a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipEmptyLines15Test.java +++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java @@ -7,14 +7,15 @@ import static org.junit.Assert.assertArrayEquals; // for [dataformats-text#15]: Allow skipping of empty lines -public class SkipEmptyLines15Test extends ModuleTestBase { +public class SkipBlankLines15Test extends ModuleTestBase { private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n"; private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n"; - private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n#comment\n\ntrue,\n"; + private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n #comment\n\ntrue,\n"; private static final String CSV_WITH_FIRST_BLANK_LINE = "\n1,\"xyz\"\ntrue,\n"; + private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n \n\n"; - public void testCsvWithEmptyLineSkipEmptyLinesFeatureDisabled() throws Exception { + public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception { String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE); // First, verify default behavior: assertArrayEquals(expected( @@ -24,9 +25,9 @@ public void testCsvWithEmptyLineSkipEmptyLinesFeatureDisabled() throws Exception ), rows); } - public void testCsvWithEmptyLineSkipEmptyLinesFeatureEnabled() throws Exception { + public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception { String[][] rows = mapperForCsvAsArray() - .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .with(CsvParser.Feature.SKIP_BLANK_LINES) .readValue(CSV_WITH_EMPTY_LINE); // empty line is skipped assertArrayEquals(expected( @@ -36,7 +37,7 @@ public void testCsvWithEmptyLineSkipEmptyLinesFeatureEnabled() throws Exception } - public void 
testCsvWithBlankLineSkipEmptyLinesFeatureDisabled() throws Exception { + public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception { String[][] rows = mapperForCsvAsArray() .readValue(CSV_WITH_BLANK_LINE); // First, verify default behavior: @@ -47,9 +48,9 @@ public void testCsvWithBlankLineSkipEmptyLinesFeatureDisabled() throws Exception ), rows); } - public void testCsvWithBlankLineSkipEmptyLinesFeatureEnabled() throws Exception { + public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception { String[][] rows = mapperForCsvAsArray() - .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .with(CsvParser.Feature.SKIP_BLANK_LINES) .readValue(CSV_WITH_BLANK_LINE); // blank line is skipped assertArrayEquals(expected( @@ -58,34 +59,34 @@ public void testCsvWithBlankLineSkipEmptyLinesFeatureEnabled() throws Exception ), rows); } - public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureDisabled() throws Exception { + public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception { String[][] rows = mapperForCsvAsArray() .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT); // First, verify default behavior: assertArrayEquals(expected( row("1", "xyz"), row(" "), - row("#comment"), + row(" #comment"), row(""), row("true", "") ), rows); } - public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureEnabled() throws Exception { + public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception { String[][] rows = mapperForCsvAsArray() - .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .with(CsvParser.Feature.SKIP_BLANK_LINES) .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT); // blank/empty lines are skipped assertArrayEquals(expected( row("1", "xyz"), - row("#comment"), + row(" #comment"), row("true", "") ), rows); } - public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureEnabledAndAllowComments() throws Exception { + public void 
testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception { String[][] rows = mapperForCsvAsArray() - .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .with(CsvParser.Feature.SKIP_BLANK_LINES) .with(CsvParser.Feature.ALLOW_COMMENTS) .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT); // blank/empty/comment lines are skipped @@ -95,7 +96,7 @@ public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureEnabledAndAllowCo ), rows); } - public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureDisabled() throws Exception { + public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception { String[][] rows = mapperForCsvAsArray() .readValue(CSV_WITH_FIRST_BLANK_LINE); // First, verify default behavior: @@ -106,9 +107,9 @@ public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureDisabled() throws Exce ), rows); } - public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureEnabled() throws Exception { + public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception { String[][] rows = mapperForCsvAsArray() - .with(CsvParser.Feature.SKIP_EMPTY_LINES) + .with(CsvParser.Feature.SKIP_BLANK_LINES) .readValue(CSV_WITH_FIRST_BLANK_LINE); // blank line is skipped assertArrayEquals(expected( @@ -117,6 +118,30 @@ public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureEnabled() throws Excep ), rows); } + + public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .readValue(CSV_WITH_TRAILING_BLANK_LINES); + // First, verify default behavior: + assertArrayEquals(expected( + row("1", "xyz"), + row("true", ""), + row(" "), + row("") + ), rows); + } + + public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception { + String[][] rows = mapperForCsvAsArray() + .with(CsvParser.Feature.SKIP_BLANK_LINES) + .readValue(CSV_WITH_TRAILING_BLANK_LINES); + // trailing blank/empty lines are skipped + assertArrayEquals(expected( + row("1", 
"xyz"), + row("true", "") + ), rows); + } + private ObjectReader mapperForCsvAsArray() { // when wrapped as an array, we'll get array of Lists: return mapperForCsv()