diff --git a/csv/pom.xml b/csv/pom.xml
index 0ed667d3..5cf19f04 100644
--- a/csv/pom.xml
+++ b/csv/pom.xml
@@ -40,7 +40,7 @@ abstractions.
* Feature is disabled by default.
*
- * @since 2.9
+ * @since 2.10
*/
SKIP_EMPTY_LINES(false),
@@ -787,19 +788,19 @@ protected void _readHeaderLine() throws IOException {
*/
protected JsonToken _handleStartDoc() throws IOException
{
- // also, if comments enabled, may need to skip leading ones
- _reader.skipLeadingComments();
+ // also, if comments are allowed or empty lines are to be skipped, may need to skip such leading lines
+ _reader.skipLinesWhenNeeded();
// First things first: are we expecting header line? If so, read, process
if (_schema.usesHeader()) {
_readHeaderLine();
- _reader.skipLeadingComments();
+ _reader.skipLinesWhenNeeded();
}
// and if we are to skip the first data line, skip it
if (_schema.skipsFirstDataRow()) {
_reader.skipLine();
- _reader.skipLeadingComments();
+ _reader.skipLinesWhenNeeded();
}
-
+
// Only one real complication, actually; empty documents (zero bytes).
// Those have no entries. Should be easy enough to detect like so:
final boolean wrapAsArray = Feature.WRAP_AS_ARRAY.enabledIn(_formatFeatures);
diff --git a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java
index 7037edaf..82929133 100644
--- a/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java
+++ b/csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java
@@ -65,7 +65,12 @@ public class CsvDecoder
protected boolean _trimSpaces;
protected boolean _allowComments;
-
+
+ /**
+ * @since 2.10.1
+ */
+ protected boolean _skipBlankLines; // NOTE: can be final in 3.0, not before
+
/**
* Maximum of quote character, linefeeds (\r and \n), escape character.
*/
@@ -111,14 +116,14 @@ public class CsvDecoder
* needs to be handled (indicates end-of-record).
*/
protected int _pendingLF = 0;
-
+
/**
* Flag that indicates whether parser is closed or not. Gets
* set when parser is either closed by explicit call
* ({@link #close}) or when end-of-input is reached.
*/
protected boolean _closed;
-
+
/*
/**********************************************************************
/* Current input location information
@@ -152,7 +157,7 @@ public class CsvDecoder
* For big (gigabyte-sized) sizes are possible, needs to be long,
* unlike pointers and sizes related to in-memory buffers.
*/
- protected long _tokenInputTotal = 0;
+ protected long _tokenInputTotal = 0;
/**
* Input row on which current token starts, 1-based
@@ -202,8 +207,7 @@ public class CsvDecoder
final static double MIN_INT_D = Integer.MIN_VALUE;
final static double MAX_INT_D = Integer.MAX_VALUE;
-
-
+
// Digits, numeric
final protected static int INT_0 = '0';
final protected static int INT_1 = '1';
@@ -254,8 +258,8 @@ public class CsvDecoder
/**********************************************************************
*/
- @SuppressWarnings("deprecation")
- public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, TextBuffer textBuffer,
+ public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema,
+ TextBuffer textBuffer,
int stdFeatures, int csvFeatures)
{
_owner = owner;
@@ -266,6 +270,7 @@ public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, T
final boolean legacy = JsonParser.Feature.ALLOW_YAML_COMMENTS.enabledIn(stdFeatures);
_allowComments = legacy | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
+ _skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
_inputBuffer = ctxt.allocTokenBuffer();
_bufferRecyclable = true; // since we allocated it
_inputSource = r;
@@ -292,6 +297,7 @@ public void setSchema(CsvSchema schema)
*/
public void overrideFormatFeatures(int csvFeatures) {
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
+ _skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
}
/*
@@ -482,39 +488,53 @@ public boolean startNewLine() throws IOException
}
_handleLF();
}
- /* For now, we will only require that there is SOME data
- * following linefeed -- even spaces will do.
- * In future we may want to use better heuristics to possibly
- * skip trailing empty line?
- */
- if ((_inputPtr >= _inputEnd) && !loadMore()) {
- return false;
- }
-
- if (_allowComments && _inputBuffer[_inputPtr] == '#') {
- int i = _skipCommentLines();
- // end-of-input?
- if (i < 0) {
- return false;
- }
- // otherwise push last read char back
- --_inputPtr;
- }
- return true;
+ return skipLinesWhenNeeded();
}
- public void skipLeadingComments() throws IOException
- {
- if (_allowComments) {
- if ((_inputPtr < _inputEnd) || loadMore()) {
- if (_inputBuffer[_inputPtr] == '#') {
- _skipCommentLines();
- --_inputPtr;
+ /**
+ * Skips blank lines and, when comments are allowed, '#' comment lines; with neither feature enabled it only checks for more input.
+ * @return false if the end of input was reached, true otherwise
+ * @throws IOException if raised by {@code hasMoreInput()} or {@code _handleLF()}
+ * @since 2.10.1
+ */
+ public boolean skipLinesWhenNeeded() throws IOException {
+ if (!(_allowComments || _skipBlankLines)) {
+ return hasMoreInput();
+ }
+ int firstCharacterPtr = _inputPtr; // NOTE(review): raw index into _inputBuffer; may go stale if hasMoreInput() refills the buffer -- confirm
+ while (hasMoreInput()) {
+ char ch = _inputBuffer[_inputPtr++];
+ if (ch == '\r' || ch == '\n') {
+ _pendingLF = ch;
+ _handleLF();
+ // linefeed consumed: remember where the next line starts
+ firstCharacterPtr = _inputPtr;
+ continue;
+ }
+ if (ch == ' ') {
+ // spaces are skipped in both modes (comment skipping and blank-line skipping)
+ continue;
+ }
+ if (_allowComments) {
+ if (_inputBuffer[firstCharacterPtr] == '#') {
+ // line is marked as a comment: keep consuming its characters
+ continue;
+ }
+ if (ch == '#') {
+ // reached when only spaces precede the hash character on this line
+ // move firstCharacterPtr onto the '#' so the check above skips the rest of the line
+ firstCharacterPtr = _inputPtr-1;
+ continue;
}
}
+ // reached a character that cannot be skipped: this line needs to be parsed,
+ // so rewind the input pointer to the recorded start of the line
+ _inputPtr = firstCharacterPtr;
+ return true; // processing can go on
}
+ return false; // end of input
}
-
+
protected int _skipCommentLines() throws IOException
{
while ((_inputPtr < _inputEnd) || loadMore()) {
diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java
index d4bd2918..ae40d753 100644
--- a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java
+++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java
@@ -8,7 +8,7 @@
// Tests for [csv#56]
public class CommentsTest extends ModuleTestBase
{
- final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n# another...\n";
+ final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n # another...\n";
public void testWithoutComments() throws Exception
{
diff --git a/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java
new file mode 100644
index 00000000..7e8de0fe
--- /dev/null
+++ b/csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java
@@ -0,0 +1,159 @@
+package com.fasterxml.jackson.dataformat.csv.deser;
+
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.dataformat.csv.CsvParser;
+import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;
+
+import static org.junit.Assert.assertArrayEquals;
+
+// for [dataformats-text#15]: Allow skipping of empty lines
+public class SkipBlankLines15Test extends ModuleTestBase {
+
+ private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n";
+ private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n";
+ private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n #comment\n\ntrue,\n";
+ private static final String CSV_WITH_FIRST_BLANK_LINE = "\n1,\"xyz\"\ntrue,\n";
+ private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n \n\n";
+
+ public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE);
+ // First, verify default behavior:
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row(""),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .with(CsvParser.Feature.SKIP_EMPTY_LINES)
+ .readValue(CSV_WITH_EMPTY_LINE);
+ // empty line is skipped
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row("true", "")
+ ), rows);
+ }
+
+
+ public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .readValue(CSV_WITH_BLANK_LINE);
+ // First, verify default behavior:
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row(" "),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .with(CsvParser.Feature.SKIP_EMPTY_LINES)
+ .readValue(CSV_WITH_BLANK_LINE);
+ // blank line is skipped
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
+ // First, verify default behavior:
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row(" "),
+ row(" #comment"),
+ row(""),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .with(CsvParser.Feature.SKIP_EMPTY_LINES)
+ .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
+ // blank/empty lines are skipped
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row(" #comment"),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .with(CsvParser.Feature.SKIP_EMPTY_LINES)
+ .with(CsvParser.Feature.ALLOW_COMMENTS)
+ .readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
+ // blank/empty/comment lines are skipped
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .readValue(CSV_WITH_FIRST_BLANK_LINE);
+ // First, verify default behavior:
+ assertArrayEquals(expected(
+ row(""),
+ row("1", "xyz"),
+ row("true", "")
+ ), rows);
+ }
+
+ public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .with(CsvParser.Feature.SKIP_EMPTY_LINES)
+ .readValue(CSV_WITH_FIRST_BLANK_LINE);
+ // blank line is skipped
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row("true", "")
+ ), rows);
+ }
+
+
+ public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .readValue(CSV_WITH_TRAILING_BLANK_LINES);
+ // First, verify default behavior:
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row("true", ""),
+ row(" "),
+ row("")
+ ), rows);
+ }
+
+ public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
+ String[][] rows = mapperForCsvAsArray()
+ .with(CsvParser.Feature.SKIP_EMPTY_LINES)
+ .readValue(CSV_WITH_TRAILING_BLANK_LINES);
+ // trailing blank and empty lines are skipped, leaving only the two data rows
+ assertArrayEquals(expected(
+ row("1", "xyz"),
+ row("true", "")
+ ), rows);
+ }
+
+ private ObjectReader mapperForCsvAsArray() {
+ // with WRAP_AS_ARRAY enabled, the whole document is bound as an array of rows (each row a String[]):
+ return mapperForCsv()
+ .readerFor(String[][].class)
+ .with(CsvParser.Feature.WRAP_AS_ARRAY);
+ }
+
+ private String[][] expected(String[]... rowInputs) {
+ return rowInputs;
+ }
+
+ private String[] row(String... cellInputs) {
+ return cellInputs;
+ }
+}
diff --git a/pom.xml b/pom.xml
index b72f8b7f..84565230 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@