Skip to content

Commit

Permalink
experiment: try working on issue 368
Browse files Browse the repository at this point in the history
Still at a very early, experimental stage; not sure whether all test cases
match the desired results.

implementation missing, just fixed compile errors

- issue: FasterXML#368
- similar issue: FasterXML#15
- maybe code changes to gain insights: FasterXML@f44a320
  • Loading branch information
tlahn committed Nov 20, 2023
1 parent 783cc72 commit 0020b61
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 1 deletion.
Expand Up @@ -152,7 +152,17 @@ public enum Feature
*
* Feature is disabled by default
*/
EMPTY_STRING_AS_NULL(false)
EMPTY_STRING_AS_NULL(false),

/**
* Feature that allows skipping input lines that contain nothing but column separators and whitespace,
* instead of decoding them as rows of all empty (or, depending on binding, `null`) values.
*<p>
* Feature is disabled by default.
*
* @since 2.18
*/
SKIP_EMPTY_ROWS(false),
;

final boolean _defaultState;
Expand Down
Expand Up @@ -74,6 +74,8 @@ public class CsvDecoder
*/
protected boolean _skipBlankLines; // NOTE: can be final in 3.0, not before

protected boolean _skipEmptyRows;

/**
* Maximum of quote character, linefeeds (\r and \n), escape character.
*/
Expand Down Expand Up @@ -284,6 +286,7 @@ public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema,
_allowComments = oldComments | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
_skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
_skipEmptyRows = CsvParser.Feature.SKIP_EMPTY_ROWS.enabledIn(csvFeatures);
_inputBuffer = ctxt.allocTokenBuffer();
_bufferRecyclable = true; // since we allocated it
_inputReader = r;
Expand Down
@@ -0,0 +1,236 @@
package com.fasterxml.jackson.dataformat.csv.deser;

import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;

import static org.junit.Assert.assertArrayEquals;

// for [dataformats-text#368]: Allow skipping of lines that only contain column separators
public class SkipEmptyRows368Test extends ModuleTestBase {

private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n,\ntrue,\n";
private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n , \ntrue,\n";
private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n , \n , #comment\n,\ntrue,\n";
private static final String CSV_WITH_FIRST_BLANK_LINE = ",\n1,\"xyz\"\ntrue,\n";
private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n , \n,\n";

@JsonPropertyOrder({ "age", "name", "cute" })
protected static class Entry {
public int age;
public String name;
public boolean cute;
}

// [dataformats-text#368]: Allow skipping of lines that only contain column separators
public void testSkipEmptyLinesFeature() throws Exception
{
final String CSV = "1,\"xyz\"\n,\ntrue,\n";

CsvMapper mapper = mapperForCsv();

// First, verify default behavior:

String[][] rows = mapper
.readerFor(String[][].class)
.with(CsvParser.Feature.WRAP_AS_ARRAY)
.readValue(CSV);
assertEquals(3, rows.length);
String[] row;

row = rows[0];
assertEquals(2, row.length);
assertEquals("1",row[0]);
assertEquals("xyz", row[1]);

row = rows[1];
assertEquals(2, row.length);
assertEquals("", row[0]);
assertEquals("", row[1]);

row = rows[2];
assertEquals(2, row.length);
assertEquals("true", row[0]);
assertEquals("", row[1]);

// when wrapped as an array, we'll get array of Lists:
rows = mapper.readerFor(String[][].class)
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.with(CsvParser.Feature.WRAP_AS_ARRAY)
.readValue(CSV);

assertEquals(2, rows.length);
row = rows[0];
assertEquals(2, row.length);
assertEquals("1",row[0]);
assertEquals("xyz", row[1]);

row = rows[1];
assertEquals(2, row.length);
assertEquals("true", row[0]);
assertEquals("", row[1]);
}

public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE);
// First, verify default behavior:
assertArrayEquals(expected(
row("1", "xyz"),
row("", ""),
row("true", "")
), rows);
}

public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.readValue(CSV_WITH_EMPTY_LINE);
// empty line is skipped
assertArrayEquals(expected(
row("1", "xyz"),
row("true", "")
), rows);
}


public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_BLANK_LINE);
// First, verify default behavior:
assertArrayEquals(expected(
row("1", "xyz"),
row(" "," "),
row("true", "")
), rows);
}

public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.readValue(CSV_WITH_BLANK_LINE);
// blank line is skipped
assertArrayEquals(expected(
row("1", "xyz"),
row("true", "")
), rows);
}

public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// First, verify default behavior:
assertArrayEquals(expected(
row("1", "xyz"),
row(" ", " "),
row(" "," #comment"),
row("", ""),
row("true", "")
), rows);
}

public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabledButAllowComments() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.ALLOW_COMMENTS)
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// comment must be removed but remaining empty row must be kept
assertArrayEquals(expected(
row("1", "xyz"),
row("", " "),
row("",""), // #comment
row("", ""),
row("true", "")
), rows);
}

// 14-Apr-2020, tatu: Due to [dataformats-text#191], can not retain leading spaces
// when trimming empty lines and/or comments, so test changed for 2.11
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// blank/empty lines are skipped
assertArrayEquals(expected(
row("1", "xyz"),
// As per: [dataformats-text#191]
// row(" #comment"),
row(" ", " #comment"),
row("true", "")
), rows);
}

public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.with(CsvParser.Feature.ALLOW_COMMENTS)
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// blank/empty/comment lines are skipped

assertArrayEquals(expected(
row("1", "xyz"),
row("true", "")
), rows);
}

public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_FIRST_BLANK_LINE);
// First, verify default behavior:
assertArrayEquals(expected(
row("", ""),
row("1", "xyz"),
row("true", "")
), rows);
}

public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.readValue(CSV_WITH_FIRST_BLANK_LINE);
// blank line is skipped
assertArrayEquals(expected(
row("1", "xyz"),
row("true", "")
), rows);
}


public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_TRAILING_BLANK_LINES);
// First, verify default behavior:
assertArrayEquals(expected(
row("1", "xyz"),
row("true", ""),
row(" ", " "),
row("", "")
), rows);
}

public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_ROWS)
.readValue(CSV_WITH_FIRST_BLANK_LINE);
// blank lines are skipped
assertArrayEquals(expected(
row("1", "xyz"),
row("true", "")
), rows);
}

private ObjectReader mapperForCsvAsArray() {
// when wrapped as an array, we'll get array of Lists:
return mapperForCsv()
.readerFor(String[][].class)
.with(CsvParser.Feature.WRAP_AS_ARRAY);
}

private String[][] expected(String[]... rowInputs) {
return rowInputs;
}

private String[] row(String... cellInputs) {
return cellInputs;
}
}

0 comments on commit 0020b61

Please sign in to comment.