Skip to content

Commit

Permalink
Fix #285 (add CsvParser.Feature.FAIL_ON_MISSING_HEADER_COLUMNS)
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Aug 21, 2022
1 parent c3d7deb commit 88a278f
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.LinkedHashSet;
import java.util.Set;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.base.ParserMinimalBase;
Expand Down Expand Up @@ -106,7 +108,7 @@ public enum Feature
/**
* Feature that allows failing (with a {@link CsvReadException}) in cases
* where number of column values encountered is less than number of columns
* declared in active schema ("missing columns").
* declared in the active schema ("missing columns").
*<p>
* Note that this feature has precedence over {@link #INSERT_NULLS_FOR_MISSING_COLUMNS}
*<p>
Expand All @@ -116,6 +118,17 @@ public enum Feature
*/
FAIL_ON_MISSING_COLUMNS(false),

/**
* Feature that allows failing (with a {@link CsvReadException}) in cases
* where number of header columns encountered is less than number of columns
* declared in the active schema (if there is one).
*<p>
* Feature is enabled by default.
*
* @since 2.14
*/
FAIL_ON_MISSING_HEADER_COLUMNS(true),

/**
* Feature that allows "inserting" virtual key / `null` value pairs in case
* a row contains fewer columns than declared by configured schema.
Expand Down Expand Up @@ -784,7 +797,8 @@ protected void _readHeaderLine() throws IOException {
default schema based on the columns found in the header.
*/

if (_schema.size() > 0 && !_schema.reordersColumns()) {
final int schemaColumnCount = _schema.size();
if (schemaColumnCount > 0 && !_schema.reordersColumns()) {
if (_schema.strictHeaders()) {
String name;
int ix = 0;
Expand Down Expand Up @@ -840,13 +854,24 @@ protected void _readHeaderLine() throws IOException {

// Ok: did we get any columns?
CsvSchema newSchema = builder.build();
int size = newSchema.size();
if (size < 2) { // 1 just because we may get 'empty' header name
String first = (size == 0) ? "" : newSchema.columnName(0).trim();
int newColumnCount = newSchema.size();
if (newColumnCount < 2) { // 1 just because we may get 'empty' header name
String first = (newColumnCount == 0) ? "" : newSchema.columnName(0).trim();
if (first.length() == 0) {
_reportCsvMappingError("Empty header line: can not bind data");
}
}
// [dataformats-text#285]: Are we missing something?
int diff = schemaColumnCount - newColumnCount;
if (diff > 0) {
Set<String> oldColumnNames = new LinkedHashSet<>();
_schema.getColumnNames(oldColumnNames);
oldColumnNames.removeAll(newSchema.getColumnNames());
_reportCsvMappingError(String.format("Missing %d header column%s: [\"%s\"]",
diff, (diff == 1) ? "" : "s",
String.join("\",\"", oldColumnNames)));
}

// otherwise we will use what we got
setSchema(builder.build());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ public CsvSchema(Column[] columns, int features,
if (_columns.length == 0) {
_columnsByName = Collections.emptyMap();
} else {
_columnsByName = new HashMap<String,Column>(4 + _columns.length);
_columnsByName = new LinkedHashMap<String,Column>(4 + _columns.length);
for (Column c : _columns) {
_columnsByName.put(c.getName(), c);
}
Expand Down Expand Up @@ -976,7 +976,7 @@ protected CsvSchema(CsvSchema base, Column[] columns)
if (_columns.length == 0) {
_columnsByName = Collections.emptyMap();
} else {
_columnsByName = new HashMap<String,Column>(4 + _columns.length);
_columnsByName = new LinkedHashMap<String,Column>(4 + _columns.length);
for (Column c : _columns) {
_columnsByName.put(c.getName(), c);
}
Expand Down Expand Up @@ -1407,7 +1407,7 @@ public String getNullValueString() {
/* Public API, extended; column access
/**********************************************************************
*/

@Override
public Iterator<Column> iterator() {
return Arrays.asList(_columns).iterator();
Expand Down Expand Up @@ -1456,7 +1456,31 @@ public Column column(String name, int probableIndex) {
}
return _columnsByName.get(name);
}


/**
 * Returns the names of all columns of this schema in a newly created
 * {@code List}, in the order the columns are included in the schema.
 *
 * @return List with one name per column
 *
 * @since 2.14
 */
public List<String> getColumnNames() {
    // Pre-size for the column count, then let the Collection-based overload fill it
    final Collection<String> collected =
            getColumnNames(new ArrayList<String>(_columns.length));
    // The overload hands back the collection it was given, i.e. the ArrayList above
    return (List<String>) collected;
}

/**
 * Adds the name of every column of this schema, in schema order, to the
 * given {@code Collection}.
 *
 * @param names Collection to add the column names to
 *
 * @return The {@code names} collection that was passed in
 *
 * @since 2.14
 */
public Collection<String> getColumnNames(Collection<String> names) {
    int index = 0;
    while (index < _columns.length) {
        names.add(_columns[index].getName());
        ++index;
    }
    return names;
}

/**
* Method for getting description of column definitions in
* developer-readable form
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,17 @@ public void testMissingWithReorder() throws Exception
.addColumn("name").addColumn("age").build();
final String CSV = "name\n"
+"Roger\n";
MappingIterator<Map<String, Object>> it = MAPPER
.readerFor(Map.class)
.with(csvSchema)
.readValues(CSV);
// Need to have it all inside try block since construction tries to read
// the first token
try {
MappingIterator<Map<String, Object>> it = MAPPER
.readerFor(Map.class)
.with(csvSchema)
.readValues(CSV);
it.nextValue();
fail("Should not pass with missing columns");
} catch (CsvReadException e) {
verifyException(e, "Not enough column values");
verifyException(e, "expected 2, found 1");
verifyException(e, "Missing 1 header column: [\"age\"]");
}
}
}
5 changes: 4 additions & 1 deletion release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,11 @@ Francesco Tumanischvili (frantuma@github)
(2.11.1)
Björn Michael (bjmi@github)
* Reported #204: `CsvParser.Feature.ALLOW_TRAILING_COMMA` doesn't work with header columns
* Reported #204: (csv) `CsvParser.Feature.ALLOW_TRAILING_COMMA` doesn't work with header columns
(2.11.2)
* Reported #285: (csv) Missing columns from header line (compared to `CsvSchema`) not detected
when reordering columns (add `CsvParser.Feature.FAIL_ON_MISSING_HEADER_COLUMNS`)
(2.14.0)

Jesper Nielsen (jn-asseco@github)
* Requested #175: (yaml) Add `YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR`
Expand Down
3 changes: 3 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Active Maintainers:

2.14.0 (not yet released)

#285: (csv) Missing columns from header line (compared to `CsvSchema`) not detected
when reordering columns (add `CsvParser.Feature.FAIL_ON_MISSING_HEADER_COLUMNS`)
(reported by Björn M)
#297: (csv) CSV schema caching POJOs with different views
(contributed by Falk H)
#314: (csv) Add fast floating-point parsing, generation support
Expand Down

0 comments on commit 88a278f

Please sign in to comment.