Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate headers in Parser, based upon presence of _fields, without changing _input (fixes issue #985) #989

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
113 changes: 44 additions & 69 deletions papaparse.js
Expand Up @@ -1097,11 +1097,8 @@ License: MIT
}

var parserConfig = copy(_config);
if (_config.preview && _config.header)
parserConfig.preview++; // to compensate for header row

_input = input;
_parser = new Parser(parserConfig);
_parser = new Parser(parserConfig, _fields);
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
processResults();
return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
Expand Down Expand Up @@ -1178,8 +1175,9 @@ License: MIT
});
}

if (needsHeaderRow())
fillHeaderFields();
if (needsHeaderRow() && _results.meta.fields) {
_fields = _fields.concat(_results.meta.fields);
}

return applyHeaderAndDynamicTypingAndTransformation();
}
Expand All @@ -1189,31 +1187,6 @@ License: MIT
return _config.header && _fields.length === 0;
}

/**
 * Populates `_fields` from the current `_results.data`.
 *
 * Each header value is passed through `_config.transformHeader(header, index)`
 * when that option is a function, then appended to `_fields`.
 *
 * - If `_results.data[0]` is an array, `_results.data` is a list of rows: rows
 *   are consumed from the top until `needsHeaderRow()` turns false (or rows run
 *   out), and afterwards exactly one row (the first) is removed from the data.
 * - Otherwise `_results.data` itself is treated as the single header row
 *   (step mode) and is left in place.
 *
 * Does nothing when `_results` is falsy.
 */
function fillHeaderFields()
{
	if (!_results)
		return;

	var rows = _results.data;

	// Applies the optional header transform and records the resulting name.
	var collectHeader = function(name, position)
	{
		var label = name;
		if (isFunction(_config.transformHeader))
			label = _config.transformHeader(name, position);

		_fields.push(label);
	};

	// Not a list of rows: the data is a single row, so use it directly.
	if (!Array.isArray(rows[0]))
	{
		rows.forEach(collectHeader);
		return;
	}

	// Keep reading rows until at least one field name has been collected.
	var rowIndex = 0;
	while (needsHeaderRow() && rowIndex < rows.length)
	{
		rows[rowIndex].forEach(collectHeader);
		rowIndex++;
	}

	// Only the first row is dropped, regardless of how many rows were read.
	rows.splice(0, 1);
}

function shouldApplyDynamicTyping(field) {
// Cache function values to avoid calling it for each row
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
Expand Down Expand Up @@ -1316,7 +1289,7 @@ License: MIT
delimiter: delim,
newline: newline,
preview: 10
}).parse(input);
}, _fields).parse(input);

for (var j = 0; j < preview.data.length; j++) {
if (skipEmptyLines && testEmptyLine(preview.data[j])) {
Expand Down Expand Up @@ -1402,7 +1375,7 @@ License: MIT
}

/** The core parser implements speedy and correct CSV parsing */
function Parser(config)
function Parser(config, _fields)
{
// Unpack the config object
config = config || {};
Expand Down Expand Up @@ -1444,6 +1417,7 @@ License: MIT
// We're gonna need these at the Parser scope
var cursor = 0;
var aborted = false;
var fields = _fields;

this.parse = function(input, baseIndex, ignoreLastRow)
{
Expand All @@ -1466,40 +1440,6 @@ License: MIT
if (!input)
return returnable();

// Rename headers if there are duplicates
if (config.header && !baseIndex)
{
var firstLine = input.split(newline)[0];
var headers = firstLine.split(delim);
var separator = '_';
var headerMap = [];
var headerCount = {};
var duplicateHeaders = false;

for (var j in headers) {
var header = headers[j];
if (isFunction(config.transformHeader))
header = config.transformHeader(header, j);
var headerName = header;

var count = headerCount[header] || 0;
if (count > 0) {
duplicateHeaders = true;
headerName = header + separator + count;
}
headerCount[header] = count + 1;
// In case it already exists, we add more separators
while (headerMap.includes(headerName)) {
headerName = headerName + separator + count;
}
headerMap.push(headerName);
}
if (duplicateHeaders) {
var editedInput = input.split(newline);
editedInput[0] = headerMap.join(delim);
input = editedInput.join(newline);
}
}
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
{
var rows = input.split(newline);
Expand Down Expand Up @@ -1707,10 +1647,40 @@ License: MIT

/**
 * Records a completed row and remembers the cursor position it ended at.
 *
 * When `config.header` is set and no field names are known yet
 * (`fields` is empty), the row is handed to `buildHeaders` and is NOT added
 * to `data`. Every other row is appended to `data` exactly once.
 *
 * @param {Array} row - the values of the row that was just parsed
 */
function pushRow(row)
{
	// The first row seen while headers are still missing is the header row.
	if (config.header && fields.length === 0)
		buildHeaders(row);
	else
		data.push(row);
	lastCursor = cursor;
}

/**
 * Derives the field names from a header row and stores them in `fields`.
 *
 * Each header is first passed through `config.transformHeader(header, index)`
 * when that option is a function (index is the numeric column position).
 * Repeated names are made unique: the n-th repeat of `name` becomes
 * `name_n`, and if that result collides with a name already produced, further
 * `_n` suffixes are appended until it is unique.
 *
 * @param {Array} headers - the raw values of the header row
 */
function buildHeaders(headers) {
	// Duplicate headers will have '_x' appended to them.
	var separator = '_';
	var headerMap = [];
	// Prototype-less map: header names such as "constructor" or "toString"
	// must not pick up inherited Object.prototype members as their count.
	var headerCount = Object.create(null);

	// Index loop (not for...in) so the transform receives a numeric index and
	// enumerable Array.prototype extensions are never treated as columns.
	for (var j = 0; j < headers.length; j++) {
		var header = headers[j];
		if (isFunction(config.transformHeader))
			header = config.transformHeader(header, j);
		var headerName = header;

		var count = headerCount[header] || 0;
		if (count > 0) {
			headerName = header + separator + count;
		}
		headerCount[header] = count + 1;
		// In case it already exists, we add more separators
		while (headerMap.includes(headerName)) {
			headerName = headerName + separator + count;
		}
		headerMap.push(headerName);
	}

	fields = headerMap;
}

/**
* checks if there are extra spaces after closing quote and given index without any text
* if Yes, returns the number of spaces
Expand Down Expand Up @@ -1761,7 +1731,7 @@ License: MIT
/** Returns an object with the results, errors, and meta. */
function returnable(stopped)
{
return {
var results = {
data: data,
errors: errors,
meta: {
Expand All @@ -1772,6 +1742,11 @@ License: MIT
cursor: lastCursor + (baseIndex || 0)
}
};

if (config.header && fields.length > 0)
results.meta.fields = fields.concat([]);

return results;
}

/** Executes the user's step function and resets data & errors. */
Expand Down
62 changes: 55 additions & 7 deletions tests/test-cases.js
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes to existing tests were required to reflect that:

  • Parser now requires an array as its second argument for its constructor
  • Parser.parse() no longer returns the header row as data

Expand Up @@ -591,34 +591,43 @@ var CORE_PARSER_TESTS = [
input: 'A,A,A,A\n1,2,3,4',
config: { header: true },
expected: {
data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']],
errors: []
data: [['1', '2', '3', '4']],
errors: [],
meta: {
fields: ['A', 'A_1', 'A_2', 'A_3']
}
}
},
{
description: "Duplicate header names with headerTransform",
input: 'A,A,A,A\n1,2,3,4',
config: { header: true, transformHeader: function(header) { return header.toLowerCase(); } },
expected: {
data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']],
errors: []
data: [['1', '2', '3', '4']],
errors: [],
meta: {
fields: ['a', 'a_1', 'a_2', 'a_3']
}
}
},
{
description: "Duplicate header names existing column",
input: 'c,c,c,c_1\n1,2,3,4',
config: { header: true },
expected: {
data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
errors: []
data: [['1', '2', '3', '4']],
errors: [],
meta: {
fields: ['c', 'c_1', 'c_2', 'c_1_0']
}
}
},
];

describe('Core Parser Tests', function() {
function generateTest(test) {
(test.disabled ? it.skip : it)(test.description, function() {
var actual = new Papa.Parser(test.config).parse(test.input);
var actual = new Papa.Parser(test.config, []).parse(test.input);
assert.deepEqual(actual.errors, test.expected.errors);
assert.deepEqual(actual.data, test.expected.data);
});
Expand Down Expand Up @@ -2674,6 +2683,45 @@ var CUSTOM_TESTS = [
});
}
},
{
description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)",
expected: [[
["Column 1", "Column 2", "Column 3", "Column 4"],
["Column 1", "Column 2", "Column 3", "Column 4"],
], [
{ "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "" },
{ "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "" },
]],
run: function(callback) {
var inputString = [
"Column 1,Column 2,Column 3,Column 4",
"R1C1,,R1C3,",
"R2C1,,,"
].join("\n");
var output = [];
var dataRows = [];
var headerResults = [];
Papa.parse(inputString, {
header: true,
step: function(results, parser) {
if (results)
{
headerResults.push(results.meta.fields);
parser.pause();
parser.resume();
if (results.data) {
dataRows.push(results.data);
}
}
},
complete: function() {
output.push(headerResults);
output.push(dataRows);
callback(output);
}
});
}
},
];

describe('Custom Tests', function() {
Expand Down