Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strict quoting flag #730

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
58 changes: 47 additions & 11 deletions papaparse.js
Expand Up @@ -1393,6 +1393,8 @@ License: MIT
var step = config.step;
var preview = config.preview;
var fastMode = config.fastMode;
var _strictQuote = config.strictQuote;
var strictQuote = _strictQuote === undefined ? false : Boolean(_strictQuote);
var quoteChar;
/** Allows for no quoteChar by setting quoteChar to undefined in config */
if (config.quoteChar === undefined) {
Expand Down Expand Up @@ -1484,19 +1486,22 @@ License: MIT
var nextNewline = input.indexOf(newline, cursor);
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
var quoteSearch = input.indexOf(quoteChar, cursor);
var savedNextDelim;
var savedNextNewline;
var savedQuoteSearch;

// Parser loop
for (;;)
{
// Field has opening quote
if (input[cursor] === quoteChar)
{
var quoteFallThrough = false;
quoteSaveState();

// Start our search for the closing quote where the cursor is
quoteSearch = cursor;

// Skip the opening quote
cursor++;

for (;;)
{
// Find closing quote
Expand All @@ -1505,23 +1510,36 @@ License: MIT
//No other quotes are found - no other delimiters
if (quoteSearch === -1)
{
if (!ignoreLastRow) {
// No closing quote... what a pity
if(ignoreLastRow)
return returnable();

if(strictQuote) {
errors.push({
type: 'Quotes',
code: 'MissingQuotes',
message: 'Quoted field unterminated',
row: data.length, // row has yet to be inserted
index: cursor
});
quoteRestoreState();
quoteFallThrough = true;
break; // fall through to parse as non-quote.
}
return finish();
// No closing quote... what a pity
errors.push({
type: 'Quotes',
code: 'MissingQuotes',
message: 'Quoted field unterminated',
row: data.length, // row has yet to be inserted
index: cursor
});
return finish(input.substring(cursor + 1));
}

// Closing quote at EOF
if (quoteSearch === inputLen - 1)
{
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
var value = input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar);
return finish(value);
}

Expand Down Expand Up @@ -1552,7 +1570,7 @@ License: MIT
// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim)
{
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar));
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;

// If char after following delimiter is not quoteChar, we find next quote char position
Expand All @@ -1570,7 +1588,7 @@ License: MIT
// Closing quote followed by newline or 'unnecessary spaces + newLine'
if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline)
{
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
row.push(input.substring(cursor + 1, quoteSearch).replace(quoteCharRegex, quoteChar));
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line
Expand Down Expand Up @@ -1598,12 +1616,18 @@ License: MIT
index: cursor
});

if(strictQuote) {
quoteRestoreState();
quoteFallThrough = true;
break; // fall through to parse as non-quote.
}
quoteSearch++;
continue;

}

continue;
if(!quoteFallThrough) {
continue;
}
}

// Comment found at start of new line
Expand Down Expand Up @@ -1779,6 +1803,18 @@ License: MIT

return result;
}

/**
 * Snapshot the parser's current look-ahead positions before a quoted field
 * is scanned.
 *
 * Copies `quoteSearch`, `nextNewline` and `nextDelim` into their `saved*`
 * counterparts so that quoteRestoreState() can put them back later.
 * Takes no arguments and returns nothing; it only writes the three
 * `saved*` variables of the enclosing scope.
 */
function quoteSaveState() {
	savedQuoteSearch = quoteSearch;
	savedNextNewline = nextNewline;
	savedNextDelim = nextDelim;
}

/**
 * Roll the parser's look-ahead positions back to the last snapshot.
 *
 * Copies `savedNextDelim`, `savedNextNewline` and `savedQuoteSearch` back
 * into `nextDelim`, `nextNewline` and `quoteSearch`. Takes no arguments and
 * returns nothing. The `saved*` variables themselves are left unchanged,
 * so calling it repeatedly yields the same state.
 */
function quoteRestoreState() {
	nextDelim = savedNextDelim;
	nextNewline = savedNextNewline;
	quoteSearch = savedQuoteSearch;
}
};

/** Sets the abort flag */
Expand Down
47 changes: 39 additions & 8 deletions tests/test-cases.js
Expand Up @@ -194,7 +194,7 @@ var CORE_PARSER_TESTS = [
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
Expand All @@ -209,7 +209,7 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 1
"index": 0
}]
}
},
Expand All @@ -224,14 +224,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
"index": 2
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
Expand All @@ -246,14 +246,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
"index": 2
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
Expand All @@ -268,14 +268,14 @@ var CORE_PARSER_TESTS = [
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 3
"index": 2
},
{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 3
"index": 2
}]
}
},
Expand Down Expand Up @@ -585,6 +585,37 @@ var CORE_PARSER_TESTS = [
data: [['a', 'b', 'c'], ['']],
errors: []
}
},
{
description: "Quoted field has invalid trailing quote after delimiter with a valid closer in strict quote mode",
input: '"a,"b,c"\nd,e,f',
notes: "The input is malformed, opening quotes identified, trailing quote is malformed. Trailing quote should be escaped or followed by valid new line or delimiter to be valid",
config: { strictQuote: true },
expected: {
data: [['"a','b,c'], ['d', 'e', 'f']],
errors: [{
"type": "Quotes",
"code": "InvalidQuotes",
"message": "Trailing quote on quoted field is malformed",
"row": 0,
"index": 0
}]
}
},
{
description: "Quoted field has no closing quote in strict quote mode",
input: 'a,"b,c\nd,e,f',
config: { strictQuote: true },
expected: {
data: [['a','"b','c'],['d','e','f']],
errors: [{
"type": "Quotes",
"code": "MissingQuotes",
"message": "Quoted field unterminated",
"row": 0,
"index": 2
}]
}
}
];

Expand Down