Skip to content

Commit

Permalink
Skip lines future (Feature Suggestion) #738
Browse files Browse the repository at this point in the history
  • Loading branch information
Bhuvaneshwara Raja committed Sep 25, 2023
1 parent 841e1d4 commit b133cfb
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 3 deletions.
11 changes: 10 additions & 1 deletion docs/docs.html
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,8 @@ <h5 id="config-default">Default Config With All Options</h5>
beforeFirstChunk: undefined,
withCredentials: undefined,
transform: undefined,
delimitersToGuess: [',', '\t', '|', ';', <a href="#readonly">Papa.RECORD_SEP</a>, <a href="#readonly">Papa.UNIT_SEP</a>]
delimitersToGuess: [',', '\t', '|', ';', <a href="#readonly">Papa.RECORD_SEP</a>, <a href="#readonly">Papa.UNIT_SEP</a>],
skipFirstNLines: 0
}</code></pre>
</div>
<div class="clear"></div>
Expand Down Expand Up @@ -682,6 +683,14 @@ <h5 id="config-details">Config Options</h5>
An array of delimiters to guess from if the <code>delimiter</code> option is not set.
</td>
</tr>
<tr>
<td>
<code>skipFirstNLines</code>
</td>
<td>
Skips the first N lines when converting a CSV file to JSON.
</td>
</tr>
</table>
</div>
</div>
Expand Down
20 changes: 18 additions & 2 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,10 @@ License: MIT
}
}

/**
 * Non-mutating counterpart to Array.prototype.splice for removal only.
 *
 * Builds a new array made of the items before `startIndex` followed by the
 * items from `startIndex + count` onward; `inputArray` itself is left untouched.
 *
 * @param {Array} inputArray - source sequence; must provide a `slice` method
 * @param {number} startIndex - position of the first item to leave out
 * @param {number} count - how many consecutive items to leave out
 * @returns {Array} a fresh array without the removed range
 */
function customSplice(inputArray, startIndex, count) {
	var leading = inputArray.slice(0, startIndex);
	var trailing = inputArray.slice(startIndex + count);
	// Array.from walks each slice the same way the spread operator would,
	// so the pieces are materialised as plain arrays before being joined.
	return Array.from(leading).concat(Array.from(trailing));
}

/** ChunkStreamer is the base prototype for various streamer implementations. */
function ChunkStreamer(config)
{
Expand Down Expand Up @@ -519,8 +523,21 @@ License: MIT

// Rejoin the line we likely just split in two by chunking the file
var aggregate = this._partialLine + chunk;
this._skipLines = parseInt(this._config.skipFirstNLines) || 0;
this._pendingSkip = parseInt(this._config.skipFirstNLines) || 0;
this._skipHeader = this._config.header ? 1 : 0;
if (this._pendingSkip > 0 && this._pendingSkip <= this._skipLines) {
var splitChunk = aggregate.split('\n');
var currentChunkLength = splitChunk.length;
if (currentChunkLength <= this._pendingSkip) {
aggregate = this._partialLine;
}
else{
aggregate = this._partialLine + customSplice(splitChunk,this._skipHeader,this._pendingSkip).join('\n');
}
this._pendingSkip = this._skipLines - currentChunkLength;
}
this._partialLine = '';

var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

if (this._handle.paused() || this._handle.aborted()) {
Expand Down Expand Up @@ -1929,7 +1946,6 @@ License: MIT
{
return function() { f.apply(self, arguments); };
}

function isFunction(func)
{
return typeof func === 'function';
Expand Down
1 change: 1 addition & 0 deletions player/player.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ <h1><a href="http://papaparse.com">Papa Parse</a> Player</h1>
<label><input type="checkbox" id="skipEmptyLines"> Skip empty lines</label>
<label><input type="checkbox" id="step-pause"> Pause on step</label>
<label><input type="checkbox" id="print-steps"> Log each step/chunk</label>
<label>Skip First N Lines: <input type="number" id="skipFirstNLines"></label>

<label>Delimiter: <input type="text" size="5" placeholder="auto" id="delimiter"> <a href="javascript:" id="insert-tab">tab</a></label>

Expand Down
1 change: 1 addition & 0 deletions player/player.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ function buildConfig()
skipEmptyLines: $('#skipEmptyLines').prop('checked'),
chunk: $('#chunk').prop('checked') ? chunkFn : undefined,
beforeFirstChunk: undefined,
skipFirstNLines: $('#skipFirstNLines').val()
};

function getLineEnding()
Expand Down
54 changes: 54 additions & 0 deletions tests/test-cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -1574,6 +1574,60 @@ var PARSE_TESTS = [
data: [['a', 'b', 'c\n'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Skip First N number of lines , with header and 2 rows",
input: 'a,b,c,d\n1,2,3,4',
config: { header: true, skipFirstNLines: 1 },
expected: {
data: [],
errors: []
}
},
{
description: "Skip First N number of lines , with header and 3 rows",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: true, skipFirstNLines: 1 },
expected: {
data: [{a: '4', b: '5', c: '6', d: '7'}],
errors: []
}
},
{
description: "Skip First N number of lines , with header false",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: false, skipFirstNLines: 1 },
expected: {
data: [['1','2','3','4'],['4','5','6','7']],
errors: []
}
},
{
description: "Skip First N number of lines , with header false and skipFirstNLines as 0",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: false, skipFirstNLines: 0 },
expected: {
data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']],
errors: []
}
},
{
description: "Skip First N number of lines , with header false and skipFirstNLines as negative value",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: false, skipFirstNLines: -2 },
expected: {
data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']],
errors: []
}
},
{
description: "Without Skip First N number of lines",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: false},
expected: {
data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']],
errors: []
}
}
];

Expand Down

0 comments on commit b133cfb

Please sign in to comment.