Skip to content

Commit

Permalink
Allow to skip first lines #738 (#1021)
Browse files Browse the repository at this point in the history
Co-authored-by: Bhuvaneshwara Raja <bhuvaneshwara.raja@ei.study>
  • Loading branch information
bhuvaneshwararaja and Bhuvaneshwara Raja committed Oct 9, 2023
1 parent f1d10ee commit e2d570e
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 3 deletions.
11 changes: 10 additions & 1 deletion docs/docs.html
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,8 @@ <h5 id="config-default">Default Config With All Options</h5>
beforeFirstChunk: undefined,
withCredentials: undefined,
transform: undefined,
delimitersToGuess: [',', '\t', '|', ';', <a href="#readonly">Papa.RECORD_SEP</a>, <a href="#readonly">Papa.UNIT_SEP</a>]
delimitersToGuess: [',', '\t', '|', ';', <a href="#readonly">Papa.RECORD_SEP</a>, <a href="#readonly">Papa.UNIT_SEP</a>],
skipFirstNLines: 0
}</code></pre>
</div>
<div class="clear"></div>
Expand Down Expand Up @@ -682,6 +683,14 @@ <h5 id="config-details">Config Options</h5>
An array of delimiters to guess from if the <code>delimiter</code> option is not set.
</td>
</tr>
<tr>
<td>
<code>skipFirstNLines</code>
</td>
<td>
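The number of lines to skip at the start of the input before parsing. Defaults to <code>0</code>; zero or negative values skip nothing. When <code>header</code> is <code>true</code>, the header row is kept and the N lines that follow it are skipped.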
</td>
</tr>
</table>
</div>
</div>
Expand Down
19 changes: 17 additions & 2 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ License: MIT
}
}


/** ChunkStreamer is the base prototype for various streamer implementations. */
function ChunkStreamer(config)
{
Expand Down Expand Up @@ -521,8 +522,23 @@ License: MIT

// Rejoin the line we likely just split in two by chunking the file
var aggregate = this._partialLine + chunk;
this._pendingSkip = parseInt(this._config.skipFirstNLines) || 0;
this._skipHeader = 0;
if (this._config.header) {
this._skipHeader++;
}
if (this._pendingSkip > 0) {
var splitChunk = aggregate.split('\n');
var currentChunkLength = splitChunk.length;
if (currentChunkLength <= this._pendingSkip) {
aggregate = this._partialLine;
}
else{
aggregate = this._partialLine + [...splitChunk.slice(0, this._skipHeader), ...splitChunk.slice(this._skipHeader + this._pendingSkip)].join('\n');
}
this._pendingSkip -= currentChunkLength;
}
this._partialLine = '';

var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

if (this._handle.paused() || this._handle.aborted()) {
Expand Down Expand Up @@ -1931,7 +1947,6 @@ License: MIT
{
return function() { f.apply(self, arguments); };
}

function isFunction(func)
{
return typeof func === 'function';
Expand Down
1 change: 1 addition & 0 deletions player/player.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ <h1><a href="http://papaparse.com">Papa Parse</a> Player</h1>
<label><input type="checkbox" id="skipEmptyLines"> Skip empty lines</label>
<label><input type="checkbox" id="step-pause"> Pause on step</label>
<label><input type="checkbox" id="print-steps"> Log each step/chunk</label>
<label>Skip First N Lines: <input type="number" id="skipFirstNLines"></label>

<label>Delimiter: <input type="text" size="5" placeholder="auto" id="delimiter"> <a href="javascript:" id="insert-tab">tab</a></label>

Expand Down
1 change: 1 addition & 0 deletions player/player.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ function buildConfig()
skipEmptyLines: $('#skipEmptyLines').prop('checked'),
chunk: $('#chunk').prop('checked') ? chunkFn : undefined,
beforeFirstChunk: undefined,
skipFirstNLines: $('#skipFirstNLines').val()
};

function getLineEnding()
Expand Down
36 changes: 36 additions & 0 deletions tests/test-cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -1574,6 +1574,42 @@ var PARSE_TESTS = [
data: [['a', 'b', 'c\n'], ['d', 'e', 'f']],
errors: []
}
},
{
description: "Skip First N number of lines , with header and 2 rows",
input: 'a,b,c,d\n1,2,3,4',
config: { header: true, skipFirstNLines: 1 },
expected: {
data: [],
errors: []
}
},
{
description: "Skip First N number of lines , with header and 3 rows",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: true, skipFirstNLines: 1 },
expected: {
data: [{a: '4', b: '5', c: '6', d: '7'}],
errors: []
}
},
{
description: "Skip First N number of lines , with header false",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: false, skipFirstNLines: 1 },
expected: {
data: [['1','2','3','4'],['4','5','6','7']],
errors: []
}
},
{
description: "Skip First N number of lines , with header false and skipFirstNLines as negative value",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
config: { header: false, skipFirstNLines: -2 },
expected: {
data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']],
errors: []
}
}
];

Expand Down

0 comments on commit e2d570e

Please sign in to comment.