Skip to content

Commit

Permalink
fix: retain line breaks in tokens properly (#2341)
Browse files Browse the repository at this point in the history
* Fix lexer and tokenizer to retain line breaks properly

* Add test for bug

* Check for line breaks not just spaces

* Fix lint

* Fix spacing in test

* clean up code

Co-authored-by: Tony Brix <tony@brix.ninja>
  • Loading branch information
phillipb and UziTech committed Jan 6, 2022
1 parent 6aacd13 commit a9696e2
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 68 deletions.
6 changes: 5 additions & 1 deletion src/Lexer.js
Expand Up @@ -152,7 +152,11 @@ export class Lexer {
// newline
if (token = this.tokenizer.space(src)) {
src = src.substring(token.raw.length);
if (token.type) {
if (token.raw.length === 1 && tokens.length > 0) {
// if there's a single \n as a spacer, it's terminating the last line,
// so move it there so that we don't get unnecessary paragraph tags
tokens[tokens.length - 1].raw += '\n';
} else {
tokens.push(token);
}
continue;
Expand Down
32 changes: 23 additions & 9 deletions src/Tokenizer.js
Expand Up @@ -72,14 +72,11 @@ export class Tokenizer {

space(src) {
const cap = this.rules.block.newline.exec(src);
if (cap) {
if (cap[0].length > 1) {
return {
type: 'space',
raw: cap[0]
};
}
return { raw: '\n' };
if (cap && cap[0].length > 0) {
return {
type: 'space',
raw: cap[0]
};
}
}

Expand Down Expand Up @@ -303,7 +300,24 @@ export class Tokenizer {
for (i = 0; i < l; i++) {
this.lexer.state.top = false;
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
if (!list.loose && list.items[i].tokens.some(t => t.type === 'space')) {
const spacers = list.items[i].tokens.filter(t => t.type === 'space');
const hasMultipleLineBreaks = spacers.every(t => {
const chars = t.raw.split('');
let lineBreaks = 0;
for (const char of chars) {
if (char === '\n') {
lineBreaks += 1;
}
if (lineBreaks > 1) {
return true;
}
}

return false;
});

if (!list.loose && spacers.length && hasMultipleLineBreaks) {
// A single line break doesn't make a list loose; it only terminates the last list item.
list.loose = true;
list.items[i].loose = true;
}
Expand Down
206 changes: 148 additions & 58 deletions test/unit/Lexer-spec.js
Expand Up @@ -93,6 +93,10 @@ lheading 2
----------
`,
tokens: [
{
type: 'space',
raw: '\n'
},
{
type: 'heading',
raw: '# heading 1\n\n',
Expand Down Expand Up @@ -175,6 +179,9 @@ lheading 2
| 1 | 2 |
`,
tokens: [{
type: 'space',
raw: '\n'
}, {
type: 'table',
align: [null, null],
raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
Expand Down Expand Up @@ -212,40 +219,42 @@ paragraph 1
|---|---|
| 1 | 2 |
`,
tokens: [
{
type: 'paragraph',
raw: 'paragraph 1',
text: 'paragraph 1',
tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }]
},
{
type: 'table',
align: [null, null],
raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
header: [
tokens: [{
type: 'space',
raw: '\n'
}, {
type: 'paragraph',
raw: 'paragraph 1\n',
text: 'paragraph 1',
tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }]
},
{
type: 'table',
align: [null, null],
raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
header: [
{
text: 'a',
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
},
{
text: 'b',
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
}
],
rows: [
[
{
text: 'a',
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
text: '1',
tokens: [{ type: 'text', raw: '1', text: '1' }]
},
{
text: 'b',
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
text: '2',
tokens: [{ type: 'text', raw: '2', text: '2' }]
}
],
rows: [
[
{
text: '1',
tokens: [{ type: 'text', raw: '1', text: '1' }]
},
{
text: '2',
tokens: [{ type: 'text', raw: '2', text: '2' }]
}
]
]
}
]
}
]
});
});
Expand All @@ -258,6 +267,9 @@ paragraph 1
| 1 | 2 | 3 |
`,
tokens: [{
type: 'space',
raw: '\n'
}, {
type: 'table',
align: ['left', 'center', 'right'],
raw: '| a | b | c |\n|:--|:-:|--:|\n| 1 | 2 | 3 |\n',
Expand Down Expand Up @@ -302,33 +314,37 @@ a | b
--|--
1 | 2
`,
tokens: [{
type: 'table',
align: [null, null],
raw: 'a | b\n--|--\n1 | 2\n',
header: [
{
text: 'a',
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
},
{
text: 'b',
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
}
],
rows: [
[
tokens: [
{
type: 'space',
raw: '\n'
}, {
type: 'table',
align: [null, null],
raw: 'a | b\n--|--\n1 | 2\n',
header: [
{
text: '1',
tokens: [{ type: 'text', raw: '1', text: '1' }]
text: 'a',
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
},
{
text: '2',
tokens: [{ type: 'text', raw: '2', text: '2' }]
text: 'b',
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
}
],
rows: [
[
{
text: '1',
tokens: [{ type: 'text', raw: '1', text: '1' }]
},
{
text: '2',
tokens: [{ type: 'text', raw: '2', text: '2' }]
}
]
]
]
}]
}]
});
});
});
Expand All @@ -342,6 +358,19 @@ a | b
]
});
});

it('after line break does not consume raw \n', () => {
expectTokens({
md: 'T\nh\n---',
tokens:
jasmine.arrayContaining([
jasmine.objectContaining({
raw: 'T\nh\n'
}),
{ type: 'hr', raw: '---' }
])
});
});
});

describe('blockquote', () => {
Expand Down Expand Up @@ -376,8 +405,11 @@ a | b
`,
tokens: [
{
type: 'space',
raw: '\n'
}, {
type: 'list',
raw: '- item 1\n- item 2',
raw: '- item 1\n- item 2\n',
ordered: false,
start: '',
loose: false,
Expand Down Expand Up @@ -423,9 +455,13 @@ a | b
2. item 2
`,
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
type: 'space',
raw: '\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '1. item 1\n2. item 2',
raw: '1. item 1\n2. item 2\n',
ordered: true,
start: 1,
items: [
Expand All @@ -448,9 +484,13 @@ a | b
2) item 2
`,
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
type: 'space',
raw: '\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '1) item 1\n2) item 2',
raw: '1) item 1\n2) item 2\n',
ordered: true,
start: 1,
items: [
Expand All @@ -475,6 +515,10 @@ a | b
paragraph
`,
tokens: [
{
type: 'space',
raw: '\n'
},
{
type: 'list',
raw: '- item 1\n- item 2',
Expand Down Expand Up @@ -515,7 +559,7 @@ paragraph
{ type: 'space', raw: '\n\n' },
{
type: 'paragraph',
raw: 'paragraph',
raw: 'paragraph\n',
text: 'paragraph',
tokens: [{
type: 'text',
Expand All @@ -534,9 +578,13 @@ paragraph
3. item 2
`,
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
type: 'space',
raw: '\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '2. item 1\n3. item 2',
raw: '2. item 1\n3. item 2\n',
ordered: true,
start: 2,
items: [
Expand All @@ -560,9 +608,13 @@ paragraph
- item 2
`,
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
type: 'space',
raw: '\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '- item 1\n\n- item 2',
raw: '- item 1\n\n- item 2\n',
loose: true,
items: [
jasmine.objectContaining({
Expand All @@ -577,16 +629,54 @@ paragraph
});
});

it('not loose with spaces', () => {
expectTokens({
md: `
- item 1
- item 2
`,
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
type: 'space',
raw: '\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '- item 1\n - item 2\n',
loose: false,
items: [
jasmine.objectContaining({
raw: '- item 1\n - item 2',
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
raw: 'item 1\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '- item 2'
})
])
})
]
})
])
});
});

it('task', () => {
expectTokens({
md: `
- [ ] item 1
- [x] item 2
`,
tokens: jasmine.arrayContaining([
jasmine.objectContaining({
type: 'space',
raw: '\n'
}),
jasmine.objectContaining({
type: 'list',
raw: '- [ ] item 1\n- [x] item 2',
raw: '- [ ] item 1\n- [x] item 2\n',
items: [
jasmine.objectContaining({
raw: '- [ ] item 1\n',
Expand Down
1 change: 1 addition & 0 deletions test/unit/marked-spec.js
Expand Up @@ -994,6 +994,7 @@ br
});

expect(tokensSeen).toEqual([
['space', ''],
['paragraph', 'paragraph'],
['text', 'paragraph'],
['space', ''],
Expand Down

1 comment on commit a9696e2

@vercel
Copy link

@vercel vercel bot commented on a9696e2 Jan 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.