Fix lexer and tokenizer to retain line breaks properly #2341

Merged 6 commits on Jan 6, 2022
src/Lexer.js (5 additions, 1 deletion)
@@ -152,7 +152,11 @@ export class Lexer {
       // newline
       if (token = this.tokenizer.space(src)) {
         src = src.substring(token.raw.length);
-        if (token.type) {
+        if (token.raw.length === 1 && tokens.length > 0) {
+          // if there's a single \n as a spacer, it's terminating the last line,
+          // so move it there so that we don't get unnecessary paragraph tags
+          tokens[tokens.length - 1].raw += '\n';
+        } else {
           tokens.push(token);
         }
         continue;
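In effect, a lone \n between blocks no longer becomes its own token; the lexer appends it to the raw of the previous token. A rough sketch of the observable behavior, assuming marked is built from this branch (the input string is borrowed from the new Lexer spec below; the printed shapes are paraphrased):

    const { marked } = require('marked');

    // 'T\nh' is followed by a single \n before '---'; that newline is
    // retained on the first block's raw instead of becoming a space token.
    const tokens = marked.lexer('T\nh\n---');

    console.log(tokens.map(t => [t.type, JSON.stringify(t.raw)]));
    // Per the new spec: the first token keeps raw 'T\nh\n' and the hr
    // token's raw is exactly '---'.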
src/Tokenizer.js (23 additions, 9 deletions)
@@ -72,14 +72,11 @@ export class Tokenizer {

   space(src) {
     const cap = this.rules.block.newline.exec(src);
-    if (cap) {
-      if (cap[0].length > 1) {
-        return {
-          type: 'space',
-          raw: cap[0]
-        };
-      }
-      return { raw: '\n' };
+    if (cap && cap[0].length > 0) {
+      return {
+        type: 'space',
+        raw: cap[0]
+      };
     }
   }
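So space() now returns a typed 'space' token for any run of newlines, a single \n included, instead of the old untyped { raw: '\n' } placeholder. A standalone sketch of the new contract; the newline regex is an assumption copied from marked's block rules, so verify against src/rules.js:

    // assumed to match marked's block.newline rule
    const newline = /^(?: *(?:\n|$))+/;

    function space(src) {
      const cap = newline.exec(src);
      // the length check guards against a zero-width match at end of input
      if (cap && cap[0].length > 0) {
        return { type: 'space', raw: cap[0] };
      }
    }

    console.log(space('\n\ntext')); // { type: 'space', raw: '\n\n' }
    console.log(space('\ntext'));   // { type: 'space', raw: '\n' }, now typed
    console.log(space('text'));     // undefined, no leading newline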

@@ -303,7 +300,24 @@ export class Tokenizer {
     for (i = 0; i < l; i++) {
       this.lexer.state.top = false;
       list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
-      if (!list.loose && list.items[i].tokens.some(t => t.type === 'space')) {
+      const spacers = list.items[i].tokens.filter(t => t.type === 'space');
+      const hasMultipleLineBreaks = spacers.every(t => {
+        const chars = t.raw.split('');
+        let lineBreaks = 0;
+        for (const char of chars) {
+          if (char === '\n') {
+            lineBreaks += 1;
+          }
+          if (lineBreaks > 1) {
+            return true;
+          }
+        }
+
+        return false;
+      });
+
+      if (!list.loose && spacers.length && hasMultipleLineBreaks) {
+        // Having a single line break doesn't mean a list is loose. A single line break terminates the last list item.
         list.loose = true;
         list.items[i].loose = true;
       }
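The counting loop in hasMultipleLineBreaks is effectively asking whether each spacer's raw contains at least two \n characters, i.e. a blank line rather than a simple line ending. A quick end-to-end check of the intended behavior, assuming marked from this branch (the output HTML is paraphrased and exact whitespace may differ):

    const { marked } = require('marked');

    // Single newline between items: the list stays tight,
    // so item text is not wrapped in <p> tags.
    console.log(marked.parse('- a\n- b'));
    // <ul><li>a</li><li>b</li></ul> (roughly)

    // Blank line between items: the list becomes loose,
    // so each item's text is wrapped in <p> tags.
    console.log(marked.parse('- a\n\n- b'));
    // <ul><li><p>a</p></li><li><p>b</p></li></ul> (roughly)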
test/unit/Lexer-spec.js (148 additions, 58 deletions)
@@ -93,6 +93,10 @@ lheading 2
 ----------
 `,
       tokens: [
+        {
+          type: 'space',
+          raw: '\n'
+        },
         {
           type: 'heading',
           raw: '# heading 1\n\n',
@@ -175,6 +179,9 @@ lheading 2
 | 1 | 2 |
 `,
       tokens: [{
+        type: 'space',
+        raw: '\n'
+      }, {
         type: 'table',
         align: [null, null],
         raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
@@ -212,40 +212,42 @@ paragraph 1
 |---|---|
 | 1 | 2 |
 `,
-      tokens: [
-        {
-          type: 'paragraph',
-          raw: 'paragraph 1',
+      tokens: [{
+        type: 'space',
+        raw: '\n'
+      }, {
+        type: 'paragraph',
+        raw: 'paragraph 1\n',
         text: 'paragraph 1',
         tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }]
       },
       {
         type: 'table',
         align: [null, null],
         raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
         header: [
           {
             text: 'a',
             tokens: [{ type: 'text', raw: 'a', text: 'a' }]
           },
           {
             text: 'b',
             tokens: [{ type: 'text', raw: 'b', text: 'b' }]
           }
         ],
         rows: [
           [
             {
               text: '1',
               tokens: [{ type: 'text', raw: '1', text: '1' }]
             },
             {
               text: '2',
               tokens: [{ type: 'text', raw: '2', text: '2' }]
             }
           ]
         ]
-      }
-      ]
+      }]
});
});
@@ -258,6 +267,9 @@ paragraph 1
 | 1 | 2 | 3 |
 `,
       tokens: [{
+        type: 'space',
+        raw: '\n'
+      }, {
         type: 'table',
         align: ['left', 'center', 'right'],
         raw: '| a | b | c |\n|:--|:-:|--:|\n| 1 | 2 | 3 |\n',
@@ -302,33 +314,37 @@ a | b
 --|--
 1 | 2
 `,
-      tokens: [{
-        type: 'table',
-        align: [null, null],
-        raw: 'a | b\n--|--\n1 | 2\n',
-        header: [
+      tokens: [
+        {
+          type: 'space',
+          raw: '\n'
+        }, {
+          type: 'table',
+          align: [null, null],
+          raw: 'a | b\n--|--\n1 | 2\n',
+          header: [
           {
             text: 'a',
             tokens: [{ type: 'text', raw: 'a', text: 'a' }]
           },
           {
             text: 'b',
             tokens: [{ type: 'text', raw: 'b', text: 'b' }]
           }
         ],
         rows: [
           [
             {
               text: '1',
               tokens: [{ type: 'text', raw: '1', text: '1' }]
             },
             {
               text: '2',
               tokens: [{ type: 'text', raw: '2', text: '2' }]
             }
           ]
         ]
-      }]
+        }]
});
});
});
@@ -342,6 +358,19 @@ a | b
         ]
       });
     });
+
+    it('after line break does not consume raw \n', () => {
+      expectTokens({
+        md: 'T\nh\n---',
+        tokens:
+          jasmine.arrayContaining([
+            jasmine.objectContaining({
+              raw: 'T\nh\n'
+            }),
+            { type: 'hr', raw: '---' }
+          ])
+      });
+    });
});

describe('blockquote', () => {
@@ -376,8 +405,11 @@ a | b
 `,
       tokens: [
         {
+          type: 'space',
+          raw: '\n'
+        }, {
           type: 'list',
-          raw: '- item 1\n- item 2',
+          raw: '- item 1\n- item 2\n',
           ordered: false,
           start: '',
           loose: false,
@@ -423,9 +455,13 @@
 2. item 2
 `,
       tokens: jasmine.arrayContaining([
+        jasmine.objectContaining({
+          type: 'space',
+          raw: '\n'
+        }),
         jasmine.objectContaining({
           type: 'list',
-          raw: '1. item 1\n2. item 2',
+          raw: '1. item 1\n2. item 2\n',
           ordered: true,
           start: 1,
           items: [
@@ -448,9 +484,13 @@
 2) item 2
 `,
       tokens: jasmine.arrayContaining([
+        jasmine.objectContaining({
+          type: 'space',
+          raw: '\n'
+        }),
         jasmine.objectContaining({
           type: 'list',
-          raw: '1) item 1\n2) item 2',
+          raw: '1) item 1\n2) item 2\n',
           ordered: true,
           start: 1,
           items: [
@@ -475,6 +515,10 @@ a | b
 paragraph
 `,
       tokens: [
+        {
+          type: 'space',
+          raw: '\n'
+        },
         {
           type: 'list',
           raw: '- item 1\n- item 2',
@@ -515,7 +559,7 @@ paragraph
         { type: 'space', raw: '\n\n' },
         {
           type: 'paragraph',
-          raw: 'paragraph',
+          raw: 'paragraph\n',
           text: 'paragraph',
           tokens: [{
             type: 'text',
@@ -534,9 +578,13 @@
 3. item 2
 `,
       tokens: jasmine.arrayContaining([
+        jasmine.objectContaining({
+          type: 'space',
+          raw: '\n'
+        }),
         jasmine.objectContaining({
           type: 'list',
-          raw: '2. item 1\n3. item 2',
+          raw: '2. item 1\n3. item 2\n',
           ordered: true,
           start: 2,
           items: [
@@ -560,9 +608,13 @@
 - item 2
 `,
       tokens: jasmine.arrayContaining([
+        jasmine.objectContaining({
+          type: 'space',
+          raw: '\n'
+        }),
         jasmine.objectContaining({
           type: 'list',
-          raw: '- item 1\n\n- item 2',
+          raw: '- item 1\n\n- item 2\n',
           loose: true,
           items: [
             jasmine.objectContaining({
@@ -577,16 +629,54 @@
       });
     });
 
+    it('not loose with spaces', () => {
+      expectTokens({
+        md: `
+- item 1
+  - item 2
+`,
+        tokens: jasmine.arrayContaining([
+          jasmine.objectContaining({
+            type: 'space',
+            raw: '\n'
+          }),
+          jasmine.objectContaining({
+            type: 'list',
+            raw: '- item 1\n  - item 2\n',
+            loose: false,
+            items: [
+              jasmine.objectContaining({
+                raw: '- item 1\n  - item 2',
+                tokens: jasmine.arrayContaining([
+                  jasmine.objectContaining({
+                    raw: 'item 1\n'
+                  }),
+                  jasmine.objectContaining({
+                    type: 'list',
+                    raw: '- item 2'
+                  })
+                ])
+              })
+            ]
+          })
+        ])
+      });
+    });

     it('task', () => {
       expectTokens({
         md: `
 - [ ] item 1
 - [x] item 2
 `,
         tokens: jasmine.arrayContaining([
+          jasmine.objectContaining({
+            type: 'space',
+            raw: '\n'
+          }),
           jasmine.objectContaining({
             type: 'list',
-            raw: '- [ ] item 1\n- [x] item 2',
+            raw: '- [ ] item 1\n- [x] item 2\n',
             items: [
               jasmine.objectContaining({
                 raw: '- [ ] item 1\n',
test/unit/marked-spec.js (1 addition, 0 deletions)
@@ -994,6 +994,7 @@ br
       });
 
       expect(tokensSeen).toEqual([
+        ['space', ''],
         ['paragraph', 'paragraph'],
         ['text', 'paragraph'],
         ['space', ''],
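For context, tokensSeen in this spec is collected through the walkTokens option, and the markdown under test starts with a newline, which now surfaces as a typed space token. A sketch of the collection pattern; the input string and the push expression here are assumptions, not the spec's literal code:

    const { marked } = require('marked');

    const tokensSeen = [];
    marked.parse('\nparagraph\n\nbr\n', {
      walkTokens(token) {
        // space tokens carry no text, so trim raw down to '' for comparison
        tokensSeen.push([token.type, (token.raw || '').trim()]);
      }
    });
    // tokensSeen now begins with ['space', ''] before ['paragraph', ...],
    // matching the expectation above.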