Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework Lexer to use extendable array of tokenizer functions #1872

Closed
wants to merge 11 commits into from
314 changes: 193 additions & 121 deletions src/Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ module.exports = class Lexer {
}
}
this.tokenizer.rules = rules;

this.blockTokenizers = [
this.newline,
calculuschild marked this conversation as resolved.
Show resolved Hide resolved
this.code,
this.fences,
this.nptable,
this.heading,
this.hr,
this.blockquote,
this.list,
this.html,
this.def,
this.table,
this.lheading,
this.paragraph,
this.text];
}

/**
Expand Down Expand Up @@ -119,135 +135,191 @@ module.exports = class Lexer {
/**
* Lexing
*/
blockTokens(src, tokens = [], top = true) {
src = src.replace(/^ +$/gm, '');
let token, i, l, lastToken;

while (src) {
// newline
if (token = this.tokenizer.space(src)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
}
continue;
}
// newline
newline(params) {
if (params.token = params.tokenizer.space(params.src)) {
params.src = params.src.substring(params.token.raw.length);
if (params.token.type) {
params.tokens.push(params.token);
}
return true;
}
}//.bind(this);

// code
code(params) {
if (params.token = params.tokenizer.code(params.src, params.tokens)) {
params.src = params.src.substring(params.token.raw.length);
if (params.token.type) {
params.tokens.push(params.token);
} else {
params.lastToken = params.tokens[params.tokens.length - 1];
params.lastToken.raw += '\n' + params.token.raw;
params.lastToken.text += '\n' + params.token.text;
}
return true;
}
}//.bind(this);

// fences
fences(params) {
if (params.token = params.tokenizer.fences(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// table no leading pipe (gfm)
nptable(params) {
if (params.token = params.tokenizer.nptable(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// heading
heading(params) {
if (params.token = params.tokenizer.heading(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// hr
hr(params) {
if (params.token = params.tokenizer.hr(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// blockquote
blockquote(params) {
if (params.token = params.tokenizer.blockquote(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.token.tokens = params.blockTokens(params.token.text, [], params.top);
params.tokens.push(params.token);
return true;
}
}//.bind(this);


// list
list(params) {
if (params.token = params.tokenizer.list(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.l = params.token.items.length;
for (params.i = 0; params.i < params.l; params.i++) {
params.token.items[params.i].tokens = params.blockTokens(params.token.items[params.i].text, [], false);
}
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// html
html(params) {
if (params.token = params.tokenizer.html(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// def
def(params) {
if (params.top && (params.token = params.tokenizer.def(params.src))) {
params.src = params.src.substring(params.token.raw.length);
if (params.tokens && (!params.tokens.links || !params.tokens.links[params.token.tag])) {
params.tokens.links[params.token.tag] = {
calculuschild marked this conversation as resolved.
Show resolved Hide resolved
href: params.token.href,
title: params.token.title
};
}
return true;
}
}//.bind(this);

// table (gfm)
table(params) {
if (params.token = params.tokenizer.table(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// lheading
lheading(params) {
if (params.token = params.tokenizer.lheading(params.src)) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// top-level paragraph
paragraph(params) {
if (params.top && (params.token = params.tokenizer.paragraph(params.src))) {
params.src = params.src.substring(params.token.raw.length);
params.tokens.push(params.token);
return true;
}
}//.bind(this);

// text
text(params) {
if (params.token = params.tokenizer.text(params.src, params.tokens)) {
params.src = params.src.substring(params.token.raw.length);
if (params.token.type) {
params.tokens.push(params.token);
} else {
params.lastToken = params.tokens[params.tokens.length - 1];
params.lastToken.raw += '\n' + params.token.raw;
params.lastToken.text += '\n' + params.token.text;
}
return true;
}
}//.bind(this);

// code
if (token = this.tokenizer.code(src, tokens)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
}
continue;
}

// fences
if (token = this.tokenizer.fences(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// heading
if (token = this.tokenizer.heading(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// table no leading pipe (gfm)
if (token = this.tokenizer.nptable(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// hr
if (token = this.tokenizer.hr(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// blockquote
if (token = this.tokenizer.blockquote(src)) {
src = src.substring(token.raw.length);
token.tokens = this.blockTokens(token.text, [], top);
tokens.push(token);
continue;
}

// list
if (token = this.tokenizer.list(src)) {
src = src.substring(token.raw.length);
l = token.items.length;
for (i = 0; i < l; i++) {
token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
}
tokens.push(token);
continue;
}

// html
if (token = this.tokenizer.html(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// def
if (top && (token = this.tokenizer.def(src))) {
src = src.substring(token.raw.length);
if (!this.tokens.links[token.tag]) {
this.tokens.links[token.tag] = {
href: token.href,
title: token.title
};
}
continue;
}

// table (gfm)
if (token = this.tokenizer.table(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// lheading
if (token = this.tokenizer.lheading(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
blockTokens(src, tokens = [], top = true) {
if(!tokens.links){
tokens.links = Object.create(null);
}
calculuschild marked this conversation as resolved.
Show resolved Hide resolved
src = src.replace(/^ +$/gm, '');
let token, i, l, lastToken;
let fn;

let blockParams = {
tokenizer : this.tokenizer,
blockTokens : this.blockTokens.bind(this),
src : src,
token : token,
tokens : tokens,
lastToken : lastToken,
top : top,
i : i,
l : l
};

// top-level paragraph
if (top && (token = this.tokenizer.paragraph(src))) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
outerLoop:
while (blockParams.src) {

// text
if (token = this.tokenizer.text(src, tokens)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
for(fn of this.blockTokenizers) {
if(fn(blockParams)) {
calculuschild marked this conversation as resolved.
Show resolved Hide resolved
continue outerLoop;
}
continue;
}

if (src) {
const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
if (blockParams.src) {
const errMsg = 'Infinite loop on byte: ' + blockParams.src.charCodeAt(0);
if (this.options.silent) {
console.error(errMsg);
break;
Expand All @@ -257,7 +329,7 @@ module.exports = class Lexer {
}
}

return tokens;
return blockParams.tokens;
}

inline(tokens) {
Expand Down