From 48808f7f8201036f571b1a01b2871158f492da5d Mon Sep 17 00:00:00 2001 From: Jakub Pawlowicz Date: Wed, 1 Aug 2018 15:53:43 +0200 Subject: [PATCH] Fixes #1000 - carriage return handling in tokenizer. --- History.md | 1 + lib/optimizer/level-1/tidy-rules.js | 2 +- lib/tokenizer/marker.js | 2 +- lib/tokenizer/tokenize.js | 10 ++++++---- test/tokenizer/tokenize-test.js | 12 ++++++++++++ 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/History.md b/History.md index eb7fdc340..e2bc54121 100644 --- a/History.md +++ b/History.md @@ -6,6 +6,7 @@ * Fixed issue [#895](https://github.com/jakubpawlowicz/clean-css/issues/895) - ignoring specific styles. * Fixed issue [#947](https://github.com/jakubpawlowicz/clean-css/issues/947) - selector based filtering. * Fixed issue [#986](https://github.com/jakubpawlowicz/clean-css/issues/986) - level 2 optimizations and CSS 4 colors. +* Fixed issue [#1000](https://github.com/jakubpawlowicz/clean-css/issues/1000) - carriage return handling in tokenizer. * Fixed issue [#1038](https://github.com/jakubpawlowicz/clean-css/issues/1038) - `font-variation-settings` quoting. * Fixes ReDOS vulnerabilities in validator code. diff --git a/lib/optimizer/level-1/tidy-rules.js b/lib/optimizer/level-1/tidy-rules.js index 0af3b2fe7..d046d0efd 100644 --- a/lib/optimizer/level-1/tidy-rules.js +++ b/lib/optimizer/level-1/tidy-rules.js @@ -68,7 +68,7 @@ function removeWhitespace(value, format) { character = value[i]; isNewLineNix = character == Marker.NEW_LINE_NIX; - isNewLineWin = character == Marker.NEW_LINE_NIX && value[i - 1] == Marker.NEW_LINE_WIN; + isNewLineWin = character == Marker.NEW_LINE_NIX && value[i - 1] == Marker.CARRIAGE_RETURN; isQuoted = isSingleQuoted || isDoubleQuoted; isRelation = !isAttribute && !isEscaped && roundBracketLevel === 0 && RELATION_PATTERN.test(character); isWhitespace = WHITESPACE_PATTERN.test(character); diff --git a/lib/tokenizer/marker.js b/lib/tokenizer/marker.js index 767a5f0e8..270fdbc3b 100644 --- a/lib/tokenizer/marker.js +++ b/lib/tokenizer/marker.js @@ -2,6 +2,7 @@ var Marker = { ASTERISK: '*', AT: '@', BACK_SLASH: '\\', + CARRIAGE_RETURN: '\r', CLOSE_CURLY_BRACKET: '}', CLOSE_ROUND_BRACKET: ')', CLOSE_SQUARE_BRACKET: ']', @@ -12,7 +13,6 @@ var Marker = { FORWARD_SLASH: '/', INTERNAL: '-clean-css-', NEW_LINE_NIX: '\n', - NEW_LINE_WIN: '\r', OPEN_CURLY_BRACKET: '{', OPEN_ROUND_BRACKET: '(', OPEN_SQUARE_BRACKET: '[', diff --git a/lib/tokenizer/tokenize.js b/lib/tokenizer/tokenize.js index 8d8c63c79..39c9e67bc 100644 --- a/lib/tokenizer/tokenize.js +++ b/lib/tokenizer/tokenize.js @@ -97,6 +97,7 @@ function intoTokens(source, externalContext, internalContext, isNested) { var isSpace; var isNewLineNix; var isNewLineWin; + var isCarriageReturn; var isCommentStart; var wasCommentStart = false; var isCommentEnd; @@ -116,7 +117,8 @@ function intoTokens(source, externalContext, internalContext, isNested) { isQuoted = level == Level.SINGLE_QUOTE || level == Level.DOUBLE_QUOTE; isSpace = character == Marker.SPACE || character == Marker.TAB; isNewLineNix = character == Marker.NEW_LINE_NIX; - isNewLineWin = character == Marker.NEW_LINE_NIX && source[position.index - 1] == Marker.NEW_LINE_WIN; + isNewLineWin = character == Marker.NEW_LINE_NIX && source[position.index - 1] == Marker.CARRIAGE_RETURN; + isCarriageReturn = character == Marker.CARRIAGE_RETURN && source[position.index + 1] && source[position.index + 1] != Marker.NEW_LINE_NIX; isCommentStart = !wasCommentEnd && level != Level.COMMENT && !isQuoted && character == Marker.ASTERISK && source[position.index - 1] == Marker.FORWARD_SLASH; isCommentEndMarker = !wasCommentStart && !isQuoted && character == Marker.FORWARD_SLASH && source[position.index - 1] == Marker.ASTERISK; isCommentEnd = level == Level.COMMENT && isCommentEndMarker; @@ -483,7 +485,7 @@ function intoTokens(source, externalContext, internalContext, isNested) { } else if (buffer.length == 1 && isNewLineWin) { // ignore windows newline which is composed of two characters buffer.pop(); - } else if (buffer.length > 0 || !isSpace && !isNewLineNix && !isNewLineWin) { + } else if (buffer.length > 0 || !isSpace && !isNewLineNix && !isNewLineWin && !isCarriageReturn) { // any character buffer.push(character); } @@ -493,8 +495,8 @@ function intoTokens(source, externalContext, internalContext, isNested) { wasCommentStart = isCommentStart; wasCommentEnd = isCommentEnd; - position.line = (isNewLineWin || isNewLineNix) ? position.line + 1 : position.line; - position.column = (isNewLineWin || isNewLineNix) ? 0 : position.column + 1; + position.line = (isNewLineWin || isNewLineNix || isCarriageReturn) ? position.line + 1 : position.line; + position.column = (isNewLineWin || isNewLineNix || isCarriageReturn) ? 0 : position.column + 1; } if (seekingValue) { diff --git a/test/tokenizer/tokenize-test.js b/test/tokenizer/tokenize-test.js index f277a30b5..c111ffeaa 100644 --- a/test/tokenizer/tokenize-test.js +++ b/test/tokenizer/tokenize-test.js @@ -2688,6 +2688,18 @@ vows.describe(tokenize) ] ] ], + 'charset after a carriage return': [ + '\r@charset \n\'utf-8\';', + [ + [ + 'at-rule', + '@charset \n\'utf-8\'', + [ + [2, 0, undefined] + ] + ] + ] + ], '@import': [ 'a{}@import \n"test.css";\n\na{color:red}', [