diff --git a/CHANGELOG.md b/CHANGELOG.md index b3513578..b625c28c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.0.0] - WIP +### Fixed + - Astral characters are no longer encoded by dump/safeDump, #587. + ## [3.14.0] - 2020-05-22 ### Changed @@ -479,6 +483,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - First public release +[4.0.0]: https://github.com/nodeca/js-yaml/compare/3.14.0...4.0.0 [3.14.0]: https://github.com/nodeca/js-yaml/compare/3.13.1...3.14.0 [3.13.1]: https://github.com/nodeca/js-yaml/compare/3.13.0...3.13.1 [3.13.0]: https://github.com/nodeca/js-yaml/compare/3.12.2...3.13.0 diff --git a/lib/js-yaml/dumper.js b/lib/js-yaml/dumper.js index df5ef096..5a0b126a 100644 --- a/lib/js-yaml/dumper.js +++ b/lib/js-yaml/dumper.js @@ -254,6 +254,19 @@ function isPlainSafeFirst(c) { && c !== CHAR_GRAVE_ACCENT; } +// Same as 'string'.codePointAt(pos), but works in older browsers. +function codePointAt(string, pos) { + var first = string.charCodeAt(pos), second; + if (first >= 0xD800 && first <= 0xDBFF && pos + 1 < string.length) { + second = string.charCodeAt(pos + 1); + if (second >= 0xDC00 && second <= 0xDFFF) { + // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + return (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000; + } + } + return first; +} + // Determines whether block indentation indicator is required. function needIndentIndicator(string) { var leadingSpaceRe = /^\n* /; @@ -275,29 +288,30 @@ var STYLE_PLAIN = 1, // STYLE_FOLDED => a line > lineWidth and can be folded (and lineWidth != -1). function chooseScalarStyle(string, singleLineOnly, indentPerLevel, lineWidth, testAmbiguousType) { var i; - var char, prev_char; + var char = 0; + var prevChar = null; var hasLineBreak = false; var hasFoldableLine = false; // only checked if shouldTrackWidth var shouldTrackWidth = lineWidth !== -1; var previousLineBreak = -1; // count the first line correctly - var plain = isPlainSafeFirst(string.charCodeAt(0)) - && !isWhitespace(string.charCodeAt(string.length - 1)); + var plain = isPlainSafeFirst(codePointAt(string, 0)) + && !isWhitespace(codePointAt(string, string.length - 1)); if (singleLineOnly) { // Case: no block styles. // Check for disallowed characters to rule out plain and single. - for (i = 0; i < string.length; i++) { - char = string.charCodeAt(i); + for (i = 0; i < string.length; char >= 0x10000 ? i += 2 : i++) { + char = codePointAt(string, i); if (!isPrintable(char)) { return STYLE_DOUBLE; } - prev_char = i > 0 ? string.charCodeAt(i - 1) : null; - plain = plain && isPlainSafe(char, prev_char); + plain = plain && isPlainSafe(char, prevChar); + prevChar = char; } } else { // Case: block styles permitted. - for (i = 0; i < string.length; i++) { - char = string.charCodeAt(i); + for (i = 0; i < string.length; char >= 0x10000 ? i += 2 : i++) { + char = codePointAt(string, i); if (char === CHAR_LINE_FEED) { hasLineBreak = true; // Check if any line can be folded. @@ -311,8 +325,8 @@ function chooseScalarStyle(string, singleLineOnly, indentPerLevel, lineWidth, te } else if (!isPrintable(char)) { return STYLE_DOUBLE; } - prev_char = i > 0 ? string.charCodeAt(i - 1) : null; - plain = plain && isPlainSafe(char, prev_char); + plain = plain && isPlainSafe(char, prevChar); + prevChar = char; } // in case the end is missing a \n hasFoldableLine = hasFoldableLine || (shouldTrackWidth && @@ -489,25 +503,19 @@ function foldLine(line, width) { // Escapes a double-quoted string. function escapeString(string) { var result = ''; - var char, nextChar; + var char = 0; var escapeSeq; - for (var i = 0; i < string.length; i++) { - char = string.charCodeAt(i); - // Check for surrogate pairs (reference Unicode 3.0 section "3.7 Surrogates"). - if (char >= 0xD800 && char <= 0xDBFF/* high surrogate */) { - nextChar = string.charCodeAt(i + 1); - if (nextChar >= 0xDC00 && nextChar <= 0xDFFF/* low surrogate */) { - // Combine the surrogate pair and store it escaped. - result += encodeHex((char - 0xD800) * 0x400 + nextChar - 0xDC00 + 0x10000); - // Advance index one extra since we already used that char here. - i++; continue; - } - } + for (var i = 0; i < string.length; char >= 0x10000 ? i += 2 : i++) { + char = codePointAt(string, i); escapeSeq = ESCAPE_SEQUENCES[char]; - result += !escapeSeq && isPrintable(char) - ? string[i] - : escapeSeq || encodeHex(char); + + if (!escapeSeq && isPrintable(char)) { + result += string[i]; + if (char >= 0x10000) result += string[i + 1]; + } else { + result += escapeSeq || encodeHex(char); + } } return result; diff --git a/test/issues/0369.js b/test/issues/0369.js deleted file mode 100644 index f96baac1..00000000 --- a/test/issues/0369.js +++ /dev/null @@ -1,11 +0,0 @@ -'use strict'; - - -var assert = require('assert'); -var yaml = require('../../'); - - -test('should dump astrals as codepoint', function () { - assert.deepEqual(yaml.safeDump('😀'), '"\\U0001F600"\n'); - assert.deepEqual(yaml.safeLoad('"\\U0001F600"'), '😀'); -}); diff --git a/test/issues/0587.js b/test/issues/0587.js new file mode 100644 index 00000000..91f33886 --- /dev/null +++ b/test/issues/0587.js @@ -0,0 +1,10 @@ +'use strict'; + + +var assert = require('assert'); +var yaml = require('../../'); + + +test('Should not encode astral characters', function () { + assert.strictEqual(yaml.safeDump('😃😊'), '😃😊\n'); +});