Skip to content

Commit

Permalink
Astral characters are no longer encoded by dump/safeDump
Browse files Browse the repository at this point in the history
close #587
ref #588
ref #368
  • Loading branch information
rlidwka committed Dec 1, 2020
1 parent fe72979 commit 0b2e702
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 38 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Expand Up @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [4.0.0] - WIP
### Fixed
- Astral characters are no longer encoded by dump/safeDump, #587.


## [3.14.0] - 2020-05-22
### Changed
Expand Down Expand Up @@ -479,6 +483,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- First public release


[4.0.0]: https://github.com/nodeca/js-yaml/compare/3.14.0...4.0.0
[3.14.0]: https://github.com/nodeca/js-yaml/compare/3.13.1...3.14.0
[3.13.1]: https://github.com/nodeca/js-yaml/compare/3.13.0...3.13.1
[3.13.0]: https://github.com/nodeca/js-yaml/compare/3.12.2...3.13.0
Expand Down
62 changes: 35 additions & 27 deletions lib/js-yaml/dumper.js
Expand Up @@ -254,6 +254,19 @@ function isPlainSafeFirst(c) {
&& c !== CHAR_GRAVE_ACCENT;
}

// Portable stand-in for String.prototype.codePointAt, for engines that lack it.
//
// Reads the UTF-16 code unit at `pos`. When that unit is a high surrogate
// immediately followed by a low surrogate, the two are combined into a single
// astral code point (>= 0x10000). In every other case (BMP character, lone
// surrogate, or a high surrogate that is the last unit of the string) the
// code unit itself is returned unchanged.
function codePointAt(string, pos) {
  var lead = string.charCodeAt(pos);
  var isLeadSurrogate = lead >= 0xD800 && lead <= 0xDBFF;
  var hasNext = pos + 1 < string.length;
  var trail;

  if (!isLeadSurrogate || !hasNext) {
    return lead;
  }

  trail = string.charCodeAt(pos + 1);
  if (!(trail >= 0xDC00 && trail <= 0xDFFF)) {
    // High surrogate without a matching low surrogate: leave it as-is.
    return lead;
  }

  // Surrogate pair -> code point, see
  // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  return 0x10000 + (lead - 0xD800) * 0x400 + (trail - 0xDC00);
}

// Determines whether block indentation indicator is required.
function needIndentIndicator(string) {
var leadingSpaceRe = /^\n* /;
Expand All @@ -275,29 +288,30 @@ var STYLE_PLAIN = 1,
// STYLE_FOLDED => a line > lineWidth and can be folded (and lineWidth != -1).
function chooseScalarStyle(string, singleLineOnly, indentPerLevel, lineWidth, testAmbiguousType) {
var i;
var char, prev_char;
var char = 0;
var prevChar = null;
var hasLineBreak = false;
var hasFoldableLine = false; // only checked if shouldTrackWidth
var shouldTrackWidth = lineWidth !== -1;
var previousLineBreak = -1; // count the first line correctly
var plain = isPlainSafeFirst(string.charCodeAt(0))
&& !isWhitespace(string.charCodeAt(string.length - 1));
var plain = isPlainSafeFirst(codePointAt(string, 0))
&& !isWhitespace(codePointAt(string, string.length - 1));

if (singleLineOnly) {
// Case: no block styles.
// Check for disallowed characters to rule out plain and single.
for (i = 0; i < string.length; i++) {
char = string.charCodeAt(i);
for (i = 0; i < string.length; char >= 0x10000 ? i += 2 : i++) {
char = codePointAt(string, i);
if (!isPrintable(char)) {
return STYLE_DOUBLE;
}
prev_char = i > 0 ? string.charCodeAt(i - 1) : null;
plain = plain && isPlainSafe(char, prev_char);
plain = plain && isPlainSafe(char, prevChar);
prevChar = char;
}
} else {
// Case: block styles permitted.
for (i = 0; i < string.length; i++) {
char = string.charCodeAt(i);
for (i = 0; i < string.length; char >= 0x10000 ? i += 2 : i++) {
char = codePointAt(string, i);
if (char === CHAR_LINE_FEED) {
hasLineBreak = true;
// Check if any line can be folded.
Expand All @@ -311,8 +325,8 @@ function chooseScalarStyle(string, singleLineOnly, indentPerLevel, lineWidth, te
} else if (!isPrintable(char)) {
return STYLE_DOUBLE;
}
prev_char = i > 0 ? string.charCodeAt(i - 1) : null;
plain = plain && isPlainSafe(char, prev_char);
plain = plain && isPlainSafe(char, prevChar);
prevChar = char;
}
// in case the end is missing a \n
hasFoldableLine = hasFoldableLine || (shouldTrackWidth &&
Expand Down Expand Up @@ -489,25 +503,19 @@ function foldLine(line, width) {
// Escapes a double-quoted string.
function escapeString(string) {
var result = '';
var char, nextChar;
var char = 0;
var escapeSeq;

for (var i = 0; i < string.length; i++) {
char = string.charCodeAt(i);
// Check for surrogate pairs (reference Unicode 3.0 section "3.7 Surrogates").
if (char >= 0xD800 && char <= 0xDBFF/* high surrogate */) {
nextChar = string.charCodeAt(i + 1);
if (nextChar >= 0xDC00 && nextChar <= 0xDFFF/* low surrogate */) {
// Combine the surrogate pair and store it escaped.
result += encodeHex((char - 0xD800) * 0x400 + nextChar - 0xDC00 + 0x10000);
// Advance index one extra since we already used that char here.
i++; continue;
}
}
for (var i = 0; i < string.length; char >= 0x10000 ? i += 2 : i++) {
char = codePointAt(string, i);
escapeSeq = ESCAPE_SEQUENCES[char];
result += !escapeSeq && isPrintable(char)
? string[i]
: escapeSeq || encodeHex(char);

if (!escapeSeq && isPrintable(char)) {
result += string[i];
if (char >= 0x10000) result += string[i + 1];
} else {
result += escapeSeq || encodeHex(char);
}
}

return result;
Expand Down
11 changes: 0 additions & 11 deletions test/issues/0369.js

This file was deleted.

10 changes: 10 additions & 0 deletions test/issues/0587.js
@@ -0,0 +1,10 @@
'use strict';


var assert = require('assert');
var yaml = require('../../');


// Regression test for issue #587: characters outside the Basic Multilingual
// Plane must be written verbatim by safeDump, not as escape sequences.
test('Should not encode astral characters', function () {
  var emoji = '😃😊';
  var dumped = yaml.safeDump(emoji);

  assert.strictEqual(dumped, emoji + '\n');
});

0 comments on commit 0b2e702

Please sign in to comment.