From b74754555d8f2e596287bdb4e2943ad2aeec2b22 Mon Sep 17 00:00:00 2001 From: Caleb Date: Mon, 11 Sep 2017 01:29:43 -0600 Subject: [PATCH] Combine surrogate pairs into one escape sequence when encoding. (#369) --- lib/js-yaml/dumper.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/js-yaml/dumper.js b/lib/js-yaml/dumper.js index 66a45687..025b1855 100644 --- a/lib/js-yaml/dumper.js +++ b/lib/js-yaml/dumper.js @@ -461,11 +461,21 @@ function foldLine(line, width) { // Escapes a double-quoted string. function escapeString(string) { var result = ''; - var char; + var char, nextChar; var escapeSeq; for (var i = 0; i < string.length; i++) { char = string.charCodeAt(i); + // Check for surrogate pairs (reference Unicode 3.0 section "3.7 Surrogates"). + if (char >= 0xD800 && char <= 0xDBFF/* high surrogate */) { + nextChar = string.charCodeAt(i + 1); + if (nextChar >= 0xDC00 && nextChar <= 0xDFFF/* low surrogate */) { + // Combine the surrogate pair and store it escaped. + result += encodeHex((char - 0xD800) * 0x400 + nextChar - 0xDC00 + 0x10000); + // Advance index one extra since we already used that char here. + i++; continue; + } + } escapeSeq = ESCAPE_SEQUENCES[char]; result += !escapeSeq && isPrintable(char) ? string[i]