From 640f868e1c7a2668f24d515c66d2e3e3b27fc9fc Mon Sep 17 00:00:00 2001
From: Caleb
Date: Tue, 5 Sep 2017 11:45:06 -0600
Subject: [PATCH] Combine surrogate pairs into one escape sequence when encoding.

---
 lib/js-yaml/dumper.js | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/js-yaml/dumper.js b/lib/js-yaml/dumper.js
index 6e60bbd0..b77850cf 100644
--- a/lib/js-yaml/dumper.js
+++ b/lib/js-yaml/dumper.js
@@ -461,11 +461,21 @@ function foldLine(line, width) {
 // Escapes a double-quoted string.
 function escapeString(string) {
   var result = '';
-  var char;
+  var char, nextChar;
   var escapeSeq;
 
   for (var i = 0; i < string.length; i++) {
     char = string.charCodeAt(i);
+    // Check for surrogate pairs (reference Unicode 3.0 section "3.7 Surrogates").
+    if (char >= 0xD800 && char <= 0xDBFF/* high surrogate */) {
+      nextChar = string.charCodeAt(i + 1);
+      if (nextChar >= 0xDC00 && nextChar <= 0xDFFF/* low surrogate */) {
+        // Combine the surrogate pair and store it escaped.
+        result += encodeHex((char - 0xD800) * 0x400 + nextChar - 0xDC00 + 0x10000);
+        // Advance index one extra since we already used that char here.
+        i++; continue;
+      }
+    }
     escapeSeq = ESCAPE_SEQUENCES[char];
     result += !escapeSeq && isPrintable(char)
       ? string[i]