Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(codegen): fix unicode escaping in es5 #3636

Merged
merged 12 commits into from Feb 25, 2022
@@ -1,3 +1,3 @@
//@target: es6
// <TAB>, <VT>, <FF>, <SP>, <NBSP>, <BOM>
"\t\v\f \xa0";
"\t\v\f \xa0\uFEFF";
@@ -1,2 +1,2 @@
// <TAB>, <VT>, <FF>, <SP>, <NBSP>, <BOM>
"\t\v\f \xa0";
"\t\v\f \xa0\uFEFF";
@@ -1,4 +1,4 @@
// @target: es5
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 1. Assert: 0 ≤ cp ≤ 0x10FFFF.
var x = "􏿿";
var x = "\uDBFF\uDFFF";
@@ -1,4 +1,4 @@
// @target: es6
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 1. Assert: 0 ≤ cp ≤ 0x10FFFF.
var x = "􏿿";
var x = "\uDBFF\uDFFF";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (FFFF == 65535)
var x = "￿";
var x = "\uFFFF";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (FFFF == 65535)
var x = "￿";
var x = "\uFFFF";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (10000 == 65536)
var x = "𐀀";
var x = "\uD800\uDC00";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (10000 == 65536)
var x = "𐀀";
var x = "\uD800\uDC00";
@@ -1,2 +1,2 @@
// @target: es5
var x = "󝷝";
var x = "\uDB37\uDDDD";
@@ -1,2 +1,2 @@
// @target: es6
var x = "󝷝";
var x = "\uDB37\uDDDD";
@@ -1,2 +1,2 @@
// @target: es5
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,2 +1,2 @@
// @target: es6
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,2 +1,2 @@
// @target: es5
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,2 +1,2 @@
// @target: es6
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,4 +1,4 @@
// @target: es5
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 1. Assert: 0 ≤ cp ≤ 0x10FFFF.
var x = "􏿿";
var x = "\uDBFF\uDFFF";
@@ -1,4 +1,4 @@
// @target: es6
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 1. Assert: 0 ≤ cp ≤ 0x10FFFF.
var x = "􏿿";
var x = "\uDBFF\uDFFF";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (FFFF == 65535)
var x = "￿";
var x = "\uFFFF";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (FFFF == 65535)
var x = "￿";
var x = "\uFFFF";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (10000 == 65536)
var x = "𐀀";
var x = "\uD800\uDC00";
Expand Up @@ -2,4 +2,4 @@
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (10000 == 65536)
var x = "𐀀";
var x = "\uD800\uDC00";
@@ -1,2 +1,2 @@
// @target: es5
var x = "󝷝";
var x = "\uDB37\uDDDD";
@@ -1,2 +1,2 @@
// @target: es6
var x = "󝷝";
var x = "\uDB37\uDDDD";
@@ -1,2 +1,2 @@
// @target: es5
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,2 +1,2 @@
// @target: es6
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,2 +1,2 @@
// @target: es5
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
@@ -1,2 +1,2 @@
// @target: es6
var x = "ꯍ㑖碐";
var x = "\uABCD\uEF12\u3456\u7890";
58 changes: 43 additions & 15 deletions crates/swc_ecma_codegen/src/lib.rs
Expand Up @@ -507,7 +507,8 @@ where
}
StrKind::Synthesized => {
let single_quote = false;
let value = escape_without_source(&node.value, self.wr.target(), single_quote);
let value =
escape_without_source(&node.value, self.wr.target(), single_quote, false);

(single_quote, value)
}
Expand Down Expand Up @@ -3070,7 +3071,12 @@ fn unescape_tpl_lit(s: &str, is_synthesized: bool) -> String {
result
}

fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> String {
fn escape_without_source(
v: &str,
target: EsVersion,
single_quote: bool,
emit_non_ascii_as_unicode: bool,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting this to `true` is equivalent to disabling jsesc's `minimal` option.

https://github.com/mathiasbynens/jsesc#minimal

) -> String {
let mut buf = String::with_capacity(v.len());
let mut iter = v.chars().peekable();

Expand All @@ -3092,7 +3098,6 @@ fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> Stri
buf.push_str("\\\\")
}
}

'\'' if single_quote => buf.push_str("\\'"),
'"' if !single_quote => buf.push_str("\\\""),

Expand All @@ -3110,16 +3115,33 @@ fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> Stri
'\u{7f}'..='\u{ff}' => {
let _ = write!(buf, "\\x{:x}", c as u8);
}

'\u{2028}' => {
buf.push_str("\\u2028");
}
'\u{2029}' => {
buf.push_str("\\u2029");
}

_ => {
buf.push(c);
if !emit_non_ascii_as_unicode || c.is_ascii() {
buf.push(c);
} else if c > '\u{FFFF}' {
// If we've got this far the char isn't reserved, and the caller has asked for
// non-ASCII chars to be emitted as unicode escapes, so we have to make sure
// the escape we output is valid for the target.
// ES5 does not support code point escapes (`\u{...}`), so the surrogate pair
// formula must be used.
if target <= EsVersion::Es5 {
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
let h = ((c as u32 - 0x10000) / 0x400) + 0xd800;
let l = (c as u32 - 0x10000) % 0x400 + 0xdc00;

let _ = write!(buf, "\\u{:04X}\\u{:04X}", h, l);
} else {
let _ = write!(buf, "\\u{{{:04X}}}", c as u32);
}
} else {
let _ = write!(buf, "\\u{:04X}", c as u16);
}
}
}
}
Expand All @@ -3134,25 +3156,31 @@ fn escape_with_source(
s: &str,
single_quote: Option<bool>,
) -> String {
if target <= EsVersion::Es5 {
return escape_without_source(s, target, single_quote.unwrap_or(false));
}

if span.is_dummy() {
return escape_without_source(s, target, single_quote.unwrap_or(false));
return escape_without_source(s, target, single_quote.unwrap_or(false), false);
}

//
let orig = cm.span_to_snippet(span);
let orig = match orig {
Ok(orig) => orig,
Err(v) => {
return escape_without_source(s, target, single_quote.unwrap_or(false));
return escape_without_source(s, target, single_quote.unwrap_or(false), false);
}
};

if target <= EsVersion::Es5 {
let emit_non_ascii_as_unicode = orig.contains("\\u");

return escape_without_source(
s,
target,
single_quote.unwrap_or(false),
emit_non_ascii_as_unicode,
);
}

if single_quote.is_some() && orig.len() <= 2 {
return escape_without_source(s, target, single_quote.unwrap_or(false));
return escape_without_source(s, target, single_quote.unwrap_or(false), false);
}

let mut orig = &*orig;
Expand All @@ -3162,7 +3190,7 @@ fn escape_with_source(
{
orig = &orig[1..orig.len() - 1];
} else if single_quote.is_some() {
return escape_without_source(s, target, single_quote.unwrap_or(false));
return escape_without_source(s, target, single_quote.unwrap_or(false), false);
}

let mut buf = String::with_capacity(s.len());
Expand Down
44 changes: 42 additions & 2 deletions crates/swc_ecma_codegen/src/tests.rs
Expand Up @@ -554,7 +554,21 @@ CONTENT\r
fn test_escape_without_source() {
fn es2020(src: &str, expected: &str) {
assert_eq!(
super::escape_without_source(src, EsVersion::Es2020, true),
super::escape_without_source(src, EsVersion::Es2020, true, false),
expected
)
}

fn es2020_nonascii(src: &str, expected: &str) {
assert_eq!(
super::escape_without_source(src, EsVersion::Es2020, true, true),
expected
)
}

fn es5(src: &str, expected: &str) {
assert_eq!(
super::escape_without_source(src, EsVersion::Es5, true, true),
expected
)
}
Expand All @@ -576,6 +590,13 @@ fn test_escape_without_source() {
es2020("\u{1000}", "\u{1000}");
es2020("\u{ff}", "\\xff");
es2020("\u{10ffff}", "\u{10ffff}");

es2020_nonascii("\u{FEFF}abc", "\\uFEFFabc");
es2020_nonascii("\u{10ffff}", "\\u{10FFFF}");

es5("\u{FEFF}abc", "\\uFEFFabc");
es5("\u{10ffff}", "\\uDBFF\\uDFFF");
es5("\u{FFFF}", "\\uFFFF");
}

#[test]
Expand Down Expand Up @@ -612,7 +633,7 @@ fn issue_1619_2() {
#[test]
fn issue_1619_3() {
assert_eq!(
escape_without_source("\x00\x31", EsVersion::Es3, true),
escape_without_source("\x00\x31", EsVersion::Es3, true, false),
"\\x001"
);
}
Expand Down Expand Up @@ -657,3 +678,22 @@ impl Write for Buf {
fn issue_2213() {
assert_min("a - -b * c", "a- -b*c")
}

/// Regression test (named after issue 3617): a string made up of unicode
/// whitespace characters, written as `\u` escapes in the input, must still be
/// emitted in escaped form when the target is ES5.
#[test]
fn issue3617() {
    // Raw string literals keep every backslash literal, so the sequences below
    // are JavaScript escapes in the input program, not Rust escapes.
    let from = r"// a string of all valid unicode whitespaces
module.exports = '\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u1680\u2000\u2001\u2002' +
'\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000\u2028\u2029\uFEFF';";
    // The expected output uses short escapes (`\t`, `\n`, `\v`, `\f`, `\r`), a
    // plain space, `\xa0`, and `\uXXXX` escapes for the rest.
    let expected = r#"// a string of all valid unicode whitespaces
module.exports = '\t\n\v\f\r \xa0\u1680\u2000\u2001\u2002' + '\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000\u2028\u2029\uFEFF';"#;

    let out = parse_then_emit(from, Default::default(), Syntax::default(), EsVersion::Es5);

    // `assert_eq!` prints both operands on failure, so no extra debug output
    // is needed here.
    assert_eq!(
        DebugUsingDisplay(out.trim()),
        DebugUsingDisplay(expected.trim()),
    );
}