From d9433ff1cea32c7a176422d19de2c02030da3269 Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Sat, 19 Feb 2022 23:22:44 +0000 Subject: [PATCH 01/10] fix unicode escpaing in es5 --- crates/swc_ecma_codegen/src/lib.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index 8af615317622..5c838149d103 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -3110,16 +3110,14 @@ fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> Stri '\u{7f}'..='\u{ff}' => { let _ = write!(buf, "\\x{:x}", c as u8); } - - '\u{2028}' => { - buf.push_str("\\u2028"); - } - '\u{2029}' => { - buf.push_str("\\u2029"); - } - _ => { - buf.push(c); + if let '\u{0001}'..='\u{FFFF}' = c { + // if char is unicode up to U+FFFF and it's not been transformed by prior match + // it's still a valid ES5 unicode char so lets keep it as is + let _ = write!(buf, "\\u{:x}", c as u16); + } else { + buf.push(c); + } } } } From b6b4ce2f9c0714da002083ed5bb22d36b2412f0a Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Sun, 20 Feb 2022 18:36:47 +0000 Subject: [PATCH 02/10] replicate same logic as babel --- Cargo.lock | 1 + crates/swc_ecma_codegen/Cargo.toml | 1 + crates/swc_ecma_codegen/src/lib.rs | 49 +++++++++++++++++++----------- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d48d203cae7f..8a91821b7040 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2900,6 +2900,7 @@ dependencies = [ "memchr", "num-bigint", "once_cell", + "regex", "sourcemap", "swc_atoms", "swc_common", diff --git a/crates/swc_ecma_codegen/Cargo.toml b/crates/swc_ecma_codegen/Cargo.toml index c7927a5ed558..a7d15a3b464a 100644 --- a/crates/swc_ecma_codegen/Cargo.toml +++ b/crates/swc_ecma_codegen/Cargo.toml @@ -14,6 +14,7 @@ bitflags = "1" memchr = "2.4.1" num-bigint = {version = "0.2", features = ["serde"]} once_cell = "1.9.0" +regex = "1" sourcemap = "6" swc_atoms = {version = "0.2", path = "../swc_atoms"} swc_common = {version = "0.17.3", path = "../swc_common"} diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index 5c838149d103..073584e551f1 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -507,7 +507,8 @@ where } StrKind::Synthesized => { let single_quote = false; - let value = escape_without_source(&node.value, self.wr.target(), single_quote); + let value = + escape_without_source(&node.value, self.wr.target(), single_quote, false); (single_quote, value) } @@ -3070,7 +3071,12 @@ fn unescape_tpl_lit(s: &str, is_synthesized: bool) -> String { result } -fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> String { +fn escape_without_source( + v: &str, + target: EsVersion, + single_quote: bool, + emit_non_ascii_as_unicode: bool, +) -> String { let mut buf = String::with_capacity(v.len()); let mut iter = v.chars().peekable(); @@ -3092,7 +3098,6 @@ fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> Stri buf.push_str("\\\\") } } - '\'' if single_quote => buf.push_str("\\'"), '"' if !single_quote => buf.push_str("\\\""), @@ -3110,13 +3115,17 @@ fn escape_without_source(v: &str, target: EsVersion, single_quote: bool) -> Stri '\u{7f}'..='\u{ff}' => { let _ = write!(buf, "\\x{:x}", c as u8); } + '\u{2028}' => { + buf.push_str("\\u2028"); + } + '\u{2029}' => { + buf.push_str("\\u2029"); + } _ => { - if let '\u{0001}'..='\u{FFFF}' = c { - // if char is unicode up to U+FFFF and it's not been transformed by prior match - // it's still a valid ES5 unicode char so lets keep it as is - let _ = write!(buf, "\\u{:x}", c as u16); - } else { + if !emit_non_ascii_as_unicode || c.is_ascii() { buf.push(c); + } else { + let _ = write!(buf, "\\u{:04x}", c as u32); } } } @@ -3132,25 +3141,31 @@ fn escape_with_source( s: &str, single_quote: Option, ) -> String { - if target <= EsVersion::Es5 { - return escape_without_source(s, target, single_quote.unwrap_or(false)); - } - if span.is_dummy() { - return escape_without_source(s, target, single_quote.unwrap_or(false)); + return escape_without_source(s, target, single_quote.unwrap_or(false), false); } - // let orig = cm.span_to_snippet(span); let orig = match orig { Ok(orig) => orig, Err(v) => { - return escape_without_source(s, target, single_quote.unwrap_or(false)); + return escape_without_source(s, target, single_quote.unwrap_or(false), false); } }; + if target <= EsVersion::Es5 { + let emit_non_ascii_as_unicode = regex::Regex::new(r#"(?i)\\[u]"#).unwrap().is_match(&orig); + + return escape_without_source( + s, + target, + single_quote.unwrap_or(false), + emit_non_ascii_as_unicode, + ); + } + if single_quote.is_some() && orig.len() <= 2 { - return escape_without_source(s, target, single_quote.unwrap_or(false)); + return escape_without_source(s, target, single_quote.unwrap_or(false), false); } let mut orig = &*orig; @@ -3160,7 +3175,7 @@ fn escape_with_source( { orig = &orig[1..orig.len() - 1]; } else if single_quote.is_some() { - return escape_without_source(s, target, single_quote.unwrap_or(false)); + return escape_without_source(s, target, single_quote.unwrap_or(false), false); } let mut buf = String::with_capacity(s.len()); From 3bba63f089822e10550e07ff2e20824aa9306240 Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Mon, 21 Feb 2022 17:47:23 +0000 Subject: [PATCH 03/10] fix tests and generate surrogates --- ...tringWhitespaceEscapes2_ES6_es5.1.normal.js | 2 +- ...ateStringWhitespaceEscapes2_es5.1.normal.js | 2 +- ...endedEscapesInStrings06_ES5_es5.1.normal.js | 2 +- ...endedEscapesInStrings06_ES6_es5.1.normal.js | 2 +- ...endedEscapesInStrings08_ES5_es5.1.normal.js | 2 +- ...endedEscapesInStrings08_ES6_es5.1.normal.js | 2 +- ...endedEscapesInStrings09_ES5_es5.1.normal.js | 2 +- ...endedEscapesInStrings09_ES6_es5.1.normal.js | 2 +- ...endedEscapesInStrings13_ES5_es5.1.normal.js | 2 +- ...endedEscapesInStrings13_ES6_es5.1.normal.js | 2 +- ...endedEscapesInStrings15_ES5_es5.1.normal.js | 2 +- ...endedEscapesInStrings15_ES6_es5.1.normal.js | 2 +- ...endedEscapesInStrings16_ES5_es5.1.normal.js | 2 +- ...endedEscapesInStrings16_ES6_es5.1.normal.js | 2 +- ...dedEscapesInTemplates06_ES5_es5.1.normal.js | 2 +- ...dedEscapesInTemplates06_ES6_es5.1.normal.js | 2 +- ...dedEscapesInTemplates08_ES5_es5.1.normal.js | 2 +- ...dedEscapesInTemplates08_ES6_es5.1.normal.js | 2 +- ...dedEscapesInTemplates09_ES5_es5.1.normal.js | 2 +- ...dedEscapesInTemplates09_ES6_es5.1.normal.js | 2 +- ...dedEscapesInTemplates13_ES5_es5.1.normal.js | 2 +- ...dedEscapesInTemplates13_ES6_es5.1.normal.js | 2 +- ...dedEscapesInTemplates15_ES5_es5.1.normal.js | 2 +- ...dedEscapesInTemplates15_ES6_es5.1.normal.js | 2 +- ...dedEscapesInTemplates16_ES5_es5.1.normal.js | 2 +- ...dedEscapesInTemplates16_ES6_es5.1.normal.js | 2 +- crates/swc_ecma_codegen/src/lib.rs | 18 +++++++++++++++++- crates/swc_ecma_codegen/src/tests.rs | 4 ++-- .../fixture/issues/2257/full/output.js | 6 +++--- 29 files changed, 48 insertions(+), 32 deletions(-) diff --git a/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_ES6_es5.1.normal.js index f45ffb9a6a3a..ef7e035356be 100644 --- a/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_ES6_es5.1.normal.js @@ -1,3 +1,3 @@ //@target: es6 // , , , , , -"\t\v\f \xa0"; +"\t\v\f \xa0\uFEFF"; diff --git a/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_es5.1.normal.js b/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_es5.1.normal.js index 89f96afaa8f2..4e72eb4a801c 100644 --- a/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/templateStringWhitespaceEscapes2_es5.1.normal.js @@ -1,2 +1,2 @@ // , , , , , -"\t\v\f \xa0"; +"\t\v\f \xa0\uFEFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES5_es5.1.normal.js index 6088f471691a..70770b241500 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES5_es5.1.normal.js @@ -1,4 +1,4 @@ // @target: es5 // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 1. Assert: 0 ≤ cp ≤ 0x10FFFF. -var x = "􏿿"; +var x = "\uDBFF\uDFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES6_es5.1.normal.js index 1ffb788159a4..25613d866c42 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings06_ES6_es5.1.normal.js @@ -1,4 +1,4 @@ // @target: es6 // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 1. Assert: 0 ≤ cp ≤ 0x10FFFF. -var x = "􏿿"; +var x = "\uDBFF\uDFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES5_es5.1.normal.js index a7ba86229a7d..d7da70015b20 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES5_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (FFFF == 65535) -var x = "￿"; +var x = "\uFFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES6_es5.1.normal.js index c9f7a321c679..88cf4d389668 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings08_ES6_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (FFFF == 65535) -var x = "￿"; +var x = "\uFFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES5_es5.1.normal.js index 370c189a9026..731d7275bb78 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES5_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (10000 == 65536) -var x = "𐀀"; +var x = "\uD800\uDC00"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES6_es5.1.normal.js index 1f4a9590c952..89077e978c13 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings09_ES6_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (10000 == 65536) -var x = "𐀀"; +var x = "\uD800\uDC00"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES5_es5.1.normal.js index 4382fd9c20cf..cc4b9052f398 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES5_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es5 -var x = "󝷝"; +var x = "\uDB37\uDDDD"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES6_es5.1.normal.js index fb3cf9c67315..ea3458a25a04 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings13_ES6_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es6 -var x = "󝷝"; +var x = "\uDB37\uDDDD"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES5_es5.1.normal.js index 98ceb7cb6230..e8708f018ece 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES5_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es5 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES6_es5.1.normal.js index 288ef567c13b..70dd6e167b91 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings15_ES6_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es6 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES5_es5.1.normal.js index 98ceb7cb6230..e8708f018ece 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES5_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es5 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES6_es5.1.normal.js index 288ef567c13b..70dd6e167b91 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings16_ES6_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es6 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES5_es5.1.normal.js index 6088f471691a..70770b241500 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES5_es5.1.normal.js @@ -1,4 +1,4 @@ // @target: es5 // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 1. Assert: 0 ≤ cp ≤ 0x10FFFF. -var x = "􏿿"; +var x = "\uDBFF\uDFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES6_es5.1.normal.js index 1ffb788159a4..25613d866c42 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates06_ES6_es5.1.normal.js @@ -1,4 +1,4 @@ // @target: es6 // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 1. Assert: 0 ≤ cp ≤ 0x10FFFF. -var x = "􏿿"; +var x = "\uDBFF\uDFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES5_es5.1.normal.js index a7ba86229a7d..d7da70015b20 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES5_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (FFFF == 65535) -var x = "￿"; +var x = "\uFFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES6_es5.1.normal.js index c9f7a321c679..88cf4d389668 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates08_ES6_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (FFFF == 65535) -var x = "￿"; +var x = "\uFFFF"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES5_es5.1.normal.js index 370c189a9026..731d7275bb78 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES5_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (10000 == 65536) -var x = "𐀀"; +var x = "\uD800\uDC00"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES6_es5.1.normal.js index 1f4a9590c952..89077e978c13 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates09_ES6_es5.1.normal.js @@ -2,4 +2,4 @@ // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (10000 == 65536) -var x = "𐀀"; +var x = "\uD800\uDC00"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES5_es5.1.normal.js index 4382fd9c20cf..cc4b9052f398 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES5_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es5 -var x = "󝷝"; +var x = "\uDB37\uDDDD"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES6_es5.1.normal.js index fb3cf9c67315..ea3458a25a04 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates13_ES6_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es6 -var x = "󝷝"; +var x = "\uDB37\uDDDD"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES5_es5.1.normal.js index 98ceb7cb6230..e8708f018ece 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES5_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es5 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES6_es5.1.normal.js index 288ef567c13b..70dd6e167b91 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates15_ES6_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es6 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES5_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES5_es5.1.normal.js index 98ceb7cb6230..e8708f018ece 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES5_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES5_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es5 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES6_es5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES6_es5.1.normal.js index 288ef567c13b..70dd6e167b91 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES6_es5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates16_ES6_es5.1.normal.js @@ -1,2 +1,2 @@ // @target: es6 -var x = "ꯍ㑖碐"; +var x = "\uABCD\uEF12\u3456\u7890"; diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index 073584e551f1..6a0b57d83f08 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -3124,8 +3124,23 @@ fn escape_without_source( _ => { if !emit_non_ascii_as_unicode || c.is_ascii() { buf.push(c); + } else if c > '\u{FFFF}' { + // if we've got this far the char isn't reserved and if the callee has specified + // we should output unicode for non-ascii chars then we have + // to make sure we output unicode that is safe for the target + // Es5 does not support code point escapes and so surrograte formula must be + // used + if target <= EsVersion::Es5 { + // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + let h = ((c as u32 - 0x10000) / 0x400) + 0xd800; + let l = (c as u32 - 0x10000) % 0x400 + 0xdc00; + + let _ = write!(buf, "\\u{:04X}\\u{:04X}", h, l); + } else { + let _ = write!(buf, "\\u{{{:04X}}}", c as u32); + } } else { - let _ = write!(buf, "\\u{:04x}", c as u32); + let _ = write!(buf, "\\u{:04X}", c as u16); } } } @@ -3142,6 +3157,7 @@ fn escape_with_source( single_quote: Option, ) -> String { if span.is_dummy() { + println!("dummy"); return escape_without_source(s, target, single_quote.unwrap_or(false), false); } diff --git a/crates/swc_ecma_codegen/src/tests.rs b/crates/swc_ecma_codegen/src/tests.rs index 8f3743ba4963..7879b01f508f 100644 --- a/crates/swc_ecma_codegen/src/tests.rs +++ b/crates/swc_ecma_codegen/src/tests.rs @@ -554,7 +554,7 @@ CONTENT\r fn test_escape_without_source() { fn es2020(src: &str, expected: &str) { assert_eq!( - super::escape_without_source(src, EsVersion::Es2020, true), + super::escape_without_source(src, EsVersion::Es2020, true, false), expected ) } @@ -612,7 +612,7 @@ fn issue_1619_2() { #[test] fn issue_1619_3() { assert_eq!( - escape_without_source("\x00\x31", EsVersion::Es3, true), + escape_without_source("\x00\x31", EsVersion::Es3, true, false), "\\x001" ); } diff --git a/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js b/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js index 02b7ed09435d..5a7ed2a134f1 100644 --- a/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js +++ b/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js @@ -9412,8 +9412,8 @@ } catch (err) { return (function(input) { for(var replaceMap = { - "%FE%FF": "��", - "%FF%FE": "��" + "%FE%FF": "\uFFFD\uFFFD", + "%FF%FE": "\uFFFD\uFFFD" }, match = multiMatcher.exec(input); match;){ try { replaceMap[match[0]] = decodeURIComponent(match[0]); @@ -9423,7 +9423,7 @@ } match = multiMatcher.exec(input); } - replaceMap["%C2"] = "�"; + replaceMap["%C2"] = "\uFFFD\uFFFD"; for(var entries = Object.keys(replaceMap), i = 0; i < entries.length; i++){ var key = entries[i]; input = input.replace(new RegExp(key, "g"), replaceMap[key]); From 88befb449519bdbf93e75e00191e16f171fdc86e Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Mon, 21 Feb 2022 17:57:01 +0000 Subject: [PATCH 04/10] add unit tests for escape_without_source --- crates/swc_ecma_codegen/src/tests.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crates/swc_ecma_codegen/src/tests.rs b/crates/swc_ecma_codegen/src/tests.rs index 7879b01f508f..2689531a25f6 100644 --- a/crates/swc_ecma_codegen/src/tests.rs +++ b/crates/swc_ecma_codegen/src/tests.rs @@ -559,6 +559,20 @@ fn test_escape_without_source() { ) } + fn es2020_nonascii(src: &str, expected: &str) { + assert_eq!( + super::escape_without_source(src, EsVersion::Es2020, true, true), + expected + ) + } + + fn es5(src: &str, expected: &str) { + assert_eq!( + super::escape_without_source(src, EsVersion::Es5, true, true), + expected + ) + } + es2020("abcde", "abcde"); es2020( "\x00\r\n\u{85}\u{2028}\u{2029};", @@ -576,6 +590,13 @@ fn test_escape_without_source() { es2020("\u{1000}", "\u{1000}"); es2020("\u{ff}", "\\xff"); es2020("\u{10ffff}", "\u{10ffff}"); + + es2020_nonascii("\u{FEFF}abc", "\\uFEFFabc"); + es2020_nonascii("\u{10ffff}", "\\u{10FFFF}"); + + es5("\u{FEFF}abc", "\\uFEFFabc"); + es5("\u{10ffff}", "\\uDBFF\\uDFFF"); + es5("\u{FFFF}", "\\uFFFF"); } #[test] From da40f012d9b7bcb000933df1640f44ba64c618ce Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Mon, 21 Feb 2022 18:23:32 +0000 Subject: [PATCH 05/10] revert change to 2257 --- .../tests/compress/fixture/issues/2257/full/output.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js b/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js index 760eab33f571..a353014fe393 100644 --- a/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js +++ b/crates/swc_ecma_minifier/tests/compress/fixture/issues/2257/full/output.js @@ -9412,8 +9412,8 @@ } catch (err) { return (function(input) { for(var replaceMap = { - "%FE%FF": "\uFFFD\uFFFD", - "%FF%FE": "\uFFFD\uFFFD" + "%FE%FF": "��", + "%FF%FE": "��" }, match = multiMatcher.exec(input); match;){ try { replaceMap[match[0]] = decodeURIComponent(match[0]); @@ -9423,7 +9423,7 @@ } match = multiMatcher.exec(input); } - replaceMap["%C2"] = "\uFFFD\uFFFD"; + replaceMap["%C2"] = "�"; for(var entries = Object.keys(replaceMap), i = 0; i < entries.length; i++){ var key = entries[i]; input = input.replace(new RegExp(key, "g"), replaceMap[key]); From e92cc069e84cd15dc518e9bddf90fd9a57e55853 Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Mon, 21 Feb 2022 20:18:07 +0000 Subject: [PATCH 06/10] add regression test for issue 3617 --- crates/swc_ecma_codegen/src/tests.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/crates/swc_ecma_codegen/src/tests.rs b/crates/swc_ecma_codegen/src/tests.rs index 2689531a25f6..0e6fb6c83f03 100644 --- a/crates/swc_ecma_codegen/src/tests.rs +++ b/crates/swc_ecma_codegen/src/tests.rs @@ -678,3 +678,22 @@ impl Write for Buf { fn issue_2213() { assert_min("a - -b * c", "a- -b*c") } + +#[test] +fn issue3617() { + let from = r"// a string of all valid unicode whitespaces + module.exports = '\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u1680\u2000\u2001\u2002' + + '\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000\u2028\u2029\uFEFF';"; + let expected = r#"// a string of all valid unicode whitespaces +module.exports = '\t\n\v\f\r \xa0\u1680\u2000\u2001\u2002' + '\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000\u2028\u2029\uFEFF';"#; + + let out = parse_then_emit(from, Default::default(), Syntax::default(), EsVersion::Es5); + + dbg!(&out); + dbg!(&expected); + + assert_eq!( + DebugUsingDisplay(out.trim()), + DebugUsingDisplay(expected.trim()), + ); +} From 287ec00e73d761eb360d91593bb93fd0226ffbe3 Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Mon, 21 Feb 2022 20:31:14 +0000 Subject: [PATCH 07/10] lazily initialize regex --- crates/swc_ecma_codegen/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index bcb58229309b..a198ef148ab3 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -3156,6 +3156,9 @@ fn escape_with_source( s: &str, single_quote: Option, ) -> String { + static UNICODE_CODEPOINT: Lazy = + Lazy::new(|| regex::Regex::new(r#"(?i)\\[u]"#).unwrap()); + if span.is_dummy() { println!("dummy"); return escape_without_source(s, target, single_quote.unwrap_or(false), false); @@ -3170,7 +3173,7 @@ fn escape_with_source( }; if target <= EsVersion::Es5 { - let emit_non_ascii_as_unicode = regex::Regex::new(r#"(?i)\\[u]"#).unwrap().is_match(&orig); + let emit_non_ascii_as_unicode = UNICODE_CODEPOINT.is_match(&orig); return escape_without_source( s, From fd31c41153a56f01d6e820e0a49ba302fb34b1b3 Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Mon, 21 Feb 2022 20:33:45 +0000 Subject: [PATCH 08/10] remove debug log --- crates/swc_ecma_codegen/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index a198ef148ab3..d2b52e302c0b 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -3160,7 +3160,6 @@ fn escape_with_source( Lazy::new(|| regex::Regex::new(r#"(?i)\\[u]"#).unwrap()); if span.is_dummy() { - println!("dummy"); return escape_without_source(s, target, single_quote.unwrap_or(false), false); } From 4a11bab72e265ab7cdeecb1746262a6c48b317fb Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Fri, 25 Feb 2022 09:44:56 +0000 Subject: [PATCH 09/10] do string contains instead of regex --- crates/swc_ecma_codegen/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index d2b52e302c0b..0279dcf3da8d 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -3156,9 +3156,6 @@ fn escape_with_source( s: &str, single_quote: Option, ) -> String { - static UNICODE_CODEPOINT: Lazy = - Lazy::new(|| regex::Regex::new(r#"(?i)\\[u]"#).unwrap()); - if span.is_dummy() { return escape_without_source(s, target, single_quote.unwrap_or(false), false); } @@ -3172,7 +3169,7 @@ fn escape_with_source( }; if target <= EsVersion::Es5 { - let emit_non_ascii_as_unicode = UNICODE_CODEPOINT.is_match(&orig); + let emit_non_ascii_as_unicode = orig.contains("\\u"); return escape_without_source( s, From 0ef810b4a4d699c0ebf59a02d5cc139e0f41c062 Mon Sep 17 00:00:00 2001 From: William Tetlow Date: Fri, 25 Feb 2022 12:31:20 +0000 Subject: [PATCH 10/10] remove unused crate --- Cargo.lock | 1 - crates/swc_ecma_codegen/Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bbc2c4cf50d0..bbe17f4ef815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2900,7 +2900,6 @@ dependencies = [ "memchr", "num-bigint", "once_cell", - "regex", "sourcemap", "swc_atoms", "swc_common", diff --git a/crates/swc_ecma_codegen/Cargo.toml b/crates/swc_ecma_codegen/Cargo.toml index 4132a14b63f3..1f49cbe42d9a 100644 --- a/crates/swc_ecma_codegen/Cargo.toml +++ b/crates/swc_ecma_codegen/Cargo.toml @@ -14,7 +14,6 @@ bitflags = "1" memchr = "2.4.1" num-bigint = {version = "0.2", features = ["serde"]} once_cell = "1.9.0" -regex = "1" sourcemap = "6" swc_atoms = {version = "0.2", path = "../swc_atoms"} swc_common = {version = "0.17.3", path = "../swc_common"}