swc-project · kdy1 · Nov 29, 2022 · Nov 18, 2022 · Nov 28, 2022 · Nov 28, 2022
@@ -29,6 +29,8 @@ pub enum Child {
     Element(Element),
     #[tag("Text")]
     Text(Text),
+    #[tag("CdataSection")]
+    CdataSection(CdataSection),
     #[tag("Comment")]
     Comment(Comment),
     #[tag("ProcessingInstruction")]
@@ -90,6 +92,14 @@ pub struct Text {
     pub raw: Option<JsWord>,
 }
 
+#[ast_node("CdataSection")] // AST node for one `<![CDATA[ ... ]]>` section.
+#[derive(Eq, Hash, EqIgnoreSpan)]
+pub struct CdataSection {
+    pub span: Span, // Span taken from the CDATA token by the parser.
+    pub data: JsWord, // Text between the delimiters; the emitter re-wraps it.
+    pub raw: Option<JsWord>, // Source text including `<![CDATA[` and `]]>`, when known.
+}
+
 #[ast_node("ProcessingInstruction")]
 #[derive(Eq, Hash, EqIgnoreSpan)]
 pub struct ProcessingInstruction {

@@ -60,5 +60,9 @@ pub enum Token {
         target: JsWord,
         data: JsWord,
     },
+    Cdata {
+        data: JsWord,
+        raw: JsWord,
+    },
     Eof,
 }
@@ -62,6 +62,7 @@ where
             Child::Text(n) => emit!(self, n),
             Child::Comment(n) => emit!(self, n),
             Child::ProcessingInstruction(n) => emit!(self, n),
+            Child::CdataSection(n) => emit!(self, n),
         }
     }
 
@@ -287,6 +288,17 @@ where
         write_multiline_raw!(self, n.span, &processing_instruction);
     }
 
+    #[emitter]
+    fn emit_cdata_section(&mut self, n: &CdataSection) -> Result { // Writes `<![CDATA[` + data + `]]>`.
+        let mut cdata_section = String::with_capacity(n.data.len() + 12); // 12 = 9 + 3 delimiter bytes
+
+        cdata_section.push_str("<![CDATA[");
+        cdata_section.push_str(&n.data);
+        cdata_section.push_str("]]>");
+
+        write_multiline_raw!(self, n.span, &cdata_section); // built from `data`; `n.raw` is not read
+    }
+
     fn create_context_for_element(&self, n: &Element) -> Ctx {
         let need_escape_text = match &*n.tag_name {
             "noscript" => !self.config.scripting_enabled,

@@ -0,0 +1,25 @@
+<root>
+    <description>An example of escaped CENDs</description>
+    <!-- This text contains a CEND ]]> -->
+    <!-- In this first case we put the ]] at the end of the first CDATA block
+         and the > in the second CDATA block -->
+    <exampleOfACDATA>
+        <![CDATA[
+    Since this is a CDATA section
+    I can use all sorts of reserved characters
+    but my document is still well formed!
+    ]]>
+    </exampleOfACDATA>
+    <p><![CDATA[<greeting>Hello, world!</greeting>]]></p>
+    <p><![CDATA[content]]></p>
+    <p><![CDATA[&amping]]></p>
+    <p><![CDATA[&amping ]]]></p>
+    <p><![CDATA[&amping]] ]]></p>
+    <p><![CDATA[<message>text</message>]]></p>
+    <p><![CDATA[</this is malformed!</malformed</malformed & worse>]]></p>
+    <p><![CDATA[1]]><![CDATA[2]]></p>
+    <p>
+        <![CDATA[data]]>  </p>
+    <p><![CDATA[bracket ]after]]></p>
+    <p><![CDATA[]]></p>
+</root>
@@ -0,0 +1,25 @@
+<root>
+    <description>An example of escaped CENDs</description>
+    <!-- This text contains a CEND ]]> -->
+    <!-- In this first case we put the ]] at the end of the first CDATA block
+         and the > in the second CDATA block -->
+    <exampleOfACDATA>
+        <![CDATA[
+    Since this is a CDATA section
+    I can use all sorts of reserved characters
+    but my document is still well formed!
+    ]]>
+    </exampleOfACDATA>
+    <p><![CDATA[<greeting>Hello, world!</greeting>]]></p>
+    <p><![CDATA[content]]></p>
+    <p><![CDATA[&amping]]></p>
+    <p><![CDATA[&amping ]]]></p>
+    <p><![CDATA[&amping]] ]]></p>
+    <p><![CDATA[<message>text</message>]]></p>
+    <p><![CDATA[</this is malformed!</malformed</malformed & worse>]]></p>
+    <p><![CDATA[1]]><![CDATA[2]]></p>
+    <p>
+        <![CDATA[data]]>  </p>
+    <p><![CDATA[bracket ]after]]></p>
+    <p><![CDATA[]]></p>
+</root>
@@ -0,0 +1,25 @@
+<root>
+    <description>An example of escaped CENDs</description>
+    <!-- This text contains a CEND ]]> -->
+    <!-- In this first case we put the ]] at the end of the first CDATA block
+         and the > in the second CDATA block -->
+    <exampleOfACDATA>
+        <![CDATA[
+    Since this is a CDATA section
+    I can use all sorts of reserved characters
+    but my document is still well formed!
+    ]]>
+    </exampleOfACDATA>
+    <p><![CDATA[<greeting>Hello, world!</greeting>]]></p>
+    <p><![CDATA[content]]></p>
+    <p><![CDATA[&amping]]></p>
+    <p><![CDATA[&amping ]]]></p>
+    <p><![CDATA[&amping]] ]]></p>
+    <p><![CDATA[<message>text</message>]]></p>
+    <p><![CDATA[</this is malformed!</malformed</malformed & worse>]]></p>
+    <p><![CDATA[1]]><![CDATA[2]]></p>
+    <p>
+        <![CDATA[data]]>  </p>
+    <p><![CDATA[bracket ]after]]></p>
+    <p><![CDATA[]]></p>
+</root>
@@ -111,6 +111,12 @@ struct ProcessingInstruction {
     data: String,
 }
 
+#[derive(PartialEq, Eq, Clone, Debug)]
+struct Cdata { // Lexer-side buffer for the CDATA section currently being tokenized.
+    data: String, // Characters between the delimiters.
+    raw: String, // Every consumed character, including `<![CDATA[` and `]]>`.
+}
+
 pub(crate) type LexResult<T> = Result<T, ErrorKind>;
 
 pub struct Lexer<I>
@@ -128,11 +134,11 @@ where
     additional_allowed_character: Option<char>,
     pending_tokens: VecDeque<TokenAndSpan>,
     doctype_raw: Option<String>,
-    cdata_raw: Option<String>,
     current_doctype_token: Option<Doctype>,
     current_comment_token: Option<Comment>,
     current_processing_instruction: Option<ProcessingInstruction>,
     current_tag_token: Option<Tag>,
+    current_cdata_token: Option<Cdata>,
     attribute_start_position: Option<BytePos>,
 }
 
@@ -155,11 +161,11 @@ where
             additional_allowed_character: None,
             pending_tokens: VecDeque::new(),
             doctype_raw: None,
-            cdata_raw: None,
             current_doctype_token: None,
             current_comment_token: None,
             current_processing_instruction: None,
             current_tag_token: None,
+            current_cdata_token: None,
             attribute_start_position: None,
         };
 
@@ -864,6 +870,34 @@ where
         });
     }
 
+    fn create_cdata_token(&mut self) { // Starts a new CDATA token, replacing any pending one.
+        let data = String::new();
+        let raw = String::with_capacity(12); // presumably sized for `<![CDATA[` (9) + `]]>` (3)
+
+        self.current_cdata_token = Some(Cdata { data, raw });
+    }
+
+    fn append_to_cdata_token(&mut self, c: Option<char>, raw_c: Option<char>) { // Grows the open token.
+        if let Some(Cdata { data, raw }) = &mut self.current_cdata_token { // no-op when none is open
+            if let Some(c) = c { // content character; callers pass `None` for delimiters
+                data.push(c);
+            }
+
+            if let Some(raw_c) = raw_c { // source character, delimiters included
+                raw.push(raw_c);
+            }
+        }
+    }
+
+    fn emit_cdata_token(&mut self) { // Emits the buffered CDATA as `Token::Cdata` and clears it.
+        let cdata = self.current_cdata_token.take().unwrap(); // panics if no CDATA token is open
+
+        self.emit_token(Token::Cdata {
+            data: cdata.data.into(),
+            raw: cdata.raw.into(),
+        });
+    }
+
     fn handle_raw_and_emit_character_token(&mut self, c: char) {
         let is_cr = c == '\r';
 
@@ -1400,17 +1434,16 @@ where
                                     Some(t @ 'T') => match self.consume_next_char() {
                                         Some(a2 @ 'A') => match self.consume_next_char() {
                                             Some('[') => {
-                                                let mut data = String::with_capacity(7);
-
-                                                data.push('[');
-                                                data.push(c);
-                                                data.push(d);
-                                                data.push(a1);
-                                                data.push(t);
-                                                data.push(a2);
-                                                data.push('[');
-
-                                                self.cdata_raw = Some(data);
+                                                self.create_cdata_token();
+                                                self.append_to_cdata_token(None, Some('<'));
+                                                self.append_to_cdata_token(None, Some('!'));
+                                                self.append_to_cdata_token(None, Some('['));
+                                                self.append_to_cdata_token(None, Some(c));
+                                                self.append_to_cdata_token(None, Some(d));
+                                                self.append_to_cdata_token(None, Some(a1));
+                                                self.append_to_cdata_token(None, Some(t));
+                                                self.append_to_cdata_token(None, Some(a2));
+                                                self.append_to_cdata_token(None, Some('['));
                                                 self.state = State::Cdata;
                                             }
                                             _ => {
@@ -1734,11 +1767,11 @@ where
                         self.reconsume_in_state(State::Data);
                     }
                     // Anything else
-                    // Emit the current input character as character token. Stay in the current
+                    // Append the current input character to the cdata data. Stay in the current
                     // state.
                     Some(c) => {
                         self.validate_input_stream_character(c);
-                        self.handle_raw_and_emit_character_token(c);
+                        self.append_to_cdata_token(Some(c), Some(c));
                     }
                 }
             }
@@ -1760,9 +1793,9 @@ where
                     // Emit a U+005D RIGHT SQUARE BRACKET character token. Reconsume in the
                     // CDATA section state.
                     Some(c) => {
-                        self.emit_character_token((']', ']'));
-                        self.emit_character_token((c, c));
-                        self.reconsume_in_state(State::Cdata);
+                        self.append_to_cdata_token(Some(']'), Some(']'));
+                        self.append_to_cdata_token(Some(c), Some(c));
+                        self.state = State::Cdata;
                     }
                 }
             }
@@ -1772,13 +1805,17 @@ where
                     // U+003E GREATER-THAN SIGN (>)
                     // Switch to the data state.
                     Some('>') => {
+                        self.append_to_cdata_token(None, Some(']'));
+                        self.append_to_cdata_token(None, Some(']'));
+                        self.append_to_cdata_token(None, Some('>'));
+                        self.emit_cdata_token();
                         self.state = State::Data;
                     }
                     // U+005D RIGHT SQUARE BRACKET (])
                     // Emit the current input character as character token. Stay in the current
                     // state.
                     Some(c @ ']') => {
-                        self.emit_character_token((c, c));
+                        self.append_to_cdata_token(Some(c), Some(c));
                     }
                     // EOF
                     // Parse error. Reconsume the current input character in the data state.
@@ -1791,9 +1828,9 @@ where
                     // also emit the current input character as character token. Switch to the CDATA
                     // state.
                     Some(c) => {
-                        self.emit_character_token((']', ']'));
-                        self.emit_character_token((']', ']'));
-                        self.emit_character_token((c, c));
+                        self.append_to_cdata_token(Some(']'), Some(']'));
+                        self.append_to_cdata_token(Some(']'), Some(']'));
+                        self.append_to_cdata_token(Some(c), Some(c));
                         self.state = State::Cdata;
                     }
                 }

@@ -188,6 +188,11 @@ where
                     data,
                 })
             }
+            Data::CdataSection { data, raw } => Child::CdataSection(CdataSection {
+                span: start_span,
+                data,
+                raw,
+            }),
             _ => {
                 unreachable!();
             }
@@ -267,6 +272,14 @@ where
                 Token::ProcessingInstruction { .. } => {
                     self.append_processing_instruction_to_doc(token_and_info)?;
                 }
+                Token::Cdata { .. } => {
+                    self.errors.push(Error::new(
+                        token_and_info.span,
+                        ErrorKind::UnexpectedTokenInStartPhase,
+                    ));
+
+                    self.append_cdata_to_doc(token_and_info)?;
+                }
                 Token::Character { value, .. } => {
                     if !is_whitespace(*value) {
                         self.errors.push(Error::new(
@@ -354,6 +367,11 @@ where
 
                     self.append_node(self.get_current_element(), processing_instruction);
                 }
+                Token::Cdata { .. } => {
+                    let cdata = self.create_cdata_section(token_and_info);
+
+                    self.append_node(self.get_current_element(), cdata);
+                }
                 Token::Eof => {
                     self.errors.push(Error::new(
                         token_and_info.span,
@@ -376,6 +394,14 @@ where
                 Token::ProcessingInstruction { .. } => {
                     self.append_processing_instruction_to_doc(token_and_info)?;
                 }
+                Token::Cdata { .. } => {
+                    self.errors.push(Error::new(
+                        token_and_info.span,
+                        ErrorKind::UnexpectedTokenInEndPhase,
+                    ));
+
+                    self.append_cdata_to_doc(token_and_info)?;
+                }
                 Token::Character { value, .. } => {
                     if !is_whitespace(*value) {
                         self.errors.push(Error::new(
@@ -603,6 +629,25 @@ where
         Ok(())
     }
 
+    fn create_cdata_section(&self, token_and_info: &mut TokenAndInfo) -> RcNode { // CDATA token -> node.
+        let (data, raw) = match &token_and_info.token { // the token is only read, despite `&mut`
+            Token::Cdata { data, raw } => (data.clone(), Some(raw.clone())), // raw is always `Some`
+            _ => {
+                unreachable!() // the visible callers only pass `Token::Cdata`
+            }
+        };
+
+        Node::new(Data::CdataSection { data, raw }, token_and_info.span) // node keeps the token span
+    }
+
+    fn append_cdata_to_doc(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> { // CDATA -> doc.
+        let child = self.create_cdata_section(token_and_info);
+
+        self.append_node(self.document.as_ref().unwrap(), child); // panics if `self.document` is None
+
+        Ok(()) // this function itself never returns `Err`
+    }
+
     fn update_end_tag_span(&self, node: Option<&RcNode>, span: Span) {
         if let Some(node) = node {
             if node.start_span.borrow().is_dummy() {

@@ -36,6 +36,10 @@ pub enum Data {
         target: JsWord,
         data: JsWord,
     },
+    CdataSection {
+        data: JsWord,
+        raw: Option<JsWord>,
+    },
     Comment {
         data: JsWord,
         raw: Option<JsWord>,