diff --git a/src/element_ref/serializable.rs b/src/element_ref/serializable.rs
index 1e12566c..98dda704 100644
--- a/src/element_ref/serializable.rs
+++ b/src/element_ref/serializable.rs
@@ -1,9 +1,8 @@
 use std::io::Error;
 
-use ego_tree::iter::Edge;
 use html5ever::serialize::{Serialize, Serializer, TraversalScope};
 
-use crate::{ElementRef, Node};
+use crate::ElementRef;
 
 impl<'a> Serialize for ElementRef<'a> {
     fn serialize<S: Serializer>(
@@ -11,43 +10,6 @@ impl<'a> Serialize for ElementRef<'a> {
         serializer: &mut S,
         traversal_scope: TraversalScope,
     ) -> Result<(), Error> {
-        for edge in self.traverse() {
-            match edge {
-                Edge::Open(node) => {
-                    if node == **self && traversal_scope == TraversalScope::ChildrenOnly(None) {
-                        continue;
-                    }
-
-                    match *node.value() {
-                        Node::Doctype(ref doctype) => {
-                            serializer.write_doctype(doctype.name())?;
-                        }
-                        Node::Comment(ref comment) => {
-                            serializer.write_comment(comment)?;
-                        }
-                        Node::Text(ref text) => {
-                            serializer.write_text(text)?;
-                        }
-                        Node::Element(ref elem) => {
-                            let attrs = elem.attrs.iter().map(|(k, v)| (k, &v[..]));
-                            serializer.start_elem(elem.name.clone(), attrs)?;
-                        }
-                        _ => (),
-                    }
-                }
-
-                Edge::Close(node) => {
-                    if node == **self && traversal_scope == TraversalScope::ChildrenOnly(None) {
-                        continue;
-                    }
-
-                    if let Some(elem) = node.value().as_element() {
-                        serializer.end_elem(elem.name.clone())?;
-                    }
-                }
-            }
-        }
-
-        Ok(())
+        crate::node::serializable::serialize(**self, serializer, traversal_scope)
     }
 }
diff --git a/src/html/mod.rs b/src/html/mod.rs
index c5dd086a..125049c5 100644
--- a/src/html/mod.rs
+++ b/src/html/mod.rs
@@ -4,9 +4,10 @@ use std::borrow::Cow;
 
 use ego_tree::iter::Nodes;
 use ego_tree::Tree;
-use html5ever::driver;
+use html5ever::serialize::SerializeOpts;
 use html5ever::tree_builder::QuirksMode;
 use html5ever::QualName;
+use html5ever::{driver, serialize};
 use tendril::TendrilSink;
 
 use crate::selector::Selector;
@@ -101,6 +102,18 @@ impl Html {
             .expect("html node missing");
         ElementRef::wrap(root_node).unwrap()
     }
+
+    /// Serializes the entire document to an HTML string.
+    pub fn html(&self) -> String {
+        let opts = SerializeOpts {
+            scripting_enabled: false, // `<noscript>` text gets escaped like other text
+            traversal_scope: html5ever::serialize::TraversalScope::IncludeNode,
+            create_missing_parent: false,
+        };
+        let mut buf = Vec::new();
+        serialize(&mut buf, self, opts).expect("writing to an in-memory Vec<u8> cannot fail");
+        String::from_utf8(buf).expect("html5ever serializer emits valid UTF-8")
+    }
 }
 
 /// Iterator over elements matching a selector.
@@ -125,6 +138,7 @@ impl<'a, 'b> Iterator for Select<'a, 'b> {
     }
 }
 
+mod serializable;
 mod tree_sink;
 
 #[cfg(test)]
diff --git a/src/html/serializable.rs b/src/html/serializable.rs
new file mode 100644
index 00000000..a30a2ccd
--- /dev/null
+++ b/src/html/serializable.rs
@@ -0,0 +1,30 @@
+use std::io::Error;
+
+use html5ever::serialize::{Serialize, Serializer, TraversalScope};
+
+use crate::Html;
+
+/// Serializes a document by running the shared node serializer from the tree root.
+impl Serialize for Html {
+    fn serialize<S: Serializer>(
+        &self,
+        serializer: &mut S,
+        traversal_scope: TraversalScope,
+    ) -> Result<(), Error> {
+        crate::node::serializable::serialize(self.tree.root(), serializer, traversal_scope)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Html;
+
+    #[test]
+    fn test_serialize() {
+        // The input is already in the form the serializer emits, so it must round-trip unchanged.
+        // NOTE(review): the tags in this literal were reconstructed; confirm against upstream.
+        let src = r#"<html><head></head><body><p>Hello world!</p></body></html>"#;
+        let html = Html::parse_document(src);
+        assert_eq!(html.html(), src);
+    }
+}
diff --git a/src/node.rs b/src/node.rs
index d536850a..78d5f940 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -360,3 +360,5 @@ impl Deref for ProcessingInstruction {
         self.data.deref()
     }
 }
+
+pub(crate) mod serializable;
diff --git a/src/node/serializable.rs b/src/node/serializable.rs
new file mode 100644
index 00000000..b5f8183a
--- /dev/null
+++ b/src/node/serializable.rs
@@ -0,0 +1,67 @@
+use std::io::Error;
+
+use ego_tree::{iter::Edge, NodeRef};
+use html5ever::serialize::{Serializer, TraversalScope};
+
+use crate::Node;
+
+/// Serialize an HTML node and its descendants using an html5ever serializer.
+///
+/// Walks `self_node.traverse()` and hands each visited node to `serializer`:
+/// doctypes, comments, text and element start tags on `Edge::Open`, element
+/// end tags on `Edge::Close`. Any other node kind is skipped.
+///
+/// With `TraversalScope::ChildrenOnly(None)` the starting node itself is not
+/// written; only its descendants are.
+///
+/// # Errors
+///
+/// Propagates the first error returned by `serializer`.
+pub(crate) fn serialize<S: Serializer>(
+    self_node: NodeRef<Node>,
+    serializer: &mut S,
+    traversal_scope: TraversalScope,
+) -> Result<(), Error> {
+    // The scope never changes during the walk, so decide once whether the
+    // starting node has to be left out.
+    let skip_self = traversal_scope == TraversalScope::ChildrenOnly(None);
+
+    for edge in self_node.traverse() {
+        match edge {
+            Edge::Open(node) => {
+                if skip_self && node == self_node {
+                    continue;
+                }
+
+                match *node.value() {
+                    Node::Doctype(ref doctype) => {
+                        serializer.write_doctype(doctype.name())?;
+                    }
+                    Node::Comment(ref comment) => {
+                        serializer.write_comment(comment)?;
+                    }
+                    Node::Text(ref text) => {
+                        serializer.write_text(text)?;
+                    }
+                    Node::Element(ref elem) => {
+                        let attrs = elem.attrs.iter().map(|(k, v)| (k, &v[..]));
+                        serializer.start_elem(elem.name.clone(), attrs)?;
+                    }
+                    _ => (),
+                }
+            }
+
+            Edge::Close(node) => {
+                if skip_self && node == self_node {
+                    continue;
+                }
+
+                if let Some(elem) = node.value().as_element() {
+                    serializer.end_elem(elem.name.clone())?;
+                }
+            }
+        }
+    }
+
+    Ok(())
+}