Skip to content

Commit

Permalink
Merge pull request #86 from TonalidadeHidrica/serialize-entire-document
Browse files Browse the repository at this point in the history
Implement serializer for `Html`
  • Loading branch information
cfvescovo committed Aug 30, 2022
2 parents 5578819 + dfb3930 commit 8a53f01
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 41 deletions.
42 changes: 2 additions & 40 deletions src/element_ref/serializable.rs
@@ -1,53 +1,15 @@
use std::io::Error;

use ego_tree::iter::Edge;
use html5ever::serialize::{Serialize, Serializer, TraversalScope};

use crate::{ElementRef, Node};
use crate::ElementRef;

// NOTE(review): this span comes from a diff view whose +/- markers were lost. The
// traversal loop through `Ok(())` looks like the body removed by the commit, and the final
// `crate::node::serializable::serialize(...)` call looks like its replacement — confirm
// against the commit before treating both as live code.
/// Serializes an element (and its subtree) through an html5ever `Serializer`.
impl<'a> Serialize for ElementRef<'a> {
fn serialize<S: Serializer>(
&self,
serializer: &mut S,
traversal_scope: TraversalScope,
) -> Result<(), Error> {
// Walk the subtree as a stream of open/close edges.
for edge in self.traverse() {
match edge {
Edge::Open(node) => {
// With `ChildrenOnly(None)` the element itself is not emitted, only its descendants.
if node == **self && traversal_scope == TraversalScope::ChildrenOnly(None) {
continue;
}

match *node.value() {
Node::Doctype(ref doctype) => {
serializer.write_doctype(doctype.name())?;
}
Node::Comment(ref comment) => {
serializer.write_comment(comment)?;
}
Node::Text(ref text) => {
serializer.write_text(text)?;
}
Node::Element(ref elem) => {
// Pass attributes as (name, value-as-&str) pairs.
let attrs = elem.attrs.iter().map(|(k, v)| (k, &v[..]));
serializer.start_elem(elem.name.clone(), attrs)?;
}
// Every other node kind produces no output when opened.
_ => (),
}
}

Edge::Close(node) => {
// Mirror of the open-edge check: no end tag for the skipped root.
if node == **self && traversal_scope == TraversalScope::ChildrenOnly(None) {
continue;
}

// Only elements have an end tag to write.
if let Some(elem) = node.value().as_element() {
serializer.end_elem(elem.name.clone())?;
}
}
}
}

Ok(())
// Delegates to the crate's shared node serializer, passing the underlying node.
crate::node::serializable::serialize(**self, serializer, traversal_scope)
}
}
16 changes: 15 additions & 1 deletion src/html/mod.rs
Expand Up @@ -4,9 +4,10 @@ use std::borrow::Cow;

use ego_tree::iter::Nodes;
use ego_tree::Tree;
use html5ever::driver;
use html5ever::serialize::SerializeOpts;
use html5ever::tree_builder::QuirksMode;
use html5ever::QualName;
use html5ever::{driver, serialize};
use tendril::TendrilSink;

use crate::selector::Selector;
Expand Down Expand Up @@ -101,6 +102,18 @@ impl Html {
.expect("html node missing");
ElementRef::wrap(root_node).unwrap()
}

/// Serializes the entire document into an HTML string.
///
/// The document is serialized with `TraversalScope::IncludeNode` into an in-memory
/// buffer, which is then converted to a `String`.
///
/// # Panics
///
/// Panics if serialization reports an error or if the produced bytes are not valid
/// UTF-8. Neither is expected, because the output goes to an in-memory buffer.
pub fn html(&self) -> String {
    let opts = SerializeOpts {
        // NOTE(review): html5ever documents this flag only as "Is scripting enabled?";
        // it presumably changes how `<noscript>` content is written — confirm against
        // the html5ever serializer before relying on it.
        scripting_enabled: false,
        traversal_scope: html5ever::serialize::TraversalScope::IncludeNode,
        create_missing_parent: false,
    };
    let mut buf = Vec::new();
    serialize(&mut buf, self, opts)
        .expect("serializing into an in-memory buffer should not fail");
    String::from_utf8(buf).expect("serializer output should be valid UTF-8")
}
}

/// Iterator over elements matching a selector.
Expand All @@ -125,6 +138,7 @@ impl<'a, 'b> Iterator for Select<'a, 'b> {
}
}

mod serializable;
mod tree_sink;

#[cfg(test)]
Expand Down
27 changes: 27 additions & 0 deletions src/html/serializable.rs
@@ -0,0 +1,27 @@
use std::io::Error;

use html5ever::serialize::{Serialize, Serializer, TraversalScope};

use crate::Html;

/// Lets a whole parsed [`Html`] document be fed to an html5ever serializer.
impl Serialize for Html {
    /// Hands the root of the document tree to the crate's shared node serializer,
    /// passing `serializer` and `traversal_scope` through unchanged.
    fn serialize<S: Serializer>(
        &self,
        serializer: &mut S,
        traversal_scope: TraversalScope,
    ) -> Result<(), Error> {
        let document_root = self.tree.root();
        crate::node::serializable::serialize(document_root, serializer, traversal_scope)
    }
}

#[cfg(test)]
mod tests {
    use crate::Html;

    /// Round-trip check: parsing a document and serializing it again gives back the input.
    #[test]
    fn test_serialize() {
        let src = r#"<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"></head><body><p>Hello world!</p></body></html>"#;
        let serialized = Html::parse_document(src).html();
        assert_eq!(serialized, src);
    }
}
2 changes: 2 additions & 0 deletions src/node.rs
Expand Up @@ -360,3 +360,5 @@ impl Deref for ProcessingInstruction {
self.data.deref()
}
}

pub(crate) mod serializable;
52 changes: 52 additions & 0 deletions src/node/serializable.rs
@@ -0,0 +1,52 @@
use std::io::Error;

use ego_tree::{iter::Edge, NodeRef};
use html5ever::serialize::{Serializer, TraversalScope};

use crate::Node;

/// Serialize an HTML node and its descendants using an html5ever serializer.
///
/// Walks the subtree rooted at `self_node` as a sequence of open/close edges and forwards
/// each node to `serializer`: doctypes, comments and text are written when opened, and
/// elements get a start tag when opened and an end tag when closed. Other node kinds
/// produce no output.
///
/// When `traversal_scope` is any `TraversalScope::ChildrenOnly(..)`, `self_node` itself is
/// not emitted — only its descendants are.
///
/// # Errors
///
/// Returns the first error reported by `serializer`.
pub(crate) fn serialize<S: Serializer>(
    self_node: NodeRef<Node>,
    serializer: &mut S,
    traversal_scope: TraversalScope,
) -> Result<(), Error> {
    // Every `ChildrenOnly` scope excludes the root, whether or not it carries a name.
    // Comparing against `ChildrenOnly(None)` alone would emit the root for
    // `ChildrenOnly(Some(..))`. The check is loop-invariant, so compute it once.
    let skip_root = matches!(traversal_scope, TraversalScope::ChildrenOnly(_));

    for edge in self_node.traverse() {
        match edge {
            Edge::Open(node) => {
                if skip_root && node == self_node {
                    continue;
                }

                match *node.value() {
                    Node::Doctype(ref doctype) => {
                        serializer.write_doctype(doctype.name())?;
                    }
                    Node::Comment(ref comment) => {
                        serializer.write_comment(comment)?;
                    }
                    Node::Text(ref text) => {
                        serializer.write_text(text)?;
                    }
                    Node::Element(ref elem) => {
                        // Pass attributes as (name, value-as-&str) pairs.
                        let attrs = elem.attrs.iter().map(|(k, v)| (k, &v[..]));
                        serializer.start_elem(elem.name.clone(), attrs)?;
                    }
                    // Remaining node kinds write nothing when opened.
                    _ => (),
                }
            }

            Edge::Close(node) => {
                // Mirror of the open-edge check: no end tag for a skipped root.
                if skip_root && node == self_node {
                    continue;
                }

                // Only elements have an end tag to write.
                if let Some(elem) = node.value().as_element() {
                    serializer.end_elem(elem.name.clone())?;
                }
            }
        }
    }

    Ok(())
}

0 comments on commit 8a53f01

Please sign in to comment.