Skip to content

Commit

Permalink
Add comments for html5ever/examples (#506)
Browse files Browse the repository at this point in the history
* Adding comments for arena

* Adding comments for noop-tokenize

* Adding comments for noop-tree-builder

* Adding comments for print-tree-actions

* Add comments for tokenize

* Removing redundant comments

---------

Co-authored-by: Or Gany <gangana3@gmail.com>
Co-authored-by: Martin Robinson <mrobinson@igalia.com>
  • Loading branch information
3 people committed Apr 3, 2024
1 parent 030bfeb commit 5cc0951
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 11 deletions.
35 changes: 26 additions & 9 deletions html5ever/examples/arena.rs
Expand Up @@ -19,36 +19,32 @@ use std::collections::HashSet;
use std::io::{self, Read};
use std::ptr;

/// Reads an HTML document from standard input and parses it into an
/// arena-allocated DOM.
fn main() {
    // Buffer the whole document before handing it to the parser.
    let mut input = Vec::new();
    io::stdin().read_to_end(&mut input).unwrap();

    // All parsed nodes are owned by this arena and freed together
    // when it goes out of scope.
    let arena = typed_arena::Arena::new();
    html5ever_parse_slice_into_arena(&input, &arena);
}

/// Parses `bytes` (expected to be UTF-8 HTML) into nodes allocated inside
/// `arena`, returning a reference to the document root. By using our Sink
/// type, the arena is filled with parsed HTML.
fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
    // The document node is allocated up front; every node the parser creates
    // afterwards lives in the same arena and hangs off this root.
    let document = arena.alloc(Node::new(NodeData::Document));

    let sink = Sink {
        arena,
        document,
        quirks_mode: QuirksMode::NoQuirks,
    };

    parse_document(sink, Default::default())
        .from_utf8()
        .one(bytes)
}

/// Shared reference to the typed arena that owns every `Node` in the tree.
type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;

/// A borrowed DOM node; valid for as long as the arena it was allocated in.
type Ref<'arena> = &'arena Node<'arena>;

/// Interior-mutable, optional link to another node (used below for
/// parent/sibling fields of `Node`).
type Link<'arena> = Cell<Option<Ref<'arena>>>;

/// Sink struct is responsible for handling how the data that comes out of the HTML parsing
/// unit (TreeBuilder in our case) is handled.
struct Sink<'arena> {
    // Arena in which every parsed `Node` is allocated.
    arena: Arena<'arena>,
    // The root document node, allocated before parsing starts.
    document: Ref<'arena>,
    // Quirks mode of the document; initialized to `NoQuirks`.
    quirks_mode: QuirksMode,
}

/// DOM node which contains links to other nodes in the tree.
pub struct Node<'arena> {
parent: Link<'arena>,
next_sibling: Link<'arena>,
Expand All @@ -58,6 +54,7 @@ pub struct Node<'arena> {
data: NodeData<'arena>,
}

/// HTML node data which can be an element, a comment, a string, a DOCTYPE, etc...
pub enum NodeData<'arena> {
Document,
Doctype {
Expand Down Expand Up @@ -178,6 +175,11 @@ impl<'arena> Sink<'arena> {
}
}

/// By implementing the TreeSink trait we determine how the data from the tree building step
/// is processed. In our case, our data is allocated in the arena and added to the Node data
/// structure.
///
/// For a deeper understanding of each function, see the TreeSink declaration.
impl<'arena> TreeSink for Sink<'arena> {
type Handle = Ref<'arena>;
type Output = Ref<'arena>;
Expand Down Expand Up @@ -333,3 +335,18 @@ impl<'arena> TreeSink for Sink<'arena> {
}
}
}

/// In this example an "arena" is created and filled with the DOM nodes.
/// "Arena" is a type of allocation in which a block of memory is allocated
/// and later filled with data, DOM nodes in this case. When the arena is deallocated
/// it is destroyed with all of its items.
///
/// Further info about arena: https://docs.rs/typed-arena/latest/typed_arena/
fn main() {
// Read HTML from the standard input
let mut bytes = Vec::new();
io::stdin().read_to_end(&mut bytes).unwrap();

let arena = typed_arena::Arena::new();
html5ever_parse_slice_into_arena(&bytes, &arena);
}
9 changes: 7 additions & 2 deletions html5ever/examples/noop-tokenize.rs
Expand Up @@ -16,22 +16,27 @@ use std::io;
use html5ever::tendril::*;
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};


/// Token sink that simply accumulates every token it receives.
/// In our case, our sink only contains a tokens vector.
struct Sink(Vec<Token>);

impl TokenSink for Sink {
type Handle = ();

/// Each processed token will be handled by this method
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
// Don't use the token, but make sure we don't get
// optimized out entirely.
self.0.push(token);
TokenSinkResult::Continue
}
}

/// In this example we implement the TokenSink trait which lets us implement how each
/// parsed token is treated. In our example we take each token and insert it into a vector.
fn main() {
// Read HTML from standard input
let mut chunk = ByteTendril::new();
io::stdin().read_to_tendril(&mut chunk).unwrap();

let mut input = BufferQueue::default();
input.push_back(chunk.try_reinterpret().unwrap());

Expand Down
8 changes: 8 additions & 0 deletions html5ever/examples/noop-tree-builder.rs
Expand Up @@ -32,6 +32,10 @@ impl Sink {
}
}

/// By implementing the TreeSink trait we determine how the data from the tree building step
/// is processed. In this case the DOM elements are written into the "names" hashmap.
///
/// For a deeper understanding of each function, see the TreeSink declaration.
impl TreeSink for Sink {
type Handle = usize;
type Output = Self;
Expand Down Expand Up @@ -98,11 +102,15 @@ impl TreeSink for Sink {
fn mark_script_already_started(&mut self, _node: &usize) {}
}

/// In this example we implement the TreeSink trait, which takes each parsed element and inserts
/// it into a hashmap, while each element is given a numeric id.
fn main() {
let sink = Sink {
next_id: 1,
names: HashMap::new(),
};

// Read HTML from the standard input and parse it
let stdin = io::stdin();
parse_document(sink, Default::default())
.from_utf8()
Expand Down
3 changes: 3 additions & 0 deletions html5ever/examples/print-tree-actions.rs
Expand Up @@ -158,6 +158,9 @@ impl TreeSink for Sink {
}
}

/// Same example as the "noop-tree-builder", but this time every function implemented in our
/// Sink object prints a log, so it's easier to get an understanding of when each function is
/// called.
fn main() {
let sink = Sink {
next_id: 1,
Expand Down
6 changes: 6 additions & 0 deletions html5ever/examples/tokenize.rs
Expand Up @@ -81,10 +81,15 @@ impl TokenSink for TokenPrinter {
}
}

/// In this example we implement the TokenSink trait in such a way that each token is printed.
/// If there's an error while processing a token, it is printed as well.
fn main() {
let mut sink = TokenPrinter { in_char_run: false };

// Read HTML from standard input
let mut chunk = ByteTendril::new();
io::stdin().read_to_tendril(&mut chunk).unwrap();

let mut input = BufferQueue::default();
input.push_back(chunk.try_reinterpret().unwrap());

Expand All @@ -96,6 +101,7 @@ fn main() {
},
);
let _ = tok.feed(&mut input);

assert!(input.is_empty());
tok.end();
sink.is_char(false);
Expand Down

0 comments on commit 5cc0951

Please sign in to comment.