diff --git a/Cargo.toml b/Cargo.toml index 27c8c0fc..8f91409b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,11 +15,11 @@ repository = "https://github.com/causal-agent/scraper" readme = "README.md" [dependencies] -cssparser = "0.27" +cssparser = "0.28" ego-tree = "0.6.2" html5ever = "0.26" matches = "0.1.9" -selectors = "0.22.0" +selectors = "0.23.0" smallvec = "1.9.0" tendril = "0.4.3" indexmap = { version = "1.9.1", optional = true } diff --git a/src/element_ref/element.rs b/src/element_ref/element.rs index 20e4d0de..6998dc1f 100644 --- a/src/element_ref/element.rs +++ b/src/element_ref/element.rs @@ -1,10 +1,10 @@ -use html5ever::{LocalName, Namespace}; +use html5ever::Namespace; use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; use selectors::matching; use selectors::{Element, OpaqueElement}; use super::ElementRef; -use crate::selector::{NonTSPseudoClass, PseudoElement, Simple}; +use crate::selector::{CssLocalName, CssString, NonTSPseudoClass, PseudoElement, Simple}; /// Note: will never match against non-tree-structure pseudo-classes. impl<'a> Element for ElementRef<'a> { @@ -30,7 +30,7 @@ impl<'a> Element for ElementRef<'a> { false } - fn is_part(&self, _name: &LocalName) -> bool { + fn is_part(&self, _name: &CssLocalName) -> bool { false } @@ -38,11 +38,7 @@ impl<'a> Element for ElementRef<'a> { self.value().name == other.value().name } - fn exported_part(&self, _: &LocalName) -> Option { - None - } - - fn imported_part(&self, _: &LocalName) -> Option { + fn imported_part(&self, _: &CssLocalName) -> Option { None } @@ -63,8 +59,8 @@ impl<'a> Element for ElementRef<'a> { self.value().name.ns == ns!(html) } - fn has_local_name(&self, name: &LocalName) -> bool { - &self.value().name.local == name + fn has_local_name(&self, name: &CssLocalName) -> bool { + self.value().name.local == name.0 } fn has_namespace(&self, namespace: &Namespace) -> bool { @@ -74,12 +70,12 @@ impl<'a> Element for ElementRef<'a> { fn attr_matches( &self, ns: &NamespaceConstraint<&Namespace>, - local_name: &LocalName, - operation: &AttrSelectorOperation<&String>, + local_name: &CssLocalName, + operation: &AttrSelectorOperation<&CssString>, ) -> bool { self.value().attrs.iter().any(|(key, value)| { !matches!(*ns, NamespaceConstraint::Specific(url) if *url != key.ns) - && *local_name == key.local + && local_name.0 == key.local && operation.eval_str(value) }) } @@ -109,15 +105,15 @@ impl<'a> Element for ElementRef<'a> { true } - fn has_id(&self, id: &LocalName, case_sensitivity: CaseSensitivity) -> bool { + fn has_id(&self, id: &CssLocalName, case_sensitivity: CaseSensitivity) -> bool { match self.value().id { - Some(ref val) => case_sensitivity.eq(id.as_bytes(), val.as_bytes()), + Some(ref val) => case_sensitivity.eq(id.0.as_bytes(), val.as_bytes()), None => false, } } - fn has_class(&self, name: &LocalName, case_sensitivity: CaseSensitivity) -> bool { - self.value().has_class(name, case_sensitivity) + fn has_class(&self, name: &CssLocalName, case_sensitivity: CaseSensitivity) -> bool { + self.value().has_class(&name.0, case_sensitivity) } fn is_empty(&self) -> bool { @@ -135,14 +131,12 @@ impl<'a> Element for ElementRef<'a> { #[cfg(test)] mod tests { use crate::html::Html; - use crate::selector::Selector; + use crate::selector::{CssLocalName, Selector}; use selectors::attr::CaseSensitivity; use selectors::Element; #[test] fn test_has_id() { - use html5ever::LocalName; - let html = ""; let fragment = Html::parse_fragment(html); let sel = Selector::parse("p").unwrap(); @@ -151,7 +145,7 @@ mod tests { assert_eq!( true, element.has_id( - &LocalName::from("link_id_456"), + &CssLocalName::from("link_id_456"), CaseSensitivity::CaseSensitive ) ); @@ -162,7 +156,7 @@ mod tests { assert_eq!( false, element.has_id( - &LocalName::from("any_link_id"), + &CssLocalName::from("any_link_id"), CaseSensitivity::CaseSensitive ) ); @@ -185,14 +179,16 @@ mod tests { #[test] fn test_has_class() { - use html5ever::LocalName; let html = "

hey there

"; let fragment = Html::parse_fragment(html); let sel = Selector::parse("p").unwrap(); let element = fragment.select(&sel).next().unwrap(); assert_eq!( true, - element.has_class(&LocalName::from("my_class"), CaseSensitivity::CaseSensitive) + element.has_class( + &CssLocalName::from("my_class"), + CaseSensitivity::CaseSensitive + ) ); let html = "

hey there

"; @@ -201,7 +197,10 @@ mod tests { let element = fragment.select(&sel).next().unwrap(); assert_eq!( false, - element.has_class(&LocalName::from("my_class"), CaseSensitivity::CaseSensitive) + element.has_class( + &CssLocalName::from("my_class"), + CaseSensitivity::CaseSensitive + ) ); } } diff --git a/src/selector.rs b/src/selector.rs index 2e1a14e7..a33c2a9c 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -7,7 +7,7 @@ use smallvec::SmallVec; use html5ever::{LocalName, Namespace}; use selectors::parser::SelectorParseErrorKind; -use selectors::{matching, parser, visitor}; +use selectors::{matching, parser}; use crate::error::SelectorErrorKind; use crate::ElementRef; @@ -67,15 +67,13 @@ impl<'i> parser::Parser<'i> for Parser { pub struct Simple; impl parser::SelectorImpl for Simple { - type AttrValue = String; - type Identifier = LocalName; - type ClassName = LocalName; - type PartName = LocalName; - type LocalName = LocalName; - type NamespacePrefix = LocalName; + type AttrValue = CssString; + type Identifier = CssLocalName; + type LocalName = CssLocalName; + type NamespacePrefix = CssLocalName; type NamespaceUrl = Namespace; type BorrowedNamespaceUrl = Namespace; - type BorrowedLocalName = LocalName; + type BorrowedLocalName = CssLocalName; type NonTSPseudoClass = NonTSPseudoClass; type PseudoElement = PseudoElement; @@ -84,6 +82,50 @@ impl parser::SelectorImpl for Simple { type ExtraMatchingData = String; } +/// Wraps [`String`] so that it can be used with [`selectors`] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CssString(pub String); + +impl<'a> From<&'a str> for CssString { + fn from(val: &'a str) -> Self { + Self(val.to_owned()) + } +} + +impl AsRef for CssString { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl cssparser::ToCss for CssString { + fn to_css(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + cssparser::serialize_string(&self.0, dest) + } +} + +/// Wraps [`LocalName`] so that it can be used with [`selectors`] +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct CssLocalName(pub LocalName); + +impl<'a> From<&'a str> for CssLocalName { + fn from(val: &'a str) -> Self { + Self(val.into()) + } +} + +impl cssparser::ToCss for CssLocalName { + fn to_css(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + dest.write_str(&self.0) + } +} + /// Non Tree-Structural Pseudo-Class. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum NonTSPseudoClass {} @@ -98,21 +140,6 @@ impl parser::NonTSPseudoClass for NonTSPseudoClass { fn is_user_action_state(&self) -> bool { false } - - fn has_zero_specificity(&self) -> bool { - false - } -} - -impl parser::Visit for NonTSPseudoClass { - type Impl = Simple; - - fn visit(&self, _visitor: &mut V) -> bool - where - V: visitor::SelectorVisitor, - { - true - } } impl cssparser::ToCss for NonTSPseudoClass {