diff --git a/parser.go b/parser.go index 4f8810e..bebf0af 100644 --- a/parser.go +++ b/parser.go @@ -13,6 +13,10 @@ import ( type parser struct { s string // the source text i int // the current position + + // if `false`, parsing a pseudo-element + // returns an error. + acceptPseudoElements bool } // parseEscape parses a backslash escape. @@ -422,17 +426,25 @@ var errExpectedParenthesis = errors.New("expected '(' but didn't find it") var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") var errUnmatchedParenthesis = errors.New("unmatched '('") -// parsePseudoclassSelector parses a pseudoclass selector like :not(p) -func (p *parser) parsePseudoclassSelector() (out Sel, err error) { +// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element +// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements. +// https://drafts.csswg.org/selectors-3/#pseudo-elements +// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element. 
+func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) { if p.i >= len(p.s) { - return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") + return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") } if p.s[p.i] != ':' { - return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) + return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) } p.i++ + var mustBePseudoElement bool + if p.i >= len(p.s) { + return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)") + } if p.s[p.i] == ':' { // we found a pseudo-element + mustBePseudoElement = true p.i++ } @@ -441,27 +453,33 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) { return } name = toLowerASCII(name) + if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" && + name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" && + name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") { + return out, "", fmt.Errorf("unknown pseudoelement :%s", name) + } + switch name { case "not", "has", "haschild": if !p.consumeParenthesis() { - return out, errExpectedParenthesis + return out, "", errExpectedParenthesis } sel, parseErr := p.parseSelectorGroup() if parseErr != nil { - return out, parseErr + return out, "", parseErr } if !p.consumeClosingParenthesis() { - return out, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } out = relativePseudoClassSelector{name: name, match: sel} case "contains", "containsown": if !p.consumeParenthesis() { - return out, errExpectedParenthesis + return out, "", errExpectedParenthesis } if p.i == len(p.s) { - return out, errUnmatchedParenthesis + return out, "", errUnmatchedParenthesis } var val string switch p.s[p.i] { @@ -471,46 +489,46 @@ func (p 
*parser) parsePseudoclassSelector() (out Sel, err error) { val, err = p.parseIdentifier() } if err != nil { - return out, err + return out, "", err } val = strings.ToLower(val) p.skipWhitespace() if p.i >= len(p.s) { - return out, errors.New("unexpected EOF in pseudo selector") + return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { - return out, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } out = containsPseudoClassSelector{own: name == "containsown", value: val} case "matches", "matchesown": if !p.consumeParenthesis() { - return out, errExpectedParenthesis + return out, "", errExpectedParenthesis } rx, err := p.parseRegex() if err != nil { - return out, err + return out, "", err } if p.i >= len(p.s) { - return out, errors.New("unexpected EOF in pseudo selector") + return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { - return out, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx} case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": if !p.consumeParenthesis() { - return out, errExpectedParenthesis + return out, "", errExpectedParenthesis } a, b, err := p.parseNth() if err != nil { - return out, err + return out, "", err } if !p.consumeClosingParenthesis() { - return out, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } last := name == "nth-last-child" || name == "nth-last-of-type" ofType := name == "nth-of-type" || name == "nth-last-of-type" @@ -535,9 +553,9 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) { case "root": out = rootPseudoClassSelector{} case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error": - return out, errors.New("pseudo-elements are not yet supported") + return 
nil, name, nil default: - return out, fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) + return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) } return } @@ -706,11 +724,13 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) { selectors = append(selectors, r) } + var pseudoElement string loop: for p.i < len(p.s) { var ( - ns Sel - err error + ns Sel + newPseudoElement string + err error ) switch p.s[p.i] { case '#': @@ -720,20 +740,37 @@ loop: case '[': ns, err = p.parseAttributeSelector() case ':': - ns, err = p.parsePseudoclassSelector() + ns, newPseudoElement, err = p.parsePseudoclassSelector() default: break loop } if err != nil { return nil, err } + // From https://drafts.csswg.org/selectors-3/#pseudo-elements : + // "Only one pseudo-element may appear per selector, and if present + // it must appear after the sequence of simple selectors that + // represents the subjects of the selector." + if ns == nil { // we found a pseudo-element + if pseudoElement != "" { + return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement) + } + if !p.acceptPseudoElements { + return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement) + } + pseudoElement = newPseudoElement + } else { + if pseudoElement != "" { + return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement) + } + selectors = append(selectors, ns) + } - selectors = append(selectors, ns) } - if len(selectors) == 1 { // no need wrap the selectors in compoundSelector + if len(selectors) == 1 && pseudoElement == "" { // no need to wrap the selectors in compoundSelector return selectors[0], nil } - return compoundSelector{selectors: selectors}, nil + return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil } // parseSelector parses a selector that may include combinators. 
diff --git a/selector.go b/selector.go index 18ce116..d7631e0 100644 --- a/selector.go +++ b/selector.go @@ -16,14 +16,16 @@ type Matcher interface { } // Sel is the interface for all the functionality provided by selectors. -// It is currently the same as Matcher, but other methods may be added in the -// future. type Sel interface { Matcher Specificity() Specificity + + // Return a pseudo-element, or an empty string. + PseudoElement() string } -// Parse parses a selector. +// Parse parses a selector. Use `ParseWithPseudoElement` +// if you need support for pseudo-elements. func Parse(sel string) (Sel, error) { p := &parser{s: sel} compiled, err := p.parseSelector() @@ -38,7 +40,25 @@ func Parse(sel string) (Sel, error) { return compiled, nil } +// ParseWithPseudoElement parses a single selector, +// with support for pseudo-element. +func ParseWithPseudoElement(sel string) (Sel, error) { + p := &parser{s: sel, acceptPseudoElements: true} + compiled, err := p.parseSelector() + if err != nil { + return nil, err + } + + if p.i < len(sel) { + return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) + } + + return compiled, nil +} + // ParseGroup parses a selector, or a group of selectors separated by commas. +// Use `ParseGroupWithPseudoElements` +// if you need support for pseudo-elements. func ParseGroup(sel string) (SelectorGroup, error) { p := &parser{s: sel} compiled, err := p.parseSelectorGroup() @@ -53,6 +73,22 @@ func ParseGroup(sel string) (SelectorGroup, error) { return compiled, nil } +// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas. +// It supports pseudo-elements. 
+func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) { + p := &parser{s: sel, acceptPseudoElements: true} + compiled, err := p.parseSelectorGroup() + if err != nil { + return nil, err + } + + if p.i < len(sel) { + return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) + } + + return compiled, nil +} + // A Selector is a function which tells whether a node matches or not. // // This type is maintained for compatibility; I recommend using the newer and @@ -182,6 +218,10 @@ func (c tagSelector) Specificity() Specificity { return Specificity{0, 0, 1} } +func (c tagSelector) PseudoElement() string { + return "" +} + type classSelector struct { class string } @@ -197,6 +237,10 @@ func (c classSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c classSelector) PseudoElement() string { + return "" +} + type idSelector struct { id string } @@ -212,6 +256,10 @@ func (c idSelector) Specificity() Specificity { return Specificity{1, 0, 0} } +func (c idSelector) PseudoElement() string { + return "" +} + type attrSelector struct { key, val, operation string regexp *regexp.Regexp @@ -352,6 +400,10 @@ func (c attrSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c attrSelector) PseudoElement() string { + return "" +} + // ---------------- Pseudo class selectors ---------------- // we use severals concrete types of pseudo-class selectors @@ -415,6 +467,10 @@ func (s relativePseudoClassSelector) Specificity() Specificity { return max } +func (c relativePseudoClassSelector) PseudoElement() string { + return "" +} + type containsPseudoClassSelector struct { own bool value string @@ -436,6 +492,10 @@ func (s containsPseudoClassSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c containsPseudoClassSelector) PseudoElement() string { + return "" +} + type regexpPseudoClassSelector struct { own bool regexp *regexp.Regexp @@ -488,6 +548,10 @@ func (s regexpPseudoClassSelector) 
Specificity() Specificity { return Specificity{0, 1, 0} } +func (c regexpPseudoClassSelector) PseudoElement() string { + return "" +} + type nthPseudoClassSelector struct { a, b int last, ofType bool @@ -623,6 +687,10 @@ func (s nthPseudoClassSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c nthPseudoClassSelector) PseudoElement() string { + return "" +} + type onlyChildPseudoClassSelector struct { ofType bool } @@ -661,6 +729,10 @@ func (s onlyChildPseudoClassSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c onlyChildPseudoClassSelector) PseudoElement() string { + return "" +} + type inputPseudoClassSelector struct{} // Matches input, select, textarea and button elements. @@ -672,6 +744,10 @@ func (s inputPseudoClassSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c inputPseudoClassSelector) PseudoElement() string { + return "" +} + type emptyElementPseudoClassSelector struct{} // Matches empty elements. @@ -694,6 +770,10 @@ func (s emptyElementPseudoClassSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c emptyElementPseudoClassSelector) PseudoElement() string { + return "" +} + type rootPseudoClassSelector struct{} // Match implements :root @@ -711,8 +791,13 @@ func (s rootPseudoClassSelector) Specificity() Specificity { return Specificity{0, 1, 0} } +func (c rootPseudoClassSelector) PseudoElement() string { + return "" +} + type compoundSelector struct { - selectors []Sel + selectors []Sel + pseudoElement string } // Matches elements if each sub-selectors matches. 
@@ -734,9 +819,17 @@ func (s compoundSelector) Specificity() Specificity { for _, sel := range s.selectors { out = out.Add(sel.Specificity()) } + if s.pseudoElement != "" { + // https://drafts.csswg.org/selectors-3/#specificity + out = out.Add(Specificity{0, 0, 1}) + } return out } +func (c compoundSelector) PseudoElement() string { + return c.pseudoElement +} + type combinedSelector struct { first Sel combinator byte @@ -818,6 +911,15 @@ func (s combinedSelector) Specificity() Specificity { return spec } +// on combinedSelector, a pseudo-element only makes sense on the last +// selector, although others increase specificity. +func (c combinedSelector) PseudoElement() string { + if c.second == nil { + return "" + } + return c.second.PseudoElement() +} + // A SelectorGroup is a list of selectors, which matches if any of the // individual selectors matches. type SelectorGroup []Sel diff --git a/selector_test.go b/selector_test.go index 82598c4..4a6c974 100644 --- a/selector_test.go +++ b/selector_test.go @@ -614,6 +614,14 @@ var selectorTests = []selectorTest{ ``, }, }, + { + `

`, + "body > *:nth-child(3n+2)", + []string{ + "
", + "
", + }, + }, } func setup(selector, testHTML string) (Selector, *html.Node, error) { @@ -634,6 +642,7 @@ func TestSelectors(t *testing.T) { s, doc, err := setup(test.selector, test.HTML) if err != nil { t.Error(err) + continue } matches := s.MatchAll(doc) @@ -710,3 +719,73 @@ func TestMatchers(t *testing.T) { } } } + +type testPseudo struct { + HTML, selector string + spec Specificity + pseudo string +} + +var testsPseudo = []testPseudo{ + { + HTML: ``, + selector: "#s12:not(FOO)::before", + spec: Specificity{1, 0, 2}, + pseudo: "before", + }, + { + HTML: ``, + selector: "#s12::first-line", + spec: Specificity{1, 0, 1}, + pseudo: "first-line", + }, + { + HTML: ``, + selector: "ol > #s12:first-line", + spec: Specificity{1, 0, 2}, + pseudo: "first-line", + }, + { + HTML: ``, + selector: "#s12:not(FOO)::after", + spec: Specificity{1, 0, 2}, + pseudo: "after", + }, + { + HTML: ``, + selector: "LI.red.level:before", + spec: Specificity{0, 2, 2}, + pseudo: "before", + }, +} + +func TestPseudoElement(t *testing.T) { + for _, test := range testsPseudo { + s, err := ParseWithPseudoElement(test.selector) + if err != nil { + t.Fatalf("error compiling %q: %s", test.selector, err) + } + + if _, err = Parse(test.selector); err == nil { + t.Fatalf("selector %s with pseudo-element should not compile", test.selector) + } + + doc, err := html.Parse(strings.NewReader(test.HTML)) + if err != nil { + t.Fatalf("error parsing %q: %s", test.HTML, err) + } + + body := doc.FirstChild.LastChild + testNode := body.FirstChild.FirstChild.LastChild + if !s.Match(testNode) { + t.Errorf("%s didn't match (html tree : \n %s) \n", test.selector, nodeString(doc)) + continue + } + if s.Specificity() != test.spec { + t.Errorf("wrong specificity : expected %v got %v", test.spec, s.Specificity()) + } + if s.PseudoElement() != test.pseudo { + t.Errorf("wrong pseudo-element : expected %s got %s", test.pseudo, s.PseudoElement()) + } + } +} diff --git a/specificity_test.go b/specificity_test.go index 
b9d47ed..338e794 100644 --- a/specificity_test.go +++ b/specificity_test.go @@ -1,6 +1,7 @@ package cascadia import ( + "fmt" "strings" "testing" @@ -82,16 +83,24 @@ var testsSpecificity = []testSpec{ }, } +func setupSel(selector, HTML string) (Sel, *html.Node, error) { + s, err := Parse(selector) + if err != nil { + return nil, nil, fmt.Errorf("error compiling %q: %s", selector, err) + } + + doc, err := html.Parse(strings.NewReader(HTML)) + if err != nil { + return nil, nil, fmt.Errorf("error parsing %q: %s", HTML, err) + } + return s, doc, nil +} + func TestSpecificity(t *testing.T) { for _, test := range testsSpecificity { - s, err := Parse(test.selector) - if err != nil { - t.Fatalf("error compiling %q: %s", test.selector, err) - } - - doc, err := html.Parse(strings.NewReader(test.HTML)) + s, doc, err := setupSel(test.selector, test.HTML) if err != nil { - t.Fatalf("error parsing %q: %s", test.HTML, err) + t.Fatal(err) } body := doc.FirstChild.LastChild testNode := body.FirstChild.FirstChild.LastChild