weppos · weppos · Aug 23, 2018 · Aug 14, 2018 · Aug 14, 2018 · Aug 14, 2018
diff --git a/cmd/gen/gen.go b/cmd/gen/gen.go
@@ -11,15 +11,15 @@ import (
 	"context"
 	"fmt"
 	"go/format"
+	"io/ioutil"
+	"log"
 	"net/http"
 	"strings"
 	"text/template"
 	"time"
 
 	"github.com/google/go-github/github"
 	"github.com/weppos/publicsuffix-go/publicsuffix"
-	"io/ioutil"
-	"log"
 )
 
 const (
@@ -39,7 +39,9 @@ func init() {
 		{ {{$r.Type}}, "{{$r.Value}}", {{$r.Length}}, {{$r.Private}} },
 		{{end}}
 	}
-	DefaultList.rules = r[:]
+	for i := range r {
+		DefaultList.AddRule(&r[i])
+	}
 }
 
 `

diff --git a/publicsuffix/publicsuffix.go b/publicsuffix/publicsuffix.go
@@ -11,7 +11,6 @@ import (
 	"io"
 	"net/http/cookiejar"
 	"os"
-	"regexp"
 	"strings"
 
 	"golang.org/x/net/idna"
@@ -80,13 +79,14 @@ type FindOptions struct {
 // List represents a Public Suffix List.
 type List struct {
 	// rules is kept private because you should not access rules directly
-	// for lookup optimization the list will not be guaranteed to be a simple slice forever
-	rules []Rule
+	rules map[string]*Rule
 }
 
 // NewList creates a new empty list.
 func NewList() *List {
-	return &List{}
+	return &List{
+		rules: map[string]*Rule{},
+	}
 }
 
 // NewListFromString parses a string that represents a Public Suffix source
@@ -132,7 +132,7 @@ func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
 // The list may be optimized internally for lookups, therefore the algorithm
 // will decide the best position for the new rule.
 func (l *List) AddRule(r *Rule) error {
-	l.rules = append(l.rules, *r)
+	l.rules[r.Value] = r
 	return nil
 }
 
@@ -195,43 +195,23 @@ Scanning:
 
 // Find and returns the most appropriate rule for the domain name.
 func (l *List) Find(name string, options *FindOptions) *Rule {
-	var bestRule *Rule
-
 	if options == nil {
 		options = DefaultFindOptions
 	}
 
-	for _, r := range l.selectRules(name, options) {
-		if r.Type == ExceptionType {
-			return &r
-		}
-		if bestRule == nil || bestRule.Length < r.Length {
-			bestRule = &r
-		}
-	}
-
-	if bestRule != nil {
-		return bestRule
-	}
-
-	return options.DefaultRule
-}
-
-func (l *List) selectRules(name string, options *FindOptions) []Rule {
-	var found []Rule
-
-	// In this phase the search is a simple sequential scan
-	for _, rule := range l.rules {
-		if !rule.Match(name) {
-			continue
+	for {
+		rule, ok := l.rules[name]
+		if ok && (!options.IgnorePrivate || !rule.Private) {
+			return rule
 		}
-		if options.IgnorePrivate && rule.Private {
-			continue
+		i := strings.IndexRune(name, '.')
+		if i < 0 {
+			return options.DefaultRule
 		}
-		found = append(found, rule)
+		name = name[i+1:]
 	}
 
-	return found
+	return nil
 }
 
 // NewRule parses the rule content, creates and returns a Rule.
@@ -309,36 +289,46 @@ func (r *Rule) Match(name string) bool {
 
 // Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
 // according to the rule definition and type.
-func (r *Rule) Decompose(name string) [2]string {
-	var parts []string
-
+func (r *Rule) Decompose(name string) (result [2]string) {
+	if r == DefaultRule {
+		i := strings.LastIndex(name, ".")
+		if i < 0 {
+			return
+		}
+		result[0], result[1] = name[:i], name[i+1:]
+		return
+	}
 	switch r.Type {
+	case NormalType:
+		name = strings.TrimSuffix(name, r.Value)
+		if len(name) == 0 {
+			return
+		}
+		result[0], result[1] = name[:len(name)-1], r.Value
 	case WildcardType:
-		parts = append([]string{`.*?`}, r.parts()...)
-	default:
-		parts = r.parts()
-	}
-
-	suffix := strings.Join(parts, `\.`)
-	re := regexp.MustCompile(fmt.Sprintf(`^(.+)\.(%s)$`, suffix))
-
-	matches := re.FindStringSubmatch(name)
-	if len(matches) < 3 {
-		return [2]string{"", ""}
-	}
-
-	return [2]string{matches[1], matches[2]}
-}
-
-func (r *Rule) parts() []string {
-	labels := Labels(r.Value)
-	if r.Type == ExceptionType {
-		return labels[1:]
-	}
-	if r.Type == WildcardType && r.Value == "" {
-		return []string{}
+		name := strings.TrimSuffix(name, r.Value)
+		if len(name) == 0 {
+			return
+		}
+		name = name[:len(name)-1]
+		i := strings.LastIndex(name, ".")
+		if i < 0 {
+			return
+		}
+		result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
+	case ExceptionType:
+		i := strings.IndexRune(r.Value, '.')
+		if i < 0 {
+			return
+		}
+		suffix := r.Value[i+1:]
+		name = strings.TrimSuffix(name, suffix)
+		if len(name) == 0 {
+			return
+		}
+		result[0], result[1] = name[:len(name)-1], suffix
 	}
-	return labels
+	return
 }
 
 // Labels decomposes given domain name into labels,
@@ -432,7 +422,6 @@ func DomainFromListWithOptions(l *List, name string, options *FindOptions) (stri
 	if err != nil {
 		return "", err
 	}
-
 	return dn.SLD + "." + dn.TLD, nil
 }
 
@@ -458,44 +447,38 @@ func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*Doma
 	}
 
 	r := l.Find(n, options)
-	if tld := r.Decompose(n)[1]; tld == "" {
+	parts := r.Decompose(n)
+	left, tld := parts[0], parts[1]
+	if tld == "" {
 		return nil, fmt.Errorf("%s is a suffix", n)
 	}
 
-	dn := &DomainName{Rule: r}
-	dn.TLD, dn.SLD, dn.TRD = decompose(r, n)
+	dn := &DomainName{
+		Rule: r,
+		TLD:  tld,
+	}
+	if i := strings.LastIndex(left, "."); i < 0 {
+		dn.SLD = left
+	} else {
+		dn.TRD = left[:i]
+		dn.SLD = left[i+1:]
+	}
 	return dn, nil
 }
 
 func normalize(name string) (string, error) {
 	ret := strings.ToLower(name)
 
 	if ret == "" {
-		return "", fmt.Errorf("Name is blank")
+		return "", fmt.Errorf("name is blank")
 	}
 	if ret[0] == '.' {
-		return "", fmt.Errorf("Name %s starts with a dot", ret)
+		return "", fmt.Errorf("name %s starts with a dot", ret)
 	}
 
 	return ret, nil
 }
 
-func decompose(r *Rule, name string) (tld, sld, trd string) {
-	parts := r.Decompose(name)
-	left, tld := parts[0], parts[1]
-
-	dot := strings.LastIndex(left, ".")
-	if dot == -1 {
-		sld = left
-		trd = ""
-	} else {
-		sld = left[dot+1:]
-		trd = left[0:dot]
-	}
-
-	return
-}
-
 // ToASCII is a wrapper for idna.ToASCII.
 //
 // This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months

diff --git a/publicsuffix/publicsuffix_test.go b/publicsuffix/publicsuffix_test.go
@@ -3,6 +3,8 @@ package publicsuffix
 import (
 	"reflect"
 	"testing"
+
+	xlib "golang.org/x/net/publicsuffix"
 )
 
 func TestNewListFromString(t *testing.T) {
@@ -42,7 +44,7 @@ blogspot.com
 	testRules = []Rule{}
 	for _, rule := range rules {
 		if rule.Private == false {
-			testRules = append(testRules, rule)
+			testRules = append(testRules, *rule)
 		}
 	}
 	if want, got := 2, len(testRules); want != got {
@@ -53,7 +55,7 @@ blogspot.com
 	testRules = []Rule{}
 	for _, rule := range rules {
 		if rule.Private == true {
-			testRules = append(testRules, rule)
+			testRules = append(testRules, *rule)
 		}
 	}
 	if want, got := 1, len(testRules); want != got {
@@ -141,7 +143,7 @@ func TestNewListFromFile(t *testing.T) {
 	testRules = []Rule{}
 	for _, rule := range rules {
 		if rule.Private == false {
-			testRules = append(testRules, rule)
+			testRules = append(testRules, *rule)
 		}
 	}
 	if want, got := 2, len(testRules); want != got {
@@ -152,7 +154,7 @@ func TestNewListFromFile(t *testing.T) {
 	testRules = []Rule{}
 	for _, rule := range rules {
 		if rule.Private == true {
-			testRules = append(testRules, rule)
+			testRules = append(testRules, *rule)
 		}
 	}
 	if want, got := 1, len(testRules); want != got {
@@ -173,8 +175,10 @@ func TestListAddRule(t *testing.T) {
 	if list.Size() != 1 {
 		t.Fatalf("List should have 1 rule, got %v", list.Size())
 	}
-	if got := &list.rules[0]; !reflect.DeepEqual(rule, got) {
-		t.Fatalf("List[0] expected to be %v, got %v", rule, got)
+	for _, got := range list.rules {
+		if !reflect.DeepEqual(rule, got) {
+			t.Fatalf("List[0] expected to be %v, got %v", rule, got)
+		}
 	}
 }
 
@@ -439,3 +443,36 @@ func TestCookieJarList(t *testing.T) {
 		}
 	}
 }
+
+var benchmarkTestCases = map[string]string{
+	"example.com":                         "example.com",
+	"example.id.au":                       "example.id.au",
+	"www.ck":                              "www.ck",
+	"foo.bar.xn--55qx5d.cn":               "bar.xn--55qx5d.cn",
+	"a.b.c.minami.fukuoka.jp":             "c.minami.fukuoka.jp",
+	"posts-and-telecommunications.museum": "",
+	"www.example.pvt.k12.ma.us":           "example.pvt.k12.ma.us",
+	"many.lol":                            "many.lol",
+	"the.russian.for.moscow.is.xn--80adxhks": "is.xn--80adxhks",
+	"blah.blah.s3-us-west-1.amazonaws.com":   "blah.s3-us-west-1.amazonaws.com",
+	"thing.dyndns.org":                       "thing.dyndns.org",
+	"nosuchtld":                              "",
+}
+
+func benchmarkDomain(b *testing.B, domainFunc func(string) (string, error)) {
+	var got string
+	for i := 0; i < b.N; i++ {
+		for input := range benchmarkTestCases {
+			got, _ = domainFunc(input)
+		}
+	}
+	_ = got
+}
+
+func BenchmarkDomain(b *testing.B) {
+	benchmarkDomain(b, Domain)
+}
+
+func BenchmarkXNet(b *testing.B) {
+	benchmarkDomain(b, xlib.EffectiveTLDPlusOne)
+}
diff --git a/publicsuffix/rules.go b/publicsuffix/rules.go
@@ -8626,5 +8626,7 @@ func init() {
 		{1, "now.sh", 2, true},
 		{1, "zone.id", 2, true},
 	}
-	DefaultList.rules = r[:]
+	for i := range r {
+		DefaultList.AddRule(&r[i])
+	}
 }