Skip to content

Commit

Permalink
Hash based lookup implementation (GH-143)
Browse files Browse the repository at this point in the history
Added benchmarks, moved to a hash based implementation and improved performance to match that of golang.org/x/net library.

See GH-133
  • Loading branch information
guliyevemil1 authored and weppos committed Aug 23, 2018
1 parent 4df5def commit b6e96f6
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 93 deletions.
8 changes: 5 additions & 3 deletions cmd/gen/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ import (
"context"
"fmt"
"go/format"
"io/ioutil"
"log"
"net/http"
"strings"
"text/template"
"time"

"github.com/google/go-github/github"
"github.com/weppos/publicsuffix-go/publicsuffix"
"io/ioutil"
"log"
)

const (
Expand All @@ -39,7 +39,9 @@ func init() {
{ {{$r.Type}}, "{{$r.Value}}", {{$r.Length}}, {{$r.Private}} },
{{end}}
}
DefaultList.rules = r[:]
for i := range r {
DefaultList.AddRule(&r[i])
}
}
`
Expand Down
149 changes: 66 additions & 83 deletions publicsuffix/publicsuffix.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"io"
"net/http/cookiejar"
"os"
"regexp"
"strings"

"golang.org/x/net/idna"
Expand Down Expand Up @@ -80,13 +79,14 @@ type FindOptions struct {
// List represents a Public Suffix List.
type List struct {
// rules is kept private because you should not access rules directly
// for lookup optimization the list will not be guaranteed to be a simple slice forever
rules []Rule
rules map[string]*Rule
}

// NewList creates a new empty list.
func NewList() *List {
return &List{}
return &List{
rules: map[string]*Rule{},
}
}

// NewListFromString parses a string that represents a Public Suffix source
Expand Down Expand Up @@ -132,7 +132,7 @@ func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
// The list may be optimized internally for lookups, therefore the algorithm
// will decide the best position for the new rule.
func (l *List) AddRule(r *Rule) error {
l.rules = append(l.rules, *r)
l.rules[r.Value] = r
return nil
}

Expand Down Expand Up @@ -195,43 +195,23 @@ Scanning:

// Find and returns the most appropriate rule for the domain name.
func (l *List) Find(name string, options *FindOptions) *Rule {
var bestRule *Rule

if options == nil {
options = DefaultFindOptions
}

for _, r := range l.selectRules(name, options) {
if r.Type == ExceptionType {
return &r
}
if bestRule == nil || bestRule.Length < r.Length {
bestRule = &r
}
}

if bestRule != nil {
return bestRule
}

return options.DefaultRule
}

func (l *List) selectRules(name string, options *FindOptions) []Rule {
var found []Rule

// In this phase the search is a simple sequential scan
for _, rule := range l.rules {
if !rule.Match(name) {
continue
for {
rule, ok := l.rules[name]
if ok && (!options.IgnorePrivate || !rule.Private) {
return rule
}
if options.IgnorePrivate && rule.Private {
continue
i := strings.IndexRune(name, '.')
if i < 0 {
return options.DefaultRule
}
found = append(found, rule)
name = name[i+1:]
}

return found
return nil
}

// NewRule parses the rule content, creates and returns a Rule.
Expand Down Expand Up @@ -309,36 +289,46 @@ func (r *Rule) Match(name string) bool {

// Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
// according to the rule definition and type.
func (r *Rule) Decompose(name string) [2]string {
var parts []string

func (r *Rule) Decompose(name string) (result [2]string) {
if r == DefaultRule {
i := strings.LastIndex(name, ".")
if i < 0 {
return
}
result[0], result[1] = name[:i], name[i+1:]
return
}
switch r.Type {
case NormalType:
name = strings.TrimSuffix(name, r.Value)
if len(name) == 0 {
return
}
result[0], result[1] = name[:len(name)-1], r.Value
case WildcardType:
parts = append([]string{`.*?`}, r.parts()...)
default:
parts = r.parts()
}

suffix := strings.Join(parts, `\.`)
re := regexp.MustCompile(fmt.Sprintf(`^(.+)\.(%s)$`, suffix))

matches := re.FindStringSubmatch(name)
if len(matches) < 3 {
return [2]string{"", ""}
}

return [2]string{matches[1], matches[2]}
}

func (r *Rule) parts() []string {
labels := Labels(r.Value)
if r.Type == ExceptionType {
return labels[1:]
}
if r.Type == WildcardType && r.Value == "" {
return []string{}
name := strings.TrimSuffix(name, r.Value)
if len(name) == 0 {
return
}
name = name[:len(name)-1]
i := strings.LastIndex(name, ".")
if i < 0 {
return
}
result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
case ExceptionType:
i := strings.IndexRune(r.Value, '.')
if i < 0 {
return
}
suffix := r.Value[i+1:]
name = strings.TrimSuffix(name, suffix)
if len(name) == 0 {
return
}
result[0], result[1] = name[:len(name)-1], suffix
}
return labels
return
}

// Labels decomposes given domain name into labels,
Expand Down Expand Up @@ -432,7 +422,6 @@ func DomainFromListWithOptions(l *List, name string, options *FindOptions) (stri
if err != nil {
return "", err
}

return dn.SLD + "." + dn.TLD, nil
}

Expand All @@ -458,44 +447,38 @@ func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*Doma
}

r := l.Find(n, options)
if tld := r.Decompose(n)[1]; tld == "" {
parts := r.Decompose(n)
left, tld := parts[0], parts[1]
if tld == "" {
return nil, fmt.Errorf("%s is a suffix", n)
}

dn := &DomainName{Rule: r}
dn.TLD, dn.SLD, dn.TRD = decompose(r, n)
dn := &DomainName{
Rule: r,
TLD: tld,
}
if i := strings.LastIndex(left, "."); i < 0 {
dn.SLD = left
} else {
dn.TRD = left[:i]
dn.SLD = left[i+1:]
}
return dn, nil
}

func normalize(name string) (string, error) {
ret := strings.ToLower(name)

if ret == "" {
return "", fmt.Errorf("Name is blank")
return "", fmt.Errorf("name is blank")
}
if ret[0] == '.' {
return "", fmt.Errorf("Name %s starts with a dot", ret)
return "", fmt.Errorf("name %s starts with a dot", ret)
}

return ret, nil
}

func decompose(r *Rule, name string) (tld, sld, trd string) {
parts := r.Decompose(name)
left, tld := parts[0], parts[1]

dot := strings.LastIndex(left, ".")
if dot == -1 {
sld = left
trd = ""
} else {
sld = left[dot+1:]
trd = left[0:dot]
}

return
}

// ToASCII is a wrapper for idna.ToASCII.
//
// This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months
Expand Down
14 changes: 8 additions & 6 deletions publicsuffix/publicsuffix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ blogspot.com
testRules = []Rule{}
for _, rule := range rules {
if rule.Private == false {
testRules = append(testRules, rule)
testRules = append(testRules, *rule)
}
}
if want, got := 2, len(testRules); want != got {
Expand All @@ -55,7 +55,7 @@ blogspot.com
testRules = []Rule{}
for _, rule := range rules {
if rule.Private == true {
testRules = append(testRules, rule)
testRules = append(testRules, *rule)
}
}
if want, got := 1, len(testRules); want != got {
Expand Down Expand Up @@ -143,7 +143,7 @@ func TestNewListFromFile(t *testing.T) {
testRules = []Rule{}
for _, rule := range rules {
if rule.Private == false {
testRules = append(testRules, rule)
testRules = append(testRules, *rule)
}
}
if want, got := 2, len(testRules); want != got {
Expand All @@ -154,7 +154,7 @@ func TestNewListFromFile(t *testing.T) {
testRules = []Rule{}
for _, rule := range rules {
if rule.Private == true {
testRules = append(testRules, rule)
testRules = append(testRules, *rule)
}
}
if want, got := 1, len(testRules); want != got {
Expand All @@ -175,8 +175,10 @@ func TestListAddRule(t *testing.T) {
if list.Size() != 1 {
t.Fatalf("List should have 1 rule, got %v", list.Size())
}
if got := &list.rules[0]; !reflect.DeepEqual(rule, got) {
t.Fatalf("List[0] expected to be %v, got %v", rule, got)
for _, got := range list.rules {
if !reflect.DeepEqual(rule, got) {
t.Fatalf("List[0] expected to be %v, got %v", rule, got)
}
}
}

Expand Down
4 changes: 3 additions & 1 deletion publicsuffix/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -8627,5 +8627,7 @@ func init() {
{1, "now.sh", 2, true},
{1, "zone.id", 2, true},
}
DefaultList.rules = r[:]
for i := range r {
DefaultList.AddRule(&r[i])
}
}

0 comments on commit b6e96f6

Please sign in to comment.