taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

list.go (6711B)


      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 //go:generate go run gen.go
      6 
      7 // Package publicsuffix provides a public suffix list based on data from
      8 // https://publicsuffix.org/
      9 //
     10 // A public suffix is one under which Internet users can directly register
     11 // names. It is related to, but different from, a TLD (top level domain).
     12 //
     13 // "com" is a TLD (top level domain). Top level means it has no dots.
     14 //
     15 // "com" is also a public suffix. Amazon and Google have registered different
     16 // siblings under that domain: "amazon.com" and "google.com".
     17 //
     18 // "au" is another TLD, again because it has no dots. But it's not "amazon.au".
     19 // Instead, it's "amazon.com.au".
     20 //
     21 // "com.au" isn't an actual TLD, because it's not at the top level (it has
     22 // dots). But it is an eTLD (effective TLD), because that's the branching point
     23 // for domain name registrars.
     24 //
     25 // Another name for "an eTLD" is "a public suffix". Often, what's more of
     26 // interest is the eTLD+1, or one more label than the public suffix. For
     27 // example, browsers partition read/write access to HTTP cookies according to
     28 // the eTLD+1. Web pages served from "amazon.com.au" can't read cookies from
     29 // "google.com.au", but web pages served from "maps.google.com" can share
     30 // cookies from "www.google.com", so you don't have to sign into Google Maps
     31 // separately from signing into Google Web Search. Note that all four of those
     32 // domains have 3 labels and 2 dots. The first two domains are each an eTLD+1,
     33 // the last two are not (but share the same eTLD+1: "google.com").
     34 //
     35 // All of these domains have the same eTLD+1:
     36 //   - "www.books.amazon.co.uk"
     37 //   - "books.amazon.co.uk"
     38 //   - "amazon.co.uk"
     39 //
     40 // Specifically, the eTLD+1 is "amazon.co.uk", because the eTLD is "co.uk".
     41 //
     42 // There is no closed form algorithm to calculate the eTLD of a domain.
     43 // Instead, the calculation is data driven. This package provides a
     44 // pre-compiled snapshot of Mozilla's PSL (Public Suffix List) data at
     45 // https://publicsuffix.org/
     46 package publicsuffix // import "golang.org/x/net/publicsuffix"
     47 
     48 // TODO: specify case sensitivity and leading/trailing dot behavior for
     49 // func PublicSuffix and func EffectiveTLDPlusOne.
     50 
     51 import (
     52 	"fmt"
     53 	"net/http/cookiejar"
     54 	"net/netip"
     55 	"strings"
     56 )
     57 
     58 // List implements the cookiejar.PublicSuffixList interface by calling the
     59 // PublicSuffix function.
     60 var List cookiejar.PublicSuffixList = list{}
     61 
     62 type list struct{}
     63 
     64 func (list) PublicSuffix(domain string) string {
     65 	ps, _ := PublicSuffix(domain)
     66 	return ps
     67 }
     68 
     69 func (list) String() string {
     70 	return version
     71 }
     72 
     73 // PublicSuffix returns the public suffix of the domain using a copy of the
     74 // publicsuffix.org database compiled into the library.
     75 //
     76 // icann is whether the public suffix is managed by the Internet Corporation
     77 // for Assigned Names and Numbers. If not, the public suffix is either a
     78 // privately managed domain (and in practice, not a top level domain) or an
     79 // unmanaged top level domain (and not explicitly mentioned in the
     80 // publicsuffix.org list). For example, "foo.org" and "foo.co.uk" are ICANN
     81 // domains, "foo.dyndns.org" is a private domain and
     82 // "cromulent" is an unmanaged top level domain.
     83 //
     84 // Use cases for distinguishing ICANN domains like "foo.com" from private
     85 // domains like "foo.appspot.com" can be found at
     86 // https://wiki.mozilla.org/Public_Suffix_List/Use_Cases
     87 func PublicSuffix(domain string) (publicSuffix string, icann bool) {
     88 	if _, err := netip.ParseAddr(domain); err == nil {
     89 		return domain, false
     90 	}
     91 
     92 	lo, hi := uint32(0), uint32(numTLD)
     93 	s, suffix, icannNode, wildcard := domain, len(domain), false, false
     94 loop:
     95 	for {
     96 		dot := strings.LastIndexByte(s, '.')
     97 		if wildcard {
     98 			icann = icannNode
     99 			suffix = 1 + dot
    100 		}
    101 		if lo == hi {
    102 			break
    103 		}
    104 		f := find(s[1+dot:], lo, hi)
    105 		if f == notFound {
    106 			break
    107 		}
    108 
    109 		u := uint32(nodes.get(f) >> (nodesBitsTextOffset + nodesBitsTextLength))
    110 		icannNode = u&(1<<nodesBitsICANN-1) != 0
    111 		u >>= nodesBitsICANN
    112 		u = children.get(u & (1<<nodesBitsChildren - 1))
    113 		lo = u & (1<<childrenBitsLo - 1)
    114 		u >>= childrenBitsLo
    115 		hi = u & (1<<childrenBitsHi - 1)
    116 		u >>= childrenBitsHi
    117 		switch u & (1<<childrenBitsNodeType - 1) {
    118 		case nodeTypeNormal:
    119 			suffix = 1 + dot
    120 		case nodeTypeException:
    121 			suffix = 1 + len(s)
    122 			break loop
    123 		}
    124 		u >>= childrenBitsNodeType
    125 		wildcard = u&(1<<childrenBitsWildcard-1) != 0
    126 		if !wildcard {
    127 			icann = icannNode
    128 		}
    129 
    130 		if dot == -1 {
    131 			break
    132 		}
    133 		s = s[:dot]
    134 	}
    135 	if suffix == len(domain) {
    136 		// If no rules match, the prevailing rule is "*".
    137 		return domain[1+strings.LastIndexByte(domain, '.'):], icann
    138 	}
    139 	return domain[suffix:], icann
    140 }
    141 
    142 const notFound uint32 = 1<<32 - 1
    143 
    144 // find returns the index of the node in the range [lo, hi) whose label equals
    145 // label, or notFound if there is no such node. The range is assumed to be in
    146 // strictly increasing node label order.
    147 func find(label string, lo, hi uint32) uint32 {
    148 	for lo < hi {
    149 		mid := lo + (hi-lo)/2
    150 		s := nodeLabel(mid)
    151 		if s < label {
    152 			lo = mid + 1
    153 		} else if s == label {
    154 			return mid
    155 		} else {
    156 			hi = mid
    157 		}
    158 	}
    159 	return notFound
    160 }
    161 
    162 // nodeLabel returns the label for the i'th node.
    163 func nodeLabel(i uint32) string {
    164 	x := nodes.get(i)
    165 	length := x & (1<<nodesBitsTextLength - 1)
    166 	x >>= nodesBitsTextLength
    167 	offset := x & (1<<nodesBitsTextOffset - 1)
    168 	return text[offset : offset+length]
    169 }
    170 
    171 // EffectiveTLDPlusOne returns the effective top level domain plus one more
    172 // label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
    173 func EffectiveTLDPlusOne(domain string) (string, error) {
    174 	if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") {
    175 		return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain)
    176 	}
    177 
    178 	suffix, _ := PublicSuffix(domain)
    179 	if len(domain) <= len(suffix) {
    180 		return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain)
    181 	}
    182 	i := len(domain) - len(suffix) - 1
    183 	if domain[i] != '.' {
    184 		return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
    185 	}
    186 	return domain[1+strings.LastIndexByte(domain[:i], '.'):], nil
    187 }
    188 
    189 type uint32String string
    190 
    191 func (u uint32String) get(i uint32) uint32 {
    192 	off := i * 4
    193 	u = u[off:] // help the compiler reduce bounds checks
    194 	return uint32(u[3]) |
    195 		uint32(u[2])<<8 |
    196 		uint32(u[1])<<16 |
    197 		uint32(u[0])<<24
    198 }
    199 
    200 type uint40String string
    201 
    202 func (u uint40String) get(i uint32) uint64 {
    203 	off := uint64(i * (nodesBits / 8))
    204 	u = u[off:] // help the compiler reduce bounds checks
    205 	return uint64(u[4]) |
    206 		uint64(u[3])<<8 |
    207 		uint64(u[2])<<16 |
    208 		uint64(u[1])<<24 |
    209 		uint64(u[0])<<32
    210 }