list.go (6711B)
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go

// Package publicsuffix provides a public suffix list based on data from
// https://publicsuffix.org/
//
// A public suffix is one under which Internet users can directly register
// names. It is related to, but different from, a TLD (top level domain).
//
// "com" is a TLD (top level domain). Top level means it has no dots.
//
// "com" is also a public suffix. Amazon and Google have registered different
// siblings under that domain: "amazon.com" and "google.com".
//
// "au" is another TLD, again because it has no dots. But it's not "amazon.au".
// Instead, it's "amazon.com.au".
//
// "com.au" isn't an actual TLD, because it's not at the top level (it has
// dots). But it is an eTLD (effective TLD), because that's the branching point
// for domain name registrars.
//
// Another name for "an eTLD" is "a public suffix". Often, what's more of
// interest is the eTLD+1, or one more label than the public suffix. For
// example, browsers partition read/write access to HTTP cookies according to
// the eTLD+1. Web pages served from "amazon.com.au" can't read cookies from
// "google.com.au", but web pages served from "maps.google.com" can share
// cookies from "www.google.com", so you don't have to sign into Google Maps
// separately from signing into Google Web Search. Note that all four of those
// domains have 3 labels and 2 dots. The first two domains are each an eTLD+1,
// the last two are not (but share the same eTLD+1: "google.com").
//
// All of these domains have the same eTLD+1:
//   - "www.books.amazon.co.uk"
//   - "books.amazon.co.uk"
//   - "amazon.co.uk"
//
// Specifically, the eTLD+1 is "amazon.co.uk", because the eTLD is "co.uk".
41 // 42 // There is no closed form algorithm to calculate the eTLD of a domain. 43 // Instead, the calculation is data driven. This package provides a 44 // pre-compiled snapshot of Mozilla's PSL (Public Suffix List) data at 45 // https://publicsuffix.org/ 46 package publicsuffix // import "golang.org/x/net/publicsuffix" 47 48 // TODO: specify case sensitivity and leading/trailing dot behavior for 49 // func PublicSuffix and func EffectiveTLDPlusOne. 50 51 import ( 52 "fmt" 53 "net/http/cookiejar" 54 "net/netip" 55 "strings" 56 ) 57 58 // List implements the cookiejar.PublicSuffixList interface by calling the 59 // PublicSuffix function. 60 var List cookiejar.PublicSuffixList = list{} 61 62 type list struct{} 63 64 func (list) PublicSuffix(domain string) string { 65 ps, _ := PublicSuffix(domain) 66 return ps 67 } 68 69 func (list) String() string { 70 return version 71 } 72 73 // PublicSuffix returns the public suffix of the domain using a copy of the 74 // publicsuffix.org database compiled into the library. 75 // 76 // icann is whether the public suffix is managed by the Internet Corporation 77 // for Assigned Names and Numbers. If not, the public suffix is either a 78 // privately managed domain (and in practice, not a top level domain) or an 79 // unmanaged top level domain (and not explicitly mentioned in the 80 // publicsuffix.org list). For example, "foo.org" and "foo.co.uk" are ICANN 81 // domains, "foo.dyndns.org" is a private domain and 82 // "cromulent" is an unmanaged top level domain. 
83 // 84 // Use cases for distinguishing ICANN domains like "foo.com" from private 85 // domains like "foo.appspot.com" can be found at 86 // https://wiki.mozilla.org/Public_Suffix_List/Use_Cases 87 func PublicSuffix(domain string) (publicSuffix string, icann bool) { 88 if _, err := netip.ParseAddr(domain); err == nil { 89 return domain, false 90 } 91 92 lo, hi := uint32(0), uint32(numTLD) 93 s, suffix, icannNode, wildcard := domain, len(domain), false, false 94 loop: 95 for { 96 dot := strings.LastIndexByte(s, '.') 97 if wildcard { 98 icann = icannNode 99 suffix = 1 + dot 100 } 101 if lo == hi { 102 break 103 } 104 f := find(s[1+dot:], lo, hi) 105 if f == notFound { 106 break 107 } 108 109 u := uint32(nodes.get(f) >> (nodesBitsTextOffset + nodesBitsTextLength)) 110 icannNode = u&(1<<nodesBitsICANN-1) != 0 111 u >>= nodesBitsICANN 112 u = children.get(u & (1<<nodesBitsChildren - 1)) 113 lo = u & (1<<childrenBitsLo - 1) 114 u >>= childrenBitsLo 115 hi = u & (1<<childrenBitsHi - 1) 116 u >>= childrenBitsHi 117 switch u & (1<<childrenBitsNodeType - 1) { 118 case nodeTypeNormal: 119 suffix = 1 + dot 120 case nodeTypeException: 121 suffix = 1 + len(s) 122 break loop 123 } 124 u >>= childrenBitsNodeType 125 wildcard = u&(1<<childrenBitsWildcard-1) != 0 126 if !wildcard { 127 icann = icannNode 128 } 129 130 if dot == -1 { 131 break 132 } 133 s = s[:dot] 134 } 135 if suffix == len(domain) { 136 // If no rules match, the prevailing rule is "*". 137 return domain[1+strings.LastIndexByte(domain, '.'):], icann 138 } 139 return domain[suffix:], icann 140 } 141 142 const notFound uint32 = 1<<32 - 1 143 144 // find returns the index of the node in the range [lo, hi) whose label equals 145 // label, or notFound if there is no such node. The range is assumed to be in 146 // strictly increasing node label order. 
147 func find(label string, lo, hi uint32) uint32 { 148 for lo < hi { 149 mid := lo + (hi-lo)/2 150 s := nodeLabel(mid) 151 if s < label { 152 lo = mid + 1 153 } else if s == label { 154 return mid 155 } else { 156 hi = mid 157 } 158 } 159 return notFound 160 } 161 162 // nodeLabel returns the label for the i'th node. 163 func nodeLabel(i uint32) string { 164 x := nodes.get(i) 165 length := x & (1<<nodesBitsTextLength - 1) 166 x >>= nodesBitsTextLength 167 offset := x & (1<<nodesBitsTextOffset - 1) 168 return text[offset : offset+length] 169 } 170 171 // EffectiveTLDPlusOne returns the effective top level domain plus one more 172 // label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org". 173 func EffectiveTLDPlusOne(domain string) (string, error) { 174 if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") { 175 return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain) 176 } 177 178 suffix, _ := PublicSuffix(domain) 179 if len(domain) <= len(suffix) { 180 return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain) 181 } 182 i := len(domain) - len(suffix) - 1 183 if domain[i] != '.' { 184 return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain) 185 } 186 return domain[1+strings.LastIndexByte(domain[:i], '.'):], nil 187 } 188 189 type uint32String string 190 191 func (u uint32String) get(i uint32) uint32 { 192 off := i * 4 193 u = u[off:] // help the compiler reduce bounds checks 194 return uint32(u[3]) | 195 uint32(u[2])<<8 | 196 uint32(u[1])<<16 | 197 uint32(u[0])<<24 198 } 199 200 type uint40String string 201 202 func (u uint40String) get(i uint32) uint64 { 203 off := uint64(i * (nodesBits / 8)) 204 u = u[off:] // help the compiler reduce bounds checks 205 return uint64(u[4]) | 206 uint64(u[3])<<8 | 207 uint64(u[2])<<16 | 208 uint64(u[1])<<24 | 209 uint64(u[0])<<32 210 }