taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

regexp.go (10461B)


      1 // Copyright 2012 The Gorilla Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package mux
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"net/http"
     11 	"net/url"
     12 	"regexp"
     13 	"strconv"
     14 	"strings"
     15 )
     16 
// routeRegexpOptions carries the matching options handed to newRouteRegexp
// and stored on the resulting routeRegexp.
//
// strictSlash makes the compiled path regexp accept an optional trailing
// slash; newRouteRegexp clears it for every type other than regexpTypePath.
// useEncodedPath makes Match test req.URL.EscapedPath() instead of
// req.URL.Path.
type routeRegexpOptions struct {
	strictSlash    bool
	useEncodedPath bool
}
     21 
// regexpType selects which part of a request a routeRegexp is matched
// against and which default variable pattern newRouteRegexp uses.
type regexpType int

const (
	// regexpTypePath matches the URL path; the regexp is anchored at both
	// ends and it is the only type for which strictSlash is honoured.
	regexpTypePath regexpType = iota
	// regexpTypeHost matches the request host; variables default to "[^.]+".
	regexpTypeHost
	// regexpTypePrefix is built like a path regexp but is not anchored with
	// a trailing '$'.
	regexpTypePrefix
	// regexpTypeQuery matches a single "key=value" query pair; variables
	// default to ".*".
	regexpTypeQuery
)
     30 
     31 // newRouteRegexp parses a route template and returns a routeRegexp,
     32 // used to match a host, a path or a query string.
     33 //
     34 // It will extract named variables, assemble a regexp to be matched, create
     35 // a "reverse" template to build URLs and compile regexps to validate variable
     36 // values used in URL building.
     37 //
     38 // Previously we accepted only Python-like identifiers for variable
     39 // names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
     40 // name and pattern can't be empty, and names can't contain a colon.
     41 func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
     42 	// Check if it is well-formed.
     43 	idxs, errBraces := braceIndices(tpl)
     44 	if errBraces != nil {
     45 		return nil, errBraces
     46 	}
     47 	// Backup the original.
     48 	template := tpl
     49 	// Now let's parse it.
     50 	defaultPattern := "[^/]+"
     51 	if typ == regexpTypeQuery {
     52 		defaultPattern = ".*"
     53 	} else if typ == regexpTypeHost {
     54 		defaultPattern = "[^.]+"
     55 	}
     56 	// Only match strict slash if not matching
     57 	if typ != regexpTypePath {
     58 		options.strictSlash = false
     59 	}
     60 	// Set a flag for strictSlash.
     61 	endSlash := false
     62 	if options.strictSlash && strings.HasSuffix(tpl, "/") {
     63 		tpl = tpl[:len(tpl)-1]
     64 		endSlash = true
     65 	}
     66 	varsN := make([]string, len(idxs)/2)
     67 	varsR := make([]*regexp.Regexp, len(idxs)/2)
     68 	pattern := bytes.NewBufferString("")
     69 	pattern.WriteByte('^')
     70 	reverse := bytes.NewBufferString("")
     71 	var end int
     72 	var err error
     73 	for i := 0; i < len(idxs); i += 2 {
     74 		// Set all values we are interested in.
     75 		raw := tpl[end:idxs[i]]
     76 		end = idxs[i+1]
     77 		parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
     78 		name := parts[0]
     79 		patt := defaultPattern
     80 		if len(parts) == 2 {
     81 			patt = parts[1]
     82 		}
     83 		// Name or pattern can't be empty.
     84 		if name == "" || patt == "" {
     85 			return nil, fmt.Errorf("mux: missing name or pattern in %q",
     86 				tpl[idxs[i]:end])
     87 		}
     88 		// Build the regexp pattern.
     89 		fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
     90 
     91 		// Build the reverse template.
     92 		fmt.Fprintf(reverse, "%s%%s", raw)
     93 
     94 		// Append variable name and compiled pattern.
     95 		varsN[i/2] = name
     96 		varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
     97 		if err != nil {
     98 			return nil, err
     99 		}
    100 	}
    101 	// Add the remaining.
    102 	raw := tpl[end:]
    103 	pattern.WriteString(regexp.QuoteMeta(raw))
    104 	if options.strictSlash {
    105 		pattern.WriteString("[/]?")
    106 	}
    107 	if typ == regexpTypeQuery {
    108 		// Add the default pattern if the query value is empty
    109 		if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
    110 			pattern.WriteString(defaultPattern)
    111 		}
    112 	}
    113 	if typ != regexpTypePrefix {
    114 		pattern.WriteByte('$')
    115 	}
    116 
    117 	var wildcardHostPort bool
    118 	if typ == regexpTypeHost {
    119 		if !strings.Contains(pattern.String(), ":") {
    120 			wildcardHostPort = true
    121 		}
    122 	}
    123 	reverse.WriteString(raw)
    124 	if endSlash {
    125 		reverse.WriteByte('/')
    126 	}
    127 	// Compile full regexp.
    128 	reg, errCompile := regexp.Compile(pattern.String())
    129 	if errCompile != nil {
    130 		return nil, errCompile
    131 	}
    132 
    133 	// Check for capturing groups which used to work in older versions
    134 	if reg.NumSubexp() != len(idxs)/2 {
    135 		panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
    136 			"Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
    137 	}
    138 
    139 	// Done!
    140 	return &routeRegexp{
    141 		template:         template,
    142 		regexpType:       typ,
    143 		options:          options,
    144 		regexp:           reg,
    145 		reverse:          reverse.String(),
    146 		varsN:            varsN,
    147 		varsR:            varsR,
    148 		wildcardHostPort: wildcardHostPort,
    149 	}, nil
    150 }
    151 
// routeRegexp stores a regexp to match a host, path or query pair and the
// information needed to collect and validate route variables. Values are
// built by newRouteRegexp.
type routeRegexp struct {
	// The unmodified template.
	template string
	// The type of match
	regexpType regexpType
	// Options for matching
	options routeRegexpOptions
	// Expanded regexp: the template with every variable replaced by a named
	// group (see varGroupName).
	regexp *regexp.Regexp
	// Reverse template: a fmt format string with one %s per variable,
	// consumed by url.
	reverse string
	// Variable names, in template order.
	varsN []string
	// Variable regexps (validators), parallel to varsN.
	varsR []*regexp.Regexp
	// Wildcard host-port (no strict port match in hostname)
	wildcardHostPort bool
}
    172 
    173 // Match matches the regexp against the URL host or path.
    174 func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
    175 	if r.regexpType == regexpTypeHost {
    176 		host := getHost(req)
    177 		if r.wildcardHostPort {
    178 			// Don't be strict on the port match
    179 			if i := strings.Index(host, ":"); i != -1 {
    180 				host = host[:i]
    181 			}
    182 		}
    183 		return r.regexp.MatchString(host)
    184 	}
    185 
    186 	if r.regexpType == regexpTypeQuery {
    187 		return r.matchQueryString(req)
    188 	}
    189 	path := req.URL.Path
    190 	if r.options.useEncodedPath {
    191 		path = req.URL.EscapedPath()
    192 	}
    193 	return r.regexp.MatchString(path)
    194 }
    195 
    196 // url builds a URL part using the given values.
    197 func (r *routeRegexp) url(values map[string]string) (string, error) {
    198 	urlValues := make([]interface{}, len(r.varsN))
    199 	for k, v := range r.varsN {
    200 		value, ok := values[v]
    201 		if !ok {
    202 			return "", fmt.Errorf("mux: missing route variable %q", v)
    203 		}
    204 		if r.regexpType == regexpTypeQuery {
    205 			value = url.QueryEscape(value)
    206 		}
    207 		urlValues[k] = value
    208 	}
    209 	rv := fmt.Sprintf(r.reverse, urlValues...)
    210 	if !r.regexp.MatchString(rv) {
    211 		// The URL is checked against the full regexp, instead of checking
    212 		// individual variables. This is faster but to provide a good error
    213 		// message, we check individual regexps if the URL doesn't match.
    214 		for k, v := range r.varsN {
    215 			if !r.varsR[k].MatchString(values[v]) {
    216 				return "", fmt.Errorf(
    217 					"mux: variable %q doesn't match, expected %q", values[v],
    218 					r.varsR[k].String())
    219 			}
    220 		}
    221 	}
    222 	return rv, nil
    223 }
    224 
    225 // getURLQuery returns a single query parameter from a request URL.
    226 // For a URL with foo=bar&baz=ding, we return only the relevant key
    227 // value pair for the routeRegexp.
    228 func (r *routeRegexp) getURLQuery(req *http.Request) string {
    229 	if r.regexpType != regexpTypeQuery {
    230 		return ""
    231 	}
    232 	templateKey := strings.SplitN(r.template, "=", 2)[0]
    233 	val, ok := findFirstQueryKey(req.URL.RawQuery, templateKey)
    234 	if ok {
    235 		return templateKey + "=" + val
    236 	}
    237 	return ""
    238 }
    239 
    240 // findFirstQueryKey returns the same result as (*url.URL).Query()[key][0].
    241 // If key was not found, empty string and false is returned.
    242 func findFirstQueryKey(rawQuery, key string) (value string, ok bool) {
    243 	query := []byte(rawQuery)
    244 	for len(query) > 0 {
    245 		foundKey := query
    246 		if i := bytes.IndexAny(foundKey, "&;"); i >= 0 {
    247 			foundKey, query = foundKey[:i], foundKey[i+1:]
    248 		} else {
    249 			query = query[:0]
    250 		}
    251 		if len(foundKey) == 0 {
    252 			continue
    253 		}
    254 		var value []byte
    255 		if i := bytes.IndexByte(foundKey, '='); i >= 0 {
    256 			foundKey, value = foundKey[:i], foundKey[i+1:]
    257 		}
    258 		if len(foundKey) < len(key) {
    259 			// Cannot possibly be key.
    260 			continue
    261 		}
    262 		keyString, err := url.QueryUnescape(string(foundKey))
    263 		if err != nil {
    264 			continue
    265 		}
    266 		if keyString != key {
    267 			continue
    268 		}
    269 		valueString, err := url.QueryUnescape(string(value))
    270 		if err != nil {
    271 			continue
    272 		}
    273 		return valueString, true
    274 	}
    275 	return "", false
    276 }
    277 
// matchQueryString reports whether the "key=value" pair that getURLQuery
// extracts from the request matches the compiled regexp. getURLQuery yields
// an empty string when the key is absent, so that is what gets matched then.
func (r *routeRegexp) matchQueryString(req *http.Request) bool {
	return r.regexp.MatchString(r.getURLQuery(req))
}
    281 
    282 // braceIndices returns the first level curly brace indices from a string.
    283 // It returns an error in case of unbalanced braces.
    284 func braceIndices(s string) ([]int, error) {
    285 	var level, idx int
    286 	var idxs []int
    287 	for i := 0; i < len(s); i++ {
    288 		switch s[i] {
    289 		case '{':
    290 			if level++; level == 1 {
    291 				idx = i
    292 			}
    293 		case '}':
    294 			if level--; level == 0 {
    295 				idxs = append(idxs, idx, i+1)
    296 			} else if level < 0 {
    297 				return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
    298 			}
    299 		}
    300 	}
    301 	if level != 0 {
    302 		return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
    303 	}
    304 	return idxs, nil
    305 }
    306 
    307 // varGroupName builds a capturing group name for the indexed variable.
    308 func varGroupName(idx int) string {
    309 	return "v" + strconv.Itoa(idx)
    310 }
    311 
    312 // ----------------------------------------------------------------------------
    313 // routeRegexpGroup
    314 // ----------------------------------------------------------------------------
    315 
// routeRegexpGroup groups the route matchers that carry variables: at most
// one host regexp, at most one path regexp and any number of query regexps.
// setMatch treats a nil host or path as absent.
type routeRegexpGroup struct {
	host    *routeRegexp
	path    *routeRegexp
	queries []*routeRegexp
}
    322 
    323 // setMatch extracts the variables from the URL once a route matches.
    324 func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
    325 	// Store host variables.
    326 	if v.host != nil {
    327 		host := getHost(req)
    328 		if v.host.wildcardHostPort {
    329 			// Don't be strict on the port match
    330 			if i := strings.Index(host, ":"); i != -1 {
    331 				host = host[:i]
    332 			}
    333 		}
    334 		matches := v.host.regexp.FindStringSubmatchIndex(host)
    335 		if len(matches) > 0 {
    336 			extractVars(host, matches, v.host.varsN, m.Vars)
    337 		}
    338 	}
    339 	path := req.URL.Path
    340 	if r.useEncodedPath {
    341 		path = req.URL.EscapedPath()
    342 	}
    343 	// Store path variables.
    344 	if v.path != nil {
    345 		matches := v.path.regexp.FindStringSubmatchIndex(path)
    346 		if len(matches) > 0 {
    347 			extractVars(path, matches, v.path.varsN, m.Vars)
    348 			// Check if we should redirect.
    349 			if v.path.options.strictSlash {
    350 				p1 := strings.HasSuffix(path, "/")
    351 				p2 := strings.HasSuffix(v.path.template, "/")
    352 				if p1 != p2 {
    353 					u, _ := url.Parse(req.URL.String())
    354 					if p1 {
    355 						u.Path = u.Path[:len(u.Path)-1]
    356 					} else {
    357 						u.Path += "/"
    358 					}
    359 					m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently)
    360 				}
    361 			}
    362 		}
    363 	}
    364 	// Store query string variables.
    365 	for _, q := range v.queries {
    366 		queryURL := q.getURLQuery(req)
    367 		matches := q.regexp.FindStringSubmatchIndex(queryURL)
    368 		if len(matches) > 0 {
    369 			extractVars(queryURL, matches, q.varsN, m.Vars)
    370 		}
    371 	}
    372 }
    373 
    374 // getHost tries its best to return the request host.
    375 // According to section 14.23 of RFC 2616 the Host header
    376 // can include the port number if the default value of 80 is not used.
    377 func getHost(r *http.Request) string {
    378 	if r.URL.IsAbs() {
    379 		return r.URL.Host
    380 	}
    381 	return r.Host
    382 }
    383 
    384 func extractVars(input string, matches []int, names []string, output map[string]string) {
    385 	for i, name := range names {
    386 		output[name] = input[matches[2*i+2]:matches[2*i+3]]
    387 	}
    388 }