regexp.go (10461B)
1 // Copyright 2012 The Gorilla Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mux 6 7 import ( 8 "bytes" 9 "fmt" 10 "net/http" 11 "net/url" 12 "regexp" 13 "strconv" 14 "strings" 15 ) 16 17 type routeRegexpOptions struct { 18 strictSlash bool 19 useEncodedPath bool 20 } 21 22 type regexpType int 23 24 const ( 25 regexpTypePath regexpType = iota 26 regexpTypeHost 27 regexpTypePrefix 28 regexpTypeQuery 29 ) 30 31 // newRouteRegexp parses a route template and returns a routeRegexp, 32 // used to match a host, a path or a query string. 33 // 34 // It will extract named variables, assemble a regexp to be matched, create 35 // a "reverse" template to build URLs and compile regexps to validate variable 36 // values used in URL building. 37 // 38 // Previously we accepted only Python-like identifiers for variable 39 // names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that 40 // name and pattern can't be empty, and names can't contain a colon. 41 func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) { 42 // Check if it is well-formed. 43 idxs, errBraces := braceIndices(tpl) 44 if errBraces != nil { 45 return nil, errBraces 46 } 47 // Backup the original. 48 template := tpl 49 // Now let's parse it. 50 defaultPattern := "[^/]+" 51 if typ == regexpTypeQuery { 52 defaultPattern = ".*" 53 } else if typ == regexpTypeHost { 54 defaultPattern = "[^.]+" 55 } 56 // Only match strict slash if not matching 57 if typ != regexpTypePath { 58 options.strictSlash = false 59 } 60 // Set a flag for strictSlash. 61 endSlash := false 62 if options.strictSlash && strings.HasSuffix(tpl, "/") { 63 tpl = tpl[:len(tpl)-1] 64 endSlash = true 65 } 66 varsN := make([]string, len(idxs)/2) 67 varsR := make([]*regexp.Regexp, len(idxs)/2) 68 pattern := bytes.NewBufferString("") 69 pattern.WriteByte('^') 70 reverse := bytes.NewBufferString("") 71 var end int 72 var err error 73 for i := 0; i < len(idxs); i += 2 { 74 // Set all values we are interested in. 75 raw := tpl[end:idxs[i]] 76 end = idxs[i+1] 77 parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2) 78 name := parts[0] 79 patt := defaultPattern 80 if len(parts) == 2 { 81 patt = parts[1] 82 } 83 // Name or pattern can't be empty. 84 if name == "" || patt == "" { 85 return nil, fmt.Errorf("mux: missing name or pattern in %q", 86 tpl[idxs[i]:end]) 87 } 88 // Build the regexp pattern. 89 fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt) 90 91 // Build the reverse template. 92 fmt.Fprintf(reverse, "%s%%s", raw) 93 94 // Append variable name and compiled pattern. 95 varsN[i/2] = name 96 varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt)) 97 if err != nil { 98 return nil, err 99 } 100 } 101 // Add the remaining. 102 raw := tpl[end:] 103 pattern.WriteString(regexp.QuoteMeta(raw)) 104 if options.strictSlash { 105 pattern.WriteString("[/]?") 106 } 107 if typ == regexpTypeQuery { 108 // Add the default pattern if the query value is empty 109 if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" { 110 pattern.WriteString(defaultPattern) 111 } 112 } 113 if typ != regexpTypePrefix { 114 pattern.WriteByte('$') 115 } 116 117 var wildcardHostPort bool 118 if typ == regexpTypeHost { 119 if !strings.Contains(pattern.String(), ":") { 120 wildcardHostPort = true 121 } 122 } 123 reverse.WriteString(raw) 124 if endSlash { 125 reverse.WriteByte('/') 126 } 127 // Compile full regexp. 128 reg, errCompile := regexp.Compile(pattern.String()) 129 if errCompile != nil { 130 return nil, errCompile 131 } 132 133 // Check for capturing groups which used to work in older versions 134 if reg.NumSubexp() != len(idxs)/2 { 135 panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) + 136 "Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)") 137 } 138 139 // Done! 140 return &routeRegexp{ 141 template: template, 142 regexpType: typ, 143 options: options, 144 regexp: reg, 145 reverse: reverse.String(), 146 varsN: varsN, 147 varsR: varsR, 148 wildcardHostPort: wildcardHostPort, 149 }, nil 150 } 151 152 // routeRegexp stores a regexp to match a host or path and information to 153 // collect and validate route variables. 154 type routeRegexp struct { 155 // The unmodified template. 156 template string 157 // The type of match 158 regexpType regexpType 159 // Options for matching 160 options routeRegexpOptions 161 // Expanded regexp. 162 regexp *regexp.Regexp 163 // Reverse template. 164 reverse string 165 // Variable names. 166 varsN []string 167 // Variable regexps (validators). 168 varsR []*regexp.Regexp 169 // Wildcard host-port (no strict port match in hostname) 170 wildcardHostPort bool 171 } 172 173 // Match matches the regexp against the URL host or path. 174 func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool { 175 if r.regexpType == regexpTypeHost { 176 host := getHost(req) 177 if r.wildcardHostPort { 178 // Don't be strict on the port match 179 if i := strings.Index(host, ":"); i != -1 { 180 host = host[:i] 181 } 182 } 183 return r.regexp.MatchString(host) 184 } 185 186 if r.regexpType == regexpTypeQuery { 187 return r.matchQueryString(req) 188 } 189 path := req.URL.Path 190 if r.options.useEncodedPath { 191 path = req.URL.EscapedPath() 192 } 193 return r.regexp.MatchString(path) 194 } 195 196 // url builds a URL part using the given values. 197 func (r *routeRegexp) url(values map[string]string) (string, error) { 198 urlValues := make([]interface{}, len(r.varsN)) 199 for k, v := range r.varsN { 200 value, ok := values[v] 201 if !ok { 202 return "", fmt.Errorf("mux: missing route variable %q", v) 203 } 204 if r.regexpType == regexpTypeQuery { 205 value = url.QueryEscape(value) 206 } 207 urlValues[k] = value 208 } 209 rv := fmt.Sprintf(r.reverse, urlValues...) 210 if !r.regexp.MatchString(rv) { 211 // The URL is checked against the full regexp, instead of checking 212 // individual variables. This is faster but to provide a good error 213 // message, we check individual regexps if the URL doesn't match. 214 for k, v := range r.varsN { 215 if !r.varsR[k].MatchString(values[v]) { 216 return "", fmt.Errorf( 217 "mux: variable %q doesn't match, expected %q", values[v], 218 r.varsR[k].String()) 219 } 220 } 221 } 222 return rv, nil 223 } 224 225 // getURLQuery returns a single query parameter from a request URL. 226 // For a URL with foo=bar&baz=ding, we return only the relevant key 227 // value pair for the routeRegexp. 228 func (r *routeRegexp) getURLQuery(req *http.Request) string { 229 if r.regexpType != regexpTypeQuery { 230 return "" 231 } 232 templateKey := strings.SplitN(r.template, "=", 2)[0] 233 val, ok := findFirstQueryKey(req.URL.RawQuery, templateKey) 234 if ok { 235 return templateKey + "=" + val 236 } 237 return "" 238 } 239 240 // findFirstQueryKey returns the same result as (*url.URL).Query()[key][0]. 241 // If key was not found, empty string and false is returned. 242 func findFirstQueryKey(rawQuery, key string) (value string, ok bool) { 243 query := []byte(rawQuery) 244 for len(query) > 0 { 245 foundKey := query 246 if i := bytes.IndexAny(foundKey, "&;"); i >= 0 { 247 foundKey, query = foundKey[:i], foundKey[i+1:] 248 } else { 249 query = query[:0] 250 } 251 if len(foundKey) == 0 { 252 continue 253 } 254 var value []byte 255 if i := bytes.IndexByte(foundKey, '='); i >= 0 { 256 foundKey, value = foundKey[:i], foundKey[i+1:] 257 } 258 if len(foundKey) < len(key) { 259 // Cannot possibly be key. 260 continue 261 } 262 keyString, err := url.QueryUnescape(string(foundKey)) 263 if err != nil { 264 continue 265 } 266 if keyString != key { 267 continue 268 } 269 valueString, err := url.QueryUnescape(string(value)) 270 if err != nil { 271 continue 272 } 273 return valueString, true 274 } 275 return "", false 276 } 277 278 func (r *routeRegexp) matchQueryString(req *http.Request) bool { 279 return r.regexp.MatchString(r.getURLQuery(req)) 280 } 281 282 // braceIndices returns the first level curly brace indices from a string. 283 // It returns an error in case of unbalanced braces. 284 func braceIndices(s string) ([]int, error) { 285 var level, idx int 286 var idxs []int 287 for i := 0; i < len(s); i++ { 288 switch s[i] { 289 case '{': 290 if level++; level == 1 { 291 idx = i 292 } 293 case '}': 294 if level--; level == 0 { 295 idxs = append(idxs, idx, i+1) 296 } else if level < 0 { 297 return nil, fmt.Errorf("mux: unbalanced braces in %q", s) 298 } 299 } 300 } 301 if level != 0 { 302 return nil, fmt.Errorf("mux: unbalanced braces in %q", s) 303 } 304 return idxs, nil 305 } 306 307 // varGroupName builds a capturing group name for the indexed variable. 308 func varGroupName(idx int) string { 309 return "v" + strconv.Itoa(idx) 310 } 311 312 // ---------------------------------------------------------------------------- 313 // routeRegexpGroup 314 // ---------------------------------------------------------------------------- 315 316 // routeRegexpGroup groups the route matchers that carry variables. 317 type routeRegexpGroup struct { 318 host *routeRegexp 319 path *routeRegexp 320 queries []*routeRegexp 321 } 322 323 // setMatch extracts the variables from the URL once a route matches. 324 func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) { 325 // Store host variables. 326 if v.host != nil { 327 host := getHost(req) 328 if v.host.wildcardHostPort { 329 // Don't be strict on the port match 330 if i := strings.Index(host, ":"); i != -1 { 331 host = host[:i] 332 } 333 } 334 matches := v.host.regexp.FindStringSubmatchIndex(host) 335 if len(matches) > 0 { 336 extractVars(host, matches, v.host.varsN, m.Vars) 337 } 338 } 339 path := req.URL.Path 340 if r.useEncodedPath { 341 path = req.URL.EscapedPath() 342 } 343 // Store path variables. 344 if v.path != nil { 345 matches := v.path.regexp.FindStringSubmatchIndex(path) 346 if len(matches) > 0 { 347 extractVars(path, matches, v.path.varsN, m.Vars) 348 // Check if we should redirect. 349 if v.path.options.strictSlash { 350 p1 := strings.HasSuffix(path, "/") 351 p2 := strings.HasSuffix(v.path.template, "/") 352 if p1 != p2 { 353 u, _ := url.Parse(req.URL.String()) 354 if p1 { 355 u.Path = u.Path[:len(u.Path)-1] 356 } else { 357 u.Path += "/" 358 } 359 m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently) 360 } 361 } 362 } 363 } 364 // Store query string variables. 365 for _, q := range v.queries { 366 queryURL := q.getURLQuery(req) 367 matches := q.regexp.FindStringSubmatchIndex(queryURL) 368 if len(matches) > 0 { 369 extractVars(queryURL, matches, q.varsN, m.Vars) 370 } 371 } 372 } 373 374 // getHost tries its best to return the request host. 375 // According to section 14.23 of RFC 2616 the Host header 376 // can include the port number if the default value of 80 is not used. 377 func getHost(r *http.Request) string { 378 if r.URL.IsAbs() { 379 return r.URL.Host 380 } 381 return r.Host 382 } 383 384 func extractVars(input string, matches []int, names []string, output map[string]string) { 385 for i, name := range names { 386 output[name] = input[matches[2*i+2]:matches[2*i+3]] 387 } 388 }