parse.go (22213B)
1 package toml 2 3 import ( 4 "fmt" 5 "math" 6 "strconv" 7 "strings" 8 "time" 9 "unicode/utf8" 10 11 "github.com/BurntSushi/toml/internal" 12 ) 13 14 type parser struct { 15 lx *lexer 16 context Key // Full key for the current hash in scope. 17 currentKey string // Base key name for everything except hashes. 18 pos Position // Current position in the TOML file. 19 20 ordered []Key // List of keys in the order that they appear in the TOML data. 21 22 keyInfo map[string]keyInfo // Map keyname → info about the TOML key. 23 mapping map[string]any // Map keyname → key value. 24 implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names"). 25 } 26 27 type keyInfo struct { 28 pos Position 29 tomlType tomlType 30 } 31 32 func parse(data string) (p *parser, err error) { 33 defer func() { 34 if r := recover(); r != nil { 35 if pErr, ok := r.(ParseError); ok { 36 pErr.input = data 37 err = pErr 38 return 39 } 40 panic(r) 41 } 42 }() 43 44 // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString() 45 // which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add 46 // it anyway. 47 if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16 48 data = data[2:] 49 } else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8 50 data = data[3:] 51 } 52 53 // Examine first few bytes for NULL bytes; this probably means it's a UTF-16 54 // file (second byte in surrogate pair being NULL). Again, do this here to 55 // avoid having to deal with UTF-8/16 stuff in the lexer. 56 ex := 6 57 if len(data) < 6 { 58 ex = len(data) 59 } 60 if i := strings.IndexRune(data[:ex], 0); i > -1 { 61 return nil, ParseError{ 62 Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8", 63 Position: Position{Line: 1, Col: 1, Start: i, Len: 1}, 64 Line: 1, 65 input: data, 66 } 67 } 68 69 p = &parser{ 70 keyInfo: make(map[string]keyInfo), 71 mapping: make(map[string]any), 72 lx: lex(data), 73 ordered: make([]Key, 0), 74 implicits: make(map[string]struct{}), 75 } 76 for { 77 item := p.next() 78 if item.typ == itemEOF { 79 break 80 } 81 p.topLevel(item) 82 } 83 84 return p, nil 85 } 86 87 func (p *parser) panicErr(it item, err error) { 88 panic(ParseError{ 89 Message: err.Error(), 90 err: err, 91 Position: it.pos.withCol(p.lx.input), 92 Line: it.pos.Len, 93 LastKey: p.current(), 94 }) 95 } 96 97 func (p *parser) panicItemf(it item, format string, v ...any) { 98 panic(ParseError{ 99 Message: fmt.Sprintf(format, v...), 100 Position: it.pos.withCol(p.lx.input), 101 Line: it.pos.Len, 102 LastKey: p.current(), 103 }) 104 } 105 106 func (p *parser) panicf(format string, v ...any) { 107 panic(ParseError{ 108 Message: fmt.Sprintf(format, v...), 109 Position: p.pos.withCol(p.lx.input), 110 Line: p.pos.Line, 111 LastKey: p.current(), 112 }) 113 } 114 115 func (p *parser) next() item { 116 it := p.lx.nextItem() 117 //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val) 118 if it.typ == itemError { 119 if it.err != nil { 120 panic(ParseError{ 121 Message: it.err.Error(), 122 err: it.err, 123 Position: it.pos.withCol(p.lx.input), 124 Line: it.pos.Line, 125 LastKey: p.current(), 126 }) 127 } 128 129 p.panicItemf(it, "%s", it.val) 130 } 131 return it 132 } 133 134 func (p *parser) nextPos() item { 135 it := p.next() 136 p.pos = it.pos 137 return it 138 } 139 140 func (p *parser) bug(format string, v ...any) { 141 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) 142 } 143 144 func (p *parser) expect(typ itemType) item { 145 it := p.next() 146 p.assertEqual(typ, it.typ) 147 return it 148 } 149 150 func (p *parser) assertEqual(expected, got itemType) { 151 if expected != got { 152 p.bug("Expected '%s' but got '%s'.", expected, got) 153 } 154 } 155 156 func (p *parser) topLevel(item item) { 157 switch item.typ { 158 case itemCommentStart: // # .. 159 p.expect(itemText) 160 case itemTableStart: // [ .. ] 161 name := p.nextPos() 162 163 var key Key 164 for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() { 165 key = append(key, p.keyString(name)) 166 } 167 p.assertEqual(itemTableEnd, name.typ) 168 169 p.addContext(key, false) 170 p.setType("", tomlHash, item.pos) 171 p.ordered = append(p.ordered, key) 172 case itemArrayTableStart: // [[ .. ]] 173 name := p.nextPos() 174 175 var key Key 176 for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() { 177 key = append(key, p.keyString(name)) 178 } 179 p.assertEqual(itemArrayTableEnd, name.typ) 180 181 p.addContext(key, true) 182 p.setType("", tomlArrayHash, item.pos) 183 p.ordered = append(p.ordered, key) 184 case itemKeyStart: // key = .. 185 outerContext := p.context 186 /// Read all the key parts (e.g. 'a' and 'b' in 'a.b') 187 k := p.nextPos() 188 var key Key 189 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 190 key = append(key, p.keyString(k)) 191 } 192 p.assertEqual(itemKeyEnd, k.typ) 193 194 /// The current key is the last part. 195 p.currentKey = key.last() 196 197 /// All the other parts (if any) are the context; need to set each part 198 /// as implicit. 199 context := key.parent() 200 for i := range context { 201 p.addImplicitContext(append(p.context, context[i:i+1]...)) 202 } 203 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 204 205 /// Set value. 206 vItem := p.next() 207 val, typ := p.value(vItem, false) 208 p.setValue(p.currentKey, val) 209 p.setType(p.currentKey, typ, vItem.pos) 210 211 /// Remove the context we added (preserving any context from [tbl] lines). 212 p.context = outerContext 213 p.currentKey = "" 214 default: 215 p.bug("Unexpected type at top level: %s", item.typ) 216 } 217 } 218 219 // Gets a string for a key (or part of a key in a table name). 220 func (p *parser) keyString(it item) string { 221 switch it.typ { 222 case itemText: 223 return it.val 224 case itemString, itemStringEsc, itemMultilineString, 225 itemRawString, itemRawMultilineString: 226 s, _ := p.value(it, false) 227 return s.(string) 228 default: 229 p.bug("Unexpected key type: %s", it.typ) 230 } 231 panic("unreachable") 232 } 233 234 var datetimeRepl = strings.NewReplacer( 235 "z", "Z", 236 "t", "T", 237 " ", "T") 238 239 // value translates an expected value from the lexer into a Go value wrapped 240 // as an empty interface. 241 func (p *parser) value(it item, parentIsArray bool) (any, tomlType) { 242 switch it.typ { 243 case itemString: 244 return it.val, p.typeOfPrimitive(it) 245 case itemStringEsc: 246 return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it) 247 case itemMultilineString: 248 return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it) 249 case itemRawString: 250 return it.val, p.typeOfPrimitive(it) 251 case itemRawMultilineString: 252 return stripFirstNewline(it.val), p.typeOfPrimitive(it) 253 case itemInteger: 254 return p.valueInteger(it) 255 case itemFloat: 256 return p.valueFloat(it) 257 case itemBool: 258 switch it.val { 259 case "true": 260 return true, p.typeOfPrimitive(it) 261 case "false": 262 return false, p.typeOfPrimitive(it) 263 default: 264 p.bug("Expected boolean value, but got '%s'.", it.val) 265 } 266 case itemDatetime: 267 return p.valueDatetime(it) 268 case itemArray: 269 return p.valueArray(it) 270 case itemInlineTableStart: 271 return p.valueInlineTable(it, parentIsArray) 272 default: 273 p.bug("Unexpected value type: %s", it.typ) 274 } 275 panic("unreachable") 276 } 277 278 func (p *parser) valueInteger(it item) (any, tomlType) { 279 if !numUnderscoresOK(it.val) { 280 p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val) 281 } 282 if numHasLeadingZero(it.val) { 283 p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val) 284 } 285 286 num, err := strconv.ParseInt(it.val, 0, 64) 287 if err != nil { 288 // Distinguish integer values. Normally, it'd be a bug if the lexer 289 // provides an invalid integer, but it's possible that the number is 290 // out of range of valid values (which the lexer cannot determine). 291 // So mark the former as a bug but the latter as a legitimate user 292 // error. 293 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 294 p.panicErr(it, errParseRange{i: it.val, size: "int64"}) 295 } else { 296 p.bug("Expected integer value, but got '%s'.", it.val) 297 } 298 } 299 return num, p.typeOfPrimitive(it) 300 } 301 302 func (p *parser) valueFloat(it item) (any, tomlType) { 303 parts := strings.FieldsFunc(it.val, func(r rune) bool { 304 switch r { 305 case '.', 'e', 'E': 306 return true 307 } 308 return false 309 }) 310 for _, part := range parts { 311 if !numUnderscoresOK(part) { 312 p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val) 313 } 314 } 315 if len(parts) > 0 && numHasLeadingZero(parts[0]) { 316 p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val) 317 } 318 if !numPeriodsOK(it.val) { 319 // As a special case, numbers like '123.' or '1.e2', 320 // which are valid as far as Go/strconv are concerned, 321 // must be rejected because TOML says that a fractional 322 // part consists of '.' followed by 1+ digits. 323 p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val) 324 } 325 val := strings.Replace(it.val, "_", "", -1) 326 signbit := false 327 if val == "+nan" || val == "-nan" { 328 signbit = val == "-nan" 329 val = "nan" 330 } 331 num, err := strconv.ParseFloat(val, 64) 332 if err != nil { 333 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 334 p.panicErr(it, errParseRange{i: it.val, size: "float64"}) 335 } else { 336 p.panicItemf(it, "Invalid float value: %q", it.val) 337 } 338 } 339 if signbit { 340 num = math.Copysign(num, -1) 341 } 342 return num, p.typeOfPrimitive(it) 343 } 344 345 var dtTypes = []struct { 346 fmt string 347 zone *time.Location 348 }{ 349 {time.RFC3339Nano, time.Local}, 350 {"2006-01-02T15:04:05.999999999", internal.LocalDatetime}, 351 {"2006-01-02", internal.LocalDate}, 352 {"15:04:05.999999999", internal.LocalTime}, 353 {"2006-01-02T15:04Z07:00", time.Local}, 354 {"2006-01-02T15:04", internal.LocalDatetime}, 355 {"15:04", internal.LocalTime}, 356 } 357 358 func (p *parser) valueDatetime(it item) (any, tomlType) { 359 it.val = datetimeRepl.Replace(it.val) 360 var ( 361 t time.Time 362 ok bool 363 err error 364 ) 365 for _, dt := range dtTypes { 366 t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone) 367 if err == nil { 368 if missingLeadingZero(it.val, dt.fmt) { 369 p.panicErr(it, errParseDate{it.val}) 370 } 371 ok = true 372 break 373 } 374 } 375 if !ok { 376 p.panicErr(it, errParseDate{it.val}) 377 } 378 return t, p.typeOfPrimitive(it) 379 } 380 381 // Go's time.Parse() will accept numbers without a leading zero; there isn't any 382 // way to require it. https://github.com/golang/go/issues/29911 383 // 384 // Depend on the fact that the separators (- and :) should always be at the same 385 // location. 386 func missingLeadingZero(d, l string) bool { 387 for i, c := range []byte(l) { 388 if c == '.' || c == 'Z' { 389 return false 390 } 391 if (c < '0' || c > '9') && d[i] != c { 392 return true 393 } 394 } 395 return false 396 } 397 398 func (p *parser) valueArray(it item) (any, tomlType) { 399 p.setType(p.currentKey, tomlArray, it.pos) 400 401 var ( 402 // Initialize to a non-nil slice to make it consistent with how S = [] 403 // decodes into a non-nil slice inside something like struct { S 404 // []string }. See #338 405 array = make([]any, 0, 2) 406 ) 407 for it = p.next(); it.typ != itemArrayEnd; it = p.next() { 408 if it.typ == itemCommentStart { 409 p.expect(itemText) 410 continue 411 } 412 413 val, typ := p.value(it, true) 414 array = append(array, val) 415 416 // XXX: type isn't used here, we need it to record the accurate type 417 // information. 418 // 419 // Not entirely sure how to best store this; could use "key[0]", 420 // "key[1]" notation, or maybe store it on the Array type? 421 _ = typ 422 } 423 return array, tomlArray 424 } 425 426 func (p *parser) valueInlineTable(it item, parentIsArray bool) (any, tomlType) { 427 var ( 428 topHash = make(map[string]any) 429 outerContext = p.context 430 outerKey = p.currentKey 431 ) 432 433 p.context = append(p.context, p.currentKey) 434 prevContext := p.context 435 p.currentKey = "" 436 437 p.addImplicit(p.context) 438 p.addContext(p.context, parentIsArray) 439 440 /// Loop over all table key/value pairs. 441 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { 442 if it.typ == itemCommentStart { 443 p.expect(itemText) 444 continue 445 } 446 447 /// Read all key parts. 448 k := p.nextPos() 449 var key Key 450 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 451 key = append(key, p.keyString(k)) 452 } 453 p.assertEqual(itemKeyEnd, k.typ) 454 455 /// The current key is the last part. 456 p.currentKey = key.last() 457 458 /// All the other parts (if any) are the context; need to set each part 459 /// as implicit. 460 context := key.parent() 461 for i := range context { 462 p.addImplicitContext(append(p.context, context[i:i+1]...)) 463 } 464 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 465 466 /// Set the value. 467 val, typ := p.value(p.next(), false) 468 p.setValue(p.currentKey, val) 469 p.setType(p.currentKey, typ, it.pos) 470 471 hash := topHash 472 for _, c := range context { 473 h, ok := hash[c] 474 if !ok { 475 h = make(map[string]any) 476 hash[c] = h 477 } 478 hash, ok = h.(map[string]any) 479 if !ok { 480 p.panicf("%q is not a table", p.context) 481 } 482 } 483 hash[p.currentKey] = val 484 485 /// Restore context. 486 p.context = prevContext 487 } 488 p.context = outerContext 489 p.currentKey = outerKey 490 return topHash, tomlHash 491 } 492 493 // numHasLeadingZero checks if this number has leading zeroes, allowing for '0', 494 // +/- signs, and base prefixes. 495 func numHasLeadingZero(s string) bool { 496 if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x 497 return true 498 } 499 if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' { 500 return true 501 } 502 return false 503 } 504 505 // numUnderscoresOK checks whether each underscore in s is surrounded by 506 // characters that are not underscores. 507 func numUnderscoresOK(s string) bool { 508 switch s { 509 case "nan", "+nan", "-nan", "inf", "-inf", "+inf": 510 return true 511 } 512 accept := false 513 for _, r := range s { 514 if r == '_' { 515 if !accept { 516 return false 517 } 518 } 519 520 // isHex is a superset of all the permissible characters surrounding an 521 // underscore. 522 accept = isHex(r) 523 } 524 return accept 525 } 526 527 // numPeriodsOK checks whether every period in s is followed by a digit. 528 func numPeriodsOK(s string) bool { 529 period := false 530 for _, r := range s { 531 if period && !isDigit(r) { 532 return false 533 } 534 period = r == '.' 535 } 536 return !period 537 } 538 539 // Set the current context of the parser, where the context is either a hash or 540 // an array of hashes, depending on the value of the `array` parameter. 541 // 542 // Establishing the context also makes sure that the key isn't a duplicate, and 543 // will create implicit hashes automatically. 544 func (p *parser) addContext(key Key, array bool) { 545 /// Always start at the top level and drill down for our context. 546 hashContext := p.mapping 547 keyContext := make(Key, 0, len(key)-1) 548 549 /// We only need implicit hashes for the parents. 550 for _, k := range key.parent() { 551 _, ok := hashContext[k] 552 keyContext = append(keyContext, k) 553 554 // No key? Make an implicit hash and move on. 555 if !ok { 556 p.addImplicit(keyContext) 557 hashContext[k] = make(map[string]any) 558 } 559 560 // If the hash context is actually an array of tables, then set 561 // the hash context to the last element in that array. 562 // 563 // Otherwise, it better be a table, since this MUST be a key group (by 564 // virtue of it not being the last element in a key). 565 switch t := hashContext[k].(type) { 566 case []map[string]any: 567 hashContext = t[len(t)-1] 568 case map[string]any: 569 hashContext = t 570 default: 571 p.panicf("Key '%s' was already created as a hash.", keyContext) 572 } 573 } 574 575 p.context = keyContext 576 if array { 577 // If this is the first element for this array, then allocate a new 578 // list of tables for it. 579 k := key.last() 580 if _, ok := hashContext[k]; !ok { 581 hashContext[k] = make([]map[string]any, 0, 4) 582 } 583 584 // Add a new table. But make sure the key hasn't already been used 585 // for something else. 586 if hash, ok := hashContext[k].([]map[string]any); ok { 587 hashContext[k] = append(hash, make(map[string]any)) 588 } else { 589 p.panicf("Key '%s' was already created and cannot be used as an array.", key) 590 } 591 } else { 592 p.setValue(key.last(), make(map[string]any)) 593 } 594 p.context = append(p.context, key.last()) 595 } 596 597 // setValue sets the given key to the given value in the current context. 598 // It will make sure that the key hasn't already been defined, account for 599 // implicit key groups. 600 func (p *parser) setValue(key string, value any) { 601 var ( 602 tmpHash any 603 ok bool 604 hash = p.mapping 605 keyContext = make(Key, 0, len(p.context)+1) 606 ) 607 for _, k := range p.context { 608 keyContext = append(keyContext, k) 609 if tmpHash, ok = hash[k]; !ok { 610 p.bug("Context for key '%s' has not been established.", keyContext) 611 } 612 switch t := tmpHash.(type) { 613 case []map[string]any: 614 // The context is a table of hashes. Pick the most recent table 615 // defined as the current hash. 616 hash = t[len(t)-1] 617 case map[string]any: 618 hash = t 619 default: 620 p.panicf("Key '%s' has already been defined.", keyContext) 621 } 622 } 623 keyContext = append(keyContext, key) 624 625 if _, ok := hash[key]; ok { 626 // Normally redefining keys isn't allowed, but the key could have been 627 // defined implicitly and it's allowed to be redefined concretely. (See 628 // the `valid/implicit-and-explicit-after.toml` in toml-test) 629 // 630 // But we have to make sure to stop marking it as an implicit. (So that 631 // another redefinition provokes an error.) 632 // 633 // Note that since it has already been defined (as a hash), we don't 634 // want to overwrite it. So our business is done. 635 if p.isArray(keyContext) { 636 if !p.isImplicit(keyContext) { 637 if _, ok := hash[key]; ok { 638 p.panicf("Key '%s' has already been defined.", keyContext) 639 } 640 } 641 p.removeImplicit(keyContext) 642 hash[key] = value 643 return 644 } 645 if p.isImplicit(keyContext) { 646 p.removeImplicit(keyContext) 647 return 648 } 649 // Otherwise, we have a concrete key trying to override a previous key, 650 // which is *always* wrong. 651 p.panicf("Key '%s' has already been defined.", keyContext) 652 } 653 654 hash[key] = value 655 } 656 657 // setType sets the type of a particular value at a given key. It should be 658 // called immediately AFTER setValue. 659 // 660 // Note that if `key` is empty, then the type given will be applied to the 661 // current context (which is either a table or an array of tables). 662 func (p *parser) setType(key string, typ tomlType, pos Position) { 663 keyContext := make(Key, 0, len(p.context)+1) 664 keyContext = append(keyContext, p.context...) 665 if len(key) > 0 { // allow type setting for hashes 666 keyContext = append(keyContext, key) 667 } 668 // Special case to make empty keys ("" = 1) work. 669 // Without it it will set "" rather than `""`. 670 // TODO: why is this needed? And why is this only needed here? 671 if len(keyContext) == 0 { 672 keyContext = Key{""} 673 } 674 p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos} 675 } 676 677 // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and 678 // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly). 679 func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} } 680 func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) } 681 func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok } 682 func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray } 683 func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) } 684 685 // current returns the full key name of the current context. 686 func (p *parser) current() string { 687 if len(p.currentKey) == 0 { 688 return p.context.String() 689 } 690 if len(p.context) == 0 { 691 return p.currentKey 692 } 693 return fmt.Sprintf("%s.%s", p.context, p.currentKey) 694 } 695 696 func stripFirstNewline(s string) string { 697 if len(s) > 0 && s[0] == '\n' { 698 return s[1:] 699 } 700 if len(s) > 1 && s[0] == '\r' && s[1] == '\n' { 701 return s[2:] 702 } 703 return s 704 } 705 706 // stripEscapedNewlines removes whitespace after line-ending backslashes in 707 // multiline strings. 708 // 709 // A line-ending backslash is an unescaped \ followed only by whitespace until 710 // the next newline. After a line-ending backslash, all whitespace is removed 711 // until the next non-whitespace character. 712 func (p *parser) stripEscapedNewlines(s string) string { 713 var ( 714 b strings.Builder 715 i int 716 ) 717 b.Grow(len(s)) 718 for { 719 ix := strings.Index(s[i:], `\`) 720 if ix < 0 { 721 b.WriteString(s) 722 return b.String() 723 } 724 i += ix 725 726 if len(s) > i+1 && s[i+1] == '\\' { 727 // Escaped backslash. 728 i += 2 729 continue 730 } 731 // Scan until the next non-whitespace. 732 j := i + 1 733 whitespaceLoop: 734 for ; j < len(s); j++ { 735 switch s[j] { 736 case ' ', '\t', '\r', '\n': 737 default: 738 break whitespaceLoop 739 } 740 } 741 if j == i+1 { 742 // Not a whitespace escape. 743 i++ 744 continue 745 } 746 if !strings.Contains(s[i:j], "\n") { 747 // This is not a line-ending backslash. (It's a bad escape sequence, 748 // but we can let replaceEscapes catch it.) 749 i++ 750 continue 751 } 752 b.WriteString(s[:i]) 753 s = s[j:] 754 i = 0 755 } 756 } 757 758 func (p *parser) replaceEscapes(it item, str string) string { 759 var ( 760 b strings.Builder 761 skip = 0 762 ) 763 b.Grow(len(str)) 764 for i, c := range str { 765 if skip > 0 { 766 skip-- 767 continue 768 } 769 if c != '\\' { 770 b.WriteRune(c) 771 continue 772 } 773 774 if i >= len(str) { 775 p.bug("Escape sequence at end of string.") 776 return "" 777 } 778 switch str[i+1] { 779 default: 780 p.bug("Expected valid escape code after \\, but got %q.", str[i+1]) 781 case ' ', '\t': 782 p.panicItemf(it, "invalid escape: '\\%c'", str[i+1]) 783 case 'b': 784 b.WriteByte(0x08) 785 skip = 1 786 case 't': 787 b.WriteByte(0x09) 788 skip = 1 789 case 'n': 790 b.WriteByte(0x0a) 791 skip = 1 792 case 'f': 793 b.WriteByte(0x0c) 794 skip = 1 795 case 'r': 796 b.WriteByte(0x0d) 797 skip = 1 798 case 'e': 799 b.WriteByte(0x1b) 800 skip = 1 801 case '"': 802 b.WriteByte(0x22) 803 skip = 1 804 case '\\': 805 b.WriteByte(0x5c) 806 skip = 1 807 // The lexer guarantees the correct number of characters are present; 808 // don't need to check here. 809 case 'x': 810 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4]) 811 b.WriteRune(escaped) 812 skip = 3 813 case 'u': 814 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6]) 815 b.WriteRune(escaped) 816 skip = 5 817 case 'U': 818 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10]) 819 b.WriteRune(escaped) 820 skip = 9 821 } 822 } 823 return b.String() 824 } 825 826 func (p *parser) asciiEscapeToUnicode(it item, s string) rune { 827 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) 828 if err != nil { 829 p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err) 830 } 831 if !utf8.ValidRune(rune(hex)) { 832 p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s) 833 } 834 return rune(hex) 835 }