taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

lex.go (30022B)


      1 package toml
      2 
      3 import (
      4 	"fmt"
      5 	"reflect"
      6 	"runtime"
      7 	"strings"
      8 	"unicode"
      9 	"unicode/utf8"
     10 )
     11 
     12 type itemType int
     13 
     14 const (
     15 	itemError itemType = iota
     16 	itemEOF
     17 	itemText
     18 	itemString
     19 	itemStringEsc
     20 	itemRawString
     21 	itemMultilineString
     22 	itemRawMultilineString
     23 	itemBool
     24 	itemInteger
     25 	itemFloat
     26 	itemDatetime
     27 	itemArray // the start of an array
     28 	itemArrayEnd
     29 	itemTableStart
     30 	itemTableEnd
     31 	itemArrayTableStart
     32 	itemArrayTableEnd
     33 	itemKeyStart
     34 	itemKeyEnd
     35 	itemCommentStart
     36 	itemInlineTableStart
     37 	itemInlineTableEnd
     38 )
     39 
     40 const eof = 0
     41 
     42 type stateFn func(lx *lexer) stateFn
     43 
     44 func (p Position) String() string {
     45 	return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len)
     46 }
     47 
     48 type lexer struct {
     49 	input string
     50 	start int
     51 	pos   int
     52 	line  int
     53 	state stateFn
     54 	items chan item
     55 	esc   bool
     56 
     57 	// Allow for backing up up to 4 runes. This is necessary because TOML
     58 	// contains 3-rune tokens (""" and ''').
     59 	prevWidths [4]int
     60 	nprev      int  // how many of prevWidths are in use
     61 	atEOF      bool // If we emit an eof, we can still back up, but it is not OK to call next again.
     62 
     63 	// A stack of state functions used to maintain context.
     64 	//
     65 	// The idea is to reuse parts of the state machine in various places. For
     66 	// example, values can appear at the top level or within arbitrarily nested
     67 	// arrays. The last state on the stack is used after a value has been lexed.
     68 	// Similarly for comments.
     69 	stack []stateFn
     70 }
     71 
     72 type item struct {
     73 	typ itemType
     74 	val string
     75 	err error
     76 	pos Position
     77 }
     78 
     79 func (lx *lexer) nextItem() item {
     80 	for {
     81 		select {
     82 		case item := <-lx.items:
     83 			return item
     84 		default:
     85 			lx.state = lx.state(lx)
     86 			//fmt.Printf("     STATE %-24s  current: %-10s	stack: %s\n", lx.state, lx.current(), lx.stack)
     87 		}
     88 	}
     89 }
     90 
     91 func lex(input string) *lexer {
     92 	lx := &lexer{
     93 		input: input,
     94 		state: lexTop,
     95 		items: make(chan item, 10),
     96 		stack: make([]stateFn, 0, 10),
     97 		line:  1,
     98 	}
     99 	return lx
    100 }
    101 
    102 func (lx *lexer) push(state stateFn) {
    103 	lx.stack = append(lx.stack, state)
    104 }
    105 
    106 func (lx *lexer) pop() stateFn {
    107 	if len(lx.stack) == 0 {
    108 		panic("BUG in lexer: no states to pop")
    109 	}
    110 	last := lx.stack[len(lx.stack)-1]
    111 	lx.stack = lx.stack[0 : len(lx.stack)-1]
    112 	return last
    113 }
    114 
    115 func (lx *lexer) current() string {
    116 	return lx.input[lx.start:lx.pos]
    117 }
    118 
    119 func (lx lexer) getPos() Position {
    120 	p := Position{
    121 		Line:  lx.line,
    122 		Start: lx.start,
    123 		Len:   lx.pos - lx.start,
    124 	}
    125 	if p.Len <= 0 {
    126 		p.Len = 1
    127 	}
    128 	return p
    129 }
    130 
    131 func (lx *lexer) emit(typ itemType) {
    132 	// Needed for multiline strings ending with an incomplete UTF-8 sequence.
    133 	if lx.start > lx.pos {
    134 		lx.error(errLexUTF8{lx.input[lx.pos]})
    135 		return
    136 	}
    137 	lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()}
    138 	lx.start = lx.pos
    139 }
    140 
    141 func (lx *lexer) emitTrim(typ itemType) {
    142 	lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())}
    143 	lx.start = lx.pos
    144 }
    145 
    146 func (lx *lexer) next() (r rune) {
    147 	if lx.atEOF {
    148 		panic("BUG in lexer: next called after EOF")
    149 	}
    150 	if lx.pos >= len(lx.input) {
    151 		lx.atEOF = true
    152 		return eof
    153 	}
    154 
    155 	if lx.input[lx.pos] == '\n' {
    156 		lx.line++
    157 	}
    158 	lx.prevWidths[3] = lx.prevWidths[2]
    159 	lx.prevWidths[2] = lx.prevWidths[1]
    160 	lx.prevWidths[1] = lx.prevWidths[0]
    161 	if lx.nprev < 4 {
    162 		lx.nprev++
    163 	}
    164 
    165 	r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
    166 	if r == utf8.RuneError && w == 1 {
    167 		lx.error(errLexUTF8{lx.input[lx.pos]})
    168 		return utf8.RuneError
    169 	}
    170 
    171 	// Note: don't use peek() here, as this calls next().
    172 	if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
    173 		lx.errorControlChar(r)
    174 		return utf8.RuneError
    175 	}
    176 
    177 	lx.prevWidths[0] = w
    178 	lx.pos += w
    179 	return r
    180 }
    181 
    182 // ignore skips over the pending input before this point.
    183 func (lx *lexer) ignore() {
    184 	lx.start = lx.pos
    185 }
    186 
    187 // backup steps back one rune. Can be called 4 times between calls to next.
    188 func (lx *lexer) backup() {
    189 	if lx.atEOF {
    190 		lx.atEOF = false
    191 		return
    192 	}
    193 	if lx.nprev < 1 {
    194 		panic("BUG in lexer: backed up too far")
    195 	}
    196 	w := lx.prevWidths[0]
    197 	lx.prevWidths[0] = lx.prevWidths[1]
    198 	lx.prevWidths[1] = lx.prevWidths[2]
    199 	lx.prevWidths[2] = lx.prevWidths[3]
    200 	lx.nprev--
    201 
    202 	lx.pos -= w
    203 	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
    204 		lx.line--
    205 	}
    206 }
    207 
    208 // accept consumes the next rune if it's equal to `valid`.
    209 func (lx *lexer) accept(valid rune) bool {
    210 	if lx.next() == valid {
    211 		return true
    212 	}
    213 	lx.backup()
    214 	return false
    215 }
    216 
    217 // peek returns but does not consume the next rune in the input.
    218 func (lx *lexer) peek() rune {
    219 	r := lx.next()
    220 	lx.backup()
    221 	return r
    222 }
    223 
    224 // skip ignores all input that matches the given predicate.
    225 func (lx *lexer) skip(pred func(rune) bool) {
    226 	for {
    227 		r := lx.next()
    228 		if pred(r) {
    229 			continue
    230 		}
    231 		lx.backup()
    232 		lx.ignore()
    233 		return
    234 	}
    235 }
    236 
    237 // error stops all lexing by emitting an error and returning `nil`.
    238 //
    239 // Note that any value that is a character is escaped if it's a special
    240 // character (newlines, tabs, etc.).
    241 func (lx *lexer) error(err error) stateFn {
    242 	if lx.atEOF {
    243 		return lx.errorPrevLine(err)
    244 	}
    245 	lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
    246 	return nil
    247 }
    248 
    249 // errorfPrevline is like error(), but sets the position to the last column of
    250 // the previous line.
    251 //
    252 // This is so that unexpected EOF or NL errors don't show on a new blank line.
    253 func (lx *lexer) errorPrevLine(err error) stateFn {
    254 	pos := lx.getPos()
    255 	pos.Line--
    256 	pos.Len = 1
    257 	pos.Start = lx.pos - 1
    258 	lx.items <- item{typ: itemError, pos: pos, err: err}
    259 	return nil
    260 }
    261 
    262 // errorPos is like error(), but allows explicitly setting the position.
    263 func (lx *lexer) errorPos(start, length int, err error) stateFn {
    264 	pos := lx.getPos()
    265 	pos.Start = start
    266 	pos.Len = length
    267 	lx.items <- item{typ: itemError, pos: pos, err: err}
    268 	return nil
    269 }
    270 
    271 // errorf is like error, and creates a new error.
    272 func (lx *lexer) errorf(format string, values ...any) stateFn {
    273 	if lx.atEOF {
    274 		pos := lx.getPos()
    275 		if lx.pos >= 1 && lx.input[lx.pos-1] == '\n' {
    276 			pos.Line--
    277 		}
    278 		pos.Len = 1
    279 		pos.Start = lx.pos - 1
    280 		lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)}
    281 		return nil
    282 	}
    283 	lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)}
    284 	return nil
    285 }
    286 
    287 func (lx *lexer) errorControlChar(cc rune) stateFn {
    288 	return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
    289 }
    290 
    291 // lexTop consumes elements at the top level of TOML data.
    292 func lexTop(lx *lexer) stateFn {
    293 	r := lx.next()
    294 	if isWhitespace(r) || isNL(r) {
    295 		return lexSkip(lx, lexTop)
    296 	}
    297 	switch r {
    298 	case '#':
    299 		lx.push(lexTop)
    300 		return lexCommentStart
    301 	case '[':
    302 		return lexTableStart
    303 	case eof:
    304 		if lx.pos > lx.start {
    305 			// TODO: never reached? I think this can only occur on a bug in the
    306 			// lexer(?)
    307 			return lx.errorf("unexpected EOF")
    308 		}
    309 		lx.emit(itemEOF)
    310 		return nil
    311 	}
    312 
    313 	// At this point, the only valid item can be a key, so we back up
    314 	// and let the key lexer do the rest.
    315 	lx.backup()
    316 	lx.push(lexTopEnd)
    317 	return lexKeyStart
    318 }
    319 
    320 // lexTopEnd is entered whenever a top-level item has been consumed. (A value
    321 // or a table.) It must see only whitespace, and will turn back to lexTop
    322 // upon a newline. If it sees EOF, it will quit the lexer successfully.
    323 func lexTopEnd(lx *lexer) stateFn {
    324 	r := lx.next()
    325 	switch {
    326 	case r == '#':
    327 		// a comment will read to a newline for us.
    328 		lx.push(lexTop)
    329 		return lexCommentStart
    330 	case isWhitespace(r):
    331 		return lexTopEnd
    332 	case isNL(r):
    333 		lx.ignore()
    334 		return lexTop
    335 	case r == eof:
    336 		lx.emit(itemEOF)
    337 		return nil
    338 	}
    339 	return lx.errorf("expected a top-level item to end with a newline, comment, or EOF, but got %q instead", r)
    340 }
    341 
    342 // lexTable lexes the beginning of a table. Namely, it makes sure that
    343 // it starts with a character other than '.' and ']'.
    344 // It assumes that '[' has already been consumed.
    345 // It also handles the case that this is an item in an array of tables.
    346 // e.g., '[[name]]'.
    347 func lexTableStart(lx *lexer) stateFn {
    348 	if lx.peek() == '[' {
    349 		lx.next()
    350 		lx.emit(itemArrayTableStart)
    351 		lx.push(lexArrayTableEnd)
    352 	} else {
    353 		lx.emit(itemTableStart)
    354 		lx.push(lexTableEnd)
    355 	}
    356 	return lexTableNameStart
    357 }
    358 
    359 func lexTableEnd(lx *lexer) stateFn {
    360 	lx.emit(itemTableEnd)
    361 	return lexTopEnd
    362 }
    363 
    364 func lexArrayTableEnd(lx *lexer) stateFn {
    365 	if r := lx.next(); r != ']' {
    366 		return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
    367 	}
    368 	lx.emit(itemArrayTableEnd)
    369 	return lexTopEnd
    370 }
    371 
    372 func lexTableNameStart(lx *lexer) stateFn {
    373 	lx.skip(isWhitespace)
    374 	switch r := lx.peek(); {
    375 	case r == ']' || r == eof:
    376 		return lx.errorf("unexpected end of table name (table names cannot be empty)")
    377 	case r == '.':
    378 		return lx.errorf("unexpected table separator (table names cannot be empty)")
    379 	case r == '"' || r == '\'':
    380 		lx.ignore()
    381 		lx.push(lexTableNameEnd)
    382 		return lexQuotedName
    383 	default:
    384 		lx.push(lexTableNameEnd)
    385 		return lexBareName
    386 	}
    387 }
    388 
    389 // lexTableNameEnd reads the end of a piece of a table name, optionally
    390 // consuming whitespace.
    391 func lexTableNameEnd(lx *lexer) stateFn {
    392 	lx.skip(isWhitespace)
    393 	switch r := lx.next(); {
    394 	case r == '.':
    395 		lx.ignore()
    396 		return lexTableNameStart
    397 	case r == ']':
    398 		return lx.pop()
    399 	default:
    400 		return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
    401 	}
    402 }
    403 
    404 // lexBareName lexes one part of a key or table.
    405 //
    406 // It assumes that at least one valid character for the table has already been
    407 // read.
    408 //
    409 // Lexes only one part, e.g. only 'a' inside 'a.b'.
    410 func lexBareName(lx *lexer) stateFn {
    411 	r := lx.next()
    412 	if isBareKeyChar(r) {
    413 		return lexBareName
    414 	}
    415 	lx.backup()
    416 	lx.emit(itemText)
    417 	return lx.pop()
    418 }
    419 
    420 // lexQuotedName lexes one part of a quoted key or table name. It assumes that
    421 // it starts lexing at the quote itself (" or ').
    422 //
    423 // Lexes only one part, e.g. only '"a"' inside '"a".b'.
    424 func lexQuotedName(lx *lexer) stateFn {
    425 	r := lx.next()
    426 	switch {
    427 	case r == '"':
    428 		lx.ignore() // ignore the '"'
    429 		return lexString
    430 	case r == '\'':
    431 		lx.ignore() // ignore the "'"
    432 		return lexRawString
    433 
    434 	// TODO: I don't think any of the below conditions can ever be reached?
    435 	case isWhitespace(r):
    436 		return lexSkip(lx, lexValue)
    437 	case r == eof:
    438 		return lx.errorf("unexpected EOF; expected value")
    439 	default:
    440 		return lx.errorf("expected value but found %q instead", r)
    441 	}
    442 }
    443 
    444 // lexKeyStart consumes all key parts until a '='.
    445 func lexKeyStart(lx *lexer) stateFn {
    446 	lx.skip(isWhitespace)
    447 	switch r := lx.peek(); {
    448 	case r == '=' || r == eof:
    449 		return lx.errorf("unexpected '=': key name appears blank")
    450 	case r == '.':
    451 		return lx.errorf("unexpected '.': keys cannot start with a '.'")
    452 	case r == '"' || r == '\'':
    453 		lx.ignore()
    454 		fallthrough
    455 	default: // Bare key
    456 		lx.emit(itemKeyStart)
    457 		return lexKeyNameStart
    458 	}
    459 }
    460 
    461 func lexKeyNameStart(lx *lexer) stateFn {
    462 	lx.skip(isWhitespace)
    463 	switch r := lx.peek(); {
    464 	default:
    465 		lx.push(lexKeyEnd)
    466 		return lexBareName
    467 	case r == '"' || r == '\'':
    468 		lx.ignore()
    469 		lx.push(lexKeyEnd)
    470 		return lexQuotedName
    471 
    472 	// TODO: I think these can never be reached?
    473 	case r == '=' || r == eof:
    474 		return lx.errorf("unexpected '='")
    475 	case r == '.':
    476 		return lx.errorf("unexpected '.'")
    477 	}
    478 }
    479 
    480 // lexKeyEnd consumes the end of a key and trims whitespace (up to the key
    481 // separator).
    482 func lexKeyEnd(lx *lexer) stateFn {
    483 	lx.skip(isWhitespace)
    484 	switch r := lx.next(); {
    485 	case isWhitespace(r):
    486 		return lexSkip(lx, lexKeyEnd)
    487 	case r == eof: // TODO: never reached
    488 		return lx.errorf("unexpected EOF; expected key separator '='")
    489 	case r == '.':
    490 		lx.ignore()
    491 		return lexKeyNameStart
    492 	case r == '=':
    493 		lx.emit(itemKeyEnd)
    494 		return lexSkip(lx, lexValue)
    495 	default:
    496 		if r == '\n' {
    497 			return lx.errorPrevLine(fmt.Errorf("expected '.' or '=', but got %q instead", r))
    498 		}
    499 		return lx.errorf("expected '.' or '=', but got %q instead", r)
    500 	}
    501 }
    502 
    503 // lexValue starts the consumption of a value anywhere a value is expected.
    504 // lexValue will ignore whitespace.
    505 // After a value is lexed, the last state on the next is popped and returned.
    506 func lexValue(lx *lexer) stateFn {
    507 	// We allow whitespace to precede a value, but NOT newlines.
    508 	// In array syntax, the array states are responsible for ignoring newlines.
    509 	r := lx.next()
    510 	switch {
    511 	case isWhitespace(r):
    512 		return lexSkip(lx, lexValue)
    513 	case isDigit(r):
    514 		lx.backup() // avoid an extra state and use the same as above
    515 		return lexNumberOrDateStart
    516 	}
    517 	switch r {
    518 	case '[':
    519 		lx.ignore()
    520 		lx.emit(itemArray)
    521 		return lexArrayValue
    522 	case '{':
    523 		lx.ignore()
    524 		lx.emit(itemInlineTableStart)
    525 		return lexInlineTableValue
    526 	case '"':
    527 		if lx.accept('"') {
    528 			if lx.accept('"') {
    529 				lx.ignore() // Ignore """
    530 				return lexMultilineString
    531 			}
    532 			lx.backup()
    533 		}
    534 		lx.ignore() // ignore the '"'
    535 		return lexString
    536 	case '\'':
    537 		if lx.accept('\'') {
    538 			if lx.accept('\'') {
    539 				lx.ignore() // Ignore """
    540 				return lexMultilineRawString
    541 			}
    542 			lx.backup()
    543 		}
    544 		lx.ignore() // ignore the "'"
    545 		return lexRawString
    546 	case '.': // special error case, be kind to users
    547 		return lx.errorf("floats must start with a digit, not '.'")
    548 	case 'i', 'n':
    549 		if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
    550 			lx.emit(itemFloat)
    551 			return lx.pop()
    552 		}
    553 	case '-', '+':
    554 		return lexDecimalNumberStart
    555 	}
    556 	if unicode.IsLetter(r) {
    557 		// Be permissive here; lexBool will give a nice error if the
    558 		// user wrote something like
    559 		//   x = foo
    560 		// (i.e. not 'true' or 'false' but is something else word-like.)
    561 		lx.backup()
    562 		return lexBool
    563 	}
    564 	if r == eof {
    565 		return lx.errorf("unexpected EOF; expected value")
    566 	}
    567 	if r == '\n' {
    568 		return lx.errorPrevLine(fmt.Errorf("expected value but found %q instead", r))
    569 	}
    570 	return lx.errorf("expected value but found %q instead", r)
    571 }
    572 
    573 // lexArrayValue consumes one value in an array. It assumes that '[' or ','
    574 // have already been consumed. All whitespace and newlines are ignored.
    575 func lexArrayValue(lx *lexer) stateFn {
    576 	r := lx.next()
    577 	switch {
    578 	case isWhitespace(r) || isNL(r):
    579 		return lexSkip(lx, lexArrayValue)
    580 	case r == '#':
    581 		lx.push(lexArrayValue)
    582 		return lexCommentStart
    583 	case r == ',':
    584 		return lx.errorf("unexpected comma")
    585 	case r == ']':
    586 		return lexArrayEnd
    587 	}
    588 
    589 	lx.backup()
    590 	lx.push(lexArrayValueEnd)
    591 	return lexValue
    592 }
    593 
    594 // lexArrayValueEnd consumes everything between the end of an array value and
    595 // the next value (or the end of the array): it ignores whitespace and newlines
    596 // and expects either a ',' or a ']'.
    597 func lexArrayValueEnd(lx *lexer) stateFn {
    598 	switch r := lx.next(); {
    599 	case isWhitespace(r) || isNL(r):
    600 		return lexSkip(lx, lexArrayValueEnd)
    601 	case r == '#':
    602 		lx.push(lexArrayValueEnd)
    603 		return lexCommentStart
    604 	case r == ',':
    605 		lx.ignore()
    606 		return lexArrayValue // move on to the next value
    607 	case r == ']':
    608 		return lexArrayEnd
    609 	default:
    610 		return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
    611 	}
    612 }
    613 
    614 // lexArrayEnd finishes the lexing of an array.
    615 // It assumes that a ']' has just been consumed.
    616 func lexArrayEnd(lx *lexer) stateFn {
    617 	lx.ignore()
    618 	lx.emit(itemArrayEnd)
    619 	return lx.pop()
    620 }
    621 
    622 // lexInlineTableValue consumes one key/value pair in an inline table.
    623 // It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
    624 func lexInlineTableValue(lx *lexer) stateFn {
    625 	r := lx.next()
    626 	switch {
    627 	case isWhitespace(r):
    628 		return lexSkip(lx, lexInlineTableValue)
    629 	case isNL(r):
    630 		return lexSkip(lx, lexInlineTableValue)
    631 	case r == '#':
    632 		lx.push(lexInlineTableValue)
    633 		return lexCommentStart
    634 	case r == ',':
    635 		return lx.errorf("unexpected comma")
    636 	case r == '}':
    637 		return lexInlineTableEnd
    638 	}
    639 	lx.backup()
    640 	lx.push(lexInlineTableValueEnd)
    641 	return lexKeyStart
    642 }
    643 
    644 // lexInlineTableValueEnd consumes everything between the end of an inline table
    645 // key/value pair and the next pair (or the end of the table):
    646 // it ignores whitespace and expects either a ',' or a '}'.
    647 func lexInlineTableValueEnd(lx *lexer) stateFn {
    648 	switch r := lx.next(); {
    649 	case isWhitespace(r):
    650 		return lexSkip(lx, lexInlineTableValueEnd)
    651 	case isNL(r):
    652 		return lexSkip(lx, lexInlineTableValueEnd)
    653 	case r == '#':
    654 		lx.push(lexInlineTableValueEnd)
    655 		return lexCommentStart
    656 	case r == ',':
    657 		lx.ignore()
    658 		lx.skip(isWhitespace)
    659 		if lx.peek() == '}' {
    660 			return lexInlineTableValueEnd
    661 		}
    662 		return lexInlineTableValue
    663 	case r == '}':
    664 		return lexInlineTableEnd
    665 	default:
    666 		return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
    667 	}
    668 }
    669 
    670 func runeOrEOF(r rune) string {
    671 	if r == eof {
    672 		return "end of file"
    673 	}
    674 	return "'" + string(r) + "'"
    675 }
    676 
    677 // lexInlineTableEnd finishes the lexing of an inline table.
    678 // It assumes that a '}' has just been consumed.
    679 func lexInlineTableEnd(lx *lexer) stateFn {
    680 	lx.ignore()
    681 	lx.emit(itemInlineTableEnd)
    682 	return lx.pop()
    683 }
    684 
    685 // lexString consumes the inner contents of a string. It assumes that the
    686 // beginning '"' has already been consumed and ignored.
    687 func lexString(lx *lexer) stateFn {
    688 	r := lx.next()
    689 	switch {
    690 	case r == eof:
    691 		return lx.errorf(`unexpected EOF; expected '"'`)
    692 	case isNL(r):
    693 		return lx.errorPrevLine(errLexStringNL{})
    694 	case r == '\\':
    695 		lx.push(lexString)
    696 		return lexStringEscape
    697 	case r == '"':
    698 		lx.backup()
    699 		if lx.esc {
    700 			lx.esc = false
    701 			lx.emit(itemStringEsc)
    702 		} else {
    703 			lx.emit(itemString)
    704 		}
    705 		lx.next()
    706 		lx.ignore()
    707 		return lx.pop()
    708 	}
    709 	return lexString
    710 }
    711 
    712 // lexMultilineString consumes the inner contents of a string. It assumes that
    713 // the beginning '"""' has already been consumed and ignored.
    714 func lexMultilineString(lx *lexer) stateFn {
    715 	r := lx.next()
    716 	switch r {
    717 	default:
    718 		return lexMultilineString
    719 	case eof:
    720 		return lx.errorf(`unexpected EOF; expected '"""'`)
    721 	case '\\':
    722 		return lexMultilineStringEscape
    723 	case '"':
    724 		/// Found " → try to read two more "".
    725 		if lx.accept('"') {
    726 			if lx.accept('"') {
    727 				/// Peek ahead: the string can contain " and "", including at the
    728 				/// end: """str"""""
    729 				/// 6 or more at the end, however, is an error.
    730 				if lx.peek() == '"' {
    731 					/// Check if we already lexed 5 's; if so we have 6 now, and
    732 					/// that's just too many man!
    733 					///
    734 					/// Second check is for the edge case:
    735 					///
    736 					///            two quotes allowed.
    737 					///            vv
    738 					///   """lol \""""""
    739 					///          ^^  ^^^---- closing three
    740 					///     escaped
    741 					///
    742 					/// But ugly, but it works
    743 					if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
    744 						return lx.errorf(`unexpected '""""""'`)
    745 					}
    746 					lx.backup()
    747 					lx.backup()
    748 					return lexMultilineString
    749 				}
    750 
    751 				lx.backup() /// backup: don't include the """ in the item.
    752 				lx.backup()
    753 				lx.backup()
    754 				lx.esc = false
    755 				lx.emit(itemMultilineString)
    756 				lx.next() /// Read over ''' again and discard it.
    757 				lx.next()
    758 				lx.next()
    759 				lx.ignore()
    760 				return lx.pop()
    761 			}
    762 			lx.backup()
    763 		}
    764 		return lexMultilineString
    765 	}
    766 }
    767 
    768 // lexRawString consumes a raw string. Nothing can be escaped in such a string.
    769 // It assumes that the beginning "'" has already been consumed and ignored.
    770 func lexRawString(lx *lexer) stateFn {
    771 	r := lx.next()
    772 	switch {
    773 	default:
    774 		return lexRawString
    775 	case r == eof:
    776 		return lx.errorf(`unexpected EOF; expected "'"`)
    777 	case isNL(r):
    778 		return lx.errorPrevLine(errLexStringNL{})
    779 	case r == '\'':
    780 		lx.backup()
    781 		lx.emit(itemRawString)
    782 		lx.next()
    783 		lx.ignore()
    784 		return lx.pop()
    785 	}
    786 }
    787 
    788 // lexMultilineRawString consumes a raw string. Nothing can be escaped in such a
    789 // string. It assumes that the beginning triple-' has already been consumed and
    790 // ignored.
    791 func lexMultilineRawString(lx *lexer) stateFn {
    792 	r := lx.next()
    793 	switch r {
    794 	default:
    795 		return lexMultilineRawString
    796 	case eof:
    797 		return lx.errorf(`unexpected EOF; expected "'''"`)
    798 	case '\'':
    799 		/// Found ' → try to read two more ''.
    800 		if lx.accept('\'') {
    801 			if lx.accept('\'') {
    802 				/// Peek ahead: the string can contain ' and '', including at the
    803 				/// end: '''str'''''
    804 				/// 6 or more at the end, however, is an error.
    805 				if lx.peek() == '\'' {
    806 					/// Check if we already lexed 5 's; if so we have 6 now, and
    807 					/// that's just too many man!
    808 					if strings.HasSuffix(lx.current(), "'''''") {
    809 						return lx.errorf(`unexpected "''''''"`)
    810 					}
    811 					lx.backup()
    812 					lx.backup()
    813 					return lexMultilineRawString
    814 				}
    815 
    816 				lx.backup() /// backup: don't include the ''' in the item.
    817 				lx.backup()
    818 				lx.backup()
    819 				lx.emit(itemRawMultilineString)
    820 				lx.next() /// Read over ''' again and discard it.
    821 				lx.next()
    822 				lx.next()
    823 				lx.ignore()
    824 				return lx.pop()
    825 			}
    826 			lx.backup()
    827 		}
    828 		return lexMultilineRawString
    829 	}
    830 }
    831 
    832 // lexMultilineStringEscape consumes an escaped character. It assumes that the
    833 // preceding '\\' has already been consumed.
    834 func lexMultilineStringEscape(lx *lexer) stateFn {
    835 	if isNL(lx.next()) { /// \ escaping newline.
    836 		return lexMultilineString
    837 	}
    838 	lx.backup()
    839 	lx.push(lexMultilineString)
    840 	return lexStringEscape(lx)
    841 }
    842 
    843 func lexStringEscape(lx *lexer) stateFn {
    844 	lx.esc = true
    845 	r := lx.next()
    846 	switch r {
    847 	case 'e':
    848 		fallthrough
    849 	case 'b':
    850 		fallthrough
    851 	case 't':
    852 		fallthrough
    853 	case 'n':
    854 		fallthrough
    855 	case 'f':
    856 		fallthrough
    857 	case 'r':
    858 		fallthrough
    859 	case '"':
    860 		fallthrough
    861 	case ' ', '\t':
    862 		// Inside """ .. """ strings you can use \ to escape newlines, and any
    863 		// amount of whitespace can be between the \ and \n.
    864 		fallthrough
    865 	case '\\':
    866 		return lx.pop()
    867 	case 'x':
    868 		return lexHexEscape
    869 	case 'u':
    870 		return lexShortUnicodeEscape
    871 	case 'U':
    872 		return lexLongUnicodeEscape
    873 	}
    874 	return lx.error(errLexEscape{r})
    875 }
    876 
    877 func lexHexEscape(lx *lexer) stateFn {
    878 	var r rune
    879 	for i := 0; i < 2; i++ {
    880 		r = lx.next()
    881 		if !isHex(r) {
    882 			return lx.errorf(`expected two hexadecimal digits after '\x', but got %q instead`, lx.current())
    883 		}
    884 	}
    885 	return lx.pop()
    886 }
    887 
    888 func lexShortUnicodeEscape(lx *lexer) stateFn {
    889 	var r rune
    890 	for i := 0; i < 4; i++ {
    891 		r = lx.next()
    892 		if !isHex(r) {
    893 			return lx.errorf(`expected four hexadecimal digits after '\u', but got %q instead`, lx.current())
    894 		}
    895 	}
    896 	return lx.pop()
    897 }
    898 
    899 func lexLongUnicodeEscape(lx *lexer) stateFn {
    900 	var r rune
    901 	for i := 0; i < 8; i++ {
    902 		r = lx.next()
    903 		if !isHex(r) {
    904 			return lx.errorf(`expected eight hexadecimal digits after '\U', but got %q instead`, lx.current())
    905 		}
    906 	}
    907 	return lx.pop()
    908 }
    909 
    910 // lexNumberOrDateStart processes the first character of a value which begins
    911 // with a digit. It exists to catch values starting with '0', so that
    912 // lexBaseNumberOrDate can differentiate base prefixed integers from other
    913 // types.
    914 func lexNumberOrDateStart(lx *lexer) stateFn {
    915 	if lx.next() == '0' {
    916 		return lexBaseNumberOrDate
    917 	}
    918 	return lexNumberOrDate
    919 }
    920 
    921 // lexNumberOrDate consumes either an integer, float or datetime.
    922 func lexNumberOrDate(lx *lexer) stateFn {
    923 	r := lx.next()
    924 	if isDigit(r) {
    925 		return lexNumberOrDate
    926 	}
    927 	switch r {
    928 	case '-', ':':
    929 		return lexDatetime
    930 	case '_':
    931 		return lexDecimalNumber
    932 	case '.', 'e', 'E':
    933 		return lexFloat
    934 	}
    935 
    936 	lx.backup()
    937 	lx.emit(itemInteger)
    938 	return lx.pop()
    939 }
    940 
    941 // lexDatetime consumes a Datetime, to a first approximation.
    942 // The parser validates that it matches one of the accepted formats.
    943 func lexDatetime(lx *lexer) stateFn {
    944 	r := lx.next()
    945 	if isDigit(r) {
    946 		return lexDatetime
    947 	}
    948 	switch r {
    949 	case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
    950 		return lexDatetime
    951 	}
    952 
    953 	lx.backup()
    954 	lx.emitTrim(itemDatetime)
    955 	return lx.pop()
    956 }
    957 
    958 // lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
    959 func lexHexInteger(lx *lexer) stateFn {
    960 	r := lx.next()
    961 	if isHex(r) {
    962 		return lexHexInteger
    963 	}
    964 	switch r {
    965 	case '_':
    966 		return lexHexInteger
    967 	}
    968 
    969 	lx.backup()
    970 	lx.emit(itemInteger)
    971 	return lx.pop()
    972 }
    973 
    974 // lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
    975 func lexOctalInteger(lx *lexer) stateFn {
    976 	r := lx.next()
    977 	if isOctal(r) {
    978 		return lexOctalInteger
    979 	}
    980 	switch r {
    981 	case '_':
    982 		return lexOctalInteger
    983 	}
    984 
    985 	lx.backup()
    986 	lx.emit(itemInteger)
    987 	return lx.pop()
    988 }
    989 
    990 // lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
    991 func lexBinaryInteger(lx *lexer) stateFn {
    992 	r := lx.next()
    993 	if isBinary(r) {
    994 		return lexBinaryInteger
    995 	}
    996 	switch r {
    997 	case '_':
    998 		return lexBinaryInteger
    999 	}
   1000 
   1001 	lx.backup()
   1002 	lx.emit(itemInteger)
   1003 	return lx.pop()
   1004 }
   1005 
   1006 // lexDecimalNumber consumes a decimal float or integer.
   1007 func lexDecimalNumber(lx *lexer) stateFn {
   1008 	r := lx.next()
   1009 	if isDigit(r) {
   1010 		return lexDecimalNumber
   1011 	}
   1012 	switch r {
   1013 	case '.', 'e', 'E':
   1014 		return lexFloat
   1015 	case '_':
   1016 		return lexDecimalNumber
   1017 	}
   1018 
   1019 	lx.backup()
   1020 	lx.emit(itemInteger)
   1021 	return lx.pop()
   1022 }
   1023 
   1024 // lexDecimalNumber consumes the first digit of a number beginning with a sign.
   1025 // It assumes the sign has already been consumed. Values which start with a sign
   1026 // are only allowed to be decimal integers or floats.
   1027 //
   1028 // The special "nan" and "inf" values are also recognized.
   1029 func lexDecimalNumberStart(lx *lexer) stateFn {
   1030 	r := lx.next()
   1031 
   1032 	// Special error cases to give users better error messages
   1033 	switch r {
   1034 	case 'i':
   1035 		if !lx.accept('n') || !lx.accept('f') {
   1036 			return lx.errorf("invalid float: '%s'", lx.current())
   1037 		}
   1038 		lx.emit(itemFloat)
   1039 		return lx.pop()
   1040 	case 'n':
   1041 		if !lx.accept('a') || !lx.accept('n') {
   1042 			return lx.errorf("invalid float: '%s'", lx.current())
   1043 		}
   1044 		lx.emit(itemFloat)
   1045 		return lx.pop()
   1046 	case '0':
   1047 		p := lx.peek()
   1048 		switch p {
   1049 		case 'b', 'o', 'x':
   1050 			return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
   1051 		}
   1052 	case '.':
   1053 		return lx.errorf("floats must start with a digit, not '.'")
   1054 	}
   1055 
   1056 	if isDigit(r) {
   1057 		return lexDecimalNumber
   1058 	}
   1059 
   1060 	return lx.errorf("expected a digit but got %q", r)
   1061 }
   1062 
   1063 // lexBaseNumberOrDate differentiates between the possible values which
   1064 // start with '0'. It assumes that before reaching this state, the initial '0'
   1065 // has been consumed.
   1066 func lexBaseNumberOrDate(lx *lexer) stateFn {
   1067 	r := lx.next()
   1068 	// Note: All datetimes start with at least two digits, so we don't
   1069 	// handle date characters (':', '-', etc.) here.
   1070 	if isDigit(r) {
   1071 		return lexNumberOrDate
   1072 	}
   1073 	switch r {
   1074 	case '_':
   1075 		// Can only be decimal, because there can't be an underscore
   1076 		// between the '0' and the base designator, and dates can't
   1077 		// contain underscores.
   1078 		return lexDecimalNumber
   1079 	case '.', 'e', 'E':
   1080 		return lexFloat
   1081 	case 'b':
   1082 		r = lx.peek()
   1083 		if !isBinary(r) {
   1084 			lx.errorf("not a binary number: '%s%c'", lx.current(), r)
   1085 		}
   1086 		return lexBinaryInteger
   1087 	case 'o':
   1088 		r = lx.peek()
   1089 		if !isOctal(r) {
   1090 			lx.errorf("not an octal number: '%s%c'", lx.current(), r)
   1091 		}
   1092 		return lexOctalInteger
   1093 	case 'x':
   1094 		r = lx.peek()
   1095 		if !isHex(r) {
   1096 			lx.errorf("not a hexadecimal number: '%s%c'", lx.current(), r)
   1097 		}
   1098 		return lexHexInteger
   1099 	}
   1100 
   1101 	lx.backup()
   1102 	lx.emit(itemInteger)
   1103 	return lx.pop()
   1104 }
   1105 
   1106 // lexFloat consumes the elements of a float. It allows any sequence of
   1107 // float-like characters, so floats emitted by the lexer are only a first
   1108 // approximation and must be validated by the parser.
   1109 func lexFloat(lx *lexer) stateFn {
   1110 	r := lx.next()
   1111 	if isDigit(r) {
   1112 		return lexFloat
   1113 	}
   1114 	switch r {
   1115 	case '_', '.', '-', '+', 'e', 'E':
   1116 		return lexFloat
   1117 	}
   1118 
   1119 	lx.backup()
   1120 	lx.emit(itemFloat)
   1121 	return lx.pop()
   1122 }
   1123 
   1124 // lexBool consumes a bool string: 'true' or 'false.
   1125 func lexBool(lx *lexer) stateFn {
   1126 	var rs []rune
   1127 	for {
   1128 		r := lx.next()
   1129 		if !unicode.IsLetter(r) {
   1130 			lx.backup()
   1131 			break
   1132 		}
   1133 		rs = append(rs, r)
   1134 	}
   1135 	s := string(rs)
   1136 	switch s {
   1137 	case "true", "false":
   1138 		lx.emit(itemBool)
   1139 		return lx.pop()
   1140 	}
   1141 	return lx.errorf("expected value but found %q instead", s)
   1142 }
   1143 
   1144 // lexCommentStart begins the lexing of a comment. It will emit
   1145 // itemCommentStart and consume no characters, passing control to lexComment.
   1146 func lexCommentStart(lx *lexer) stateFn {
   1147 	lx.ignore()
   1148 	lx.emit(itemCommentStart)
   1149 	return lexComment
   1150 }
   1151 
   1152 // lexComment lexes an entire comment. It assumes that '#' has been consumed.
   1153 // It will consume *up to* the first newline character, and pass control
   1154 // back to the last state on the stack.
   1155 func lexComment(lx *lexer) stateFn {
   1156 	switch r := lx.next(); {
   1157 	case isNL(r) || r == eof:
   1158 		lx.backup()
   1159 		lx.emit(itemText)
   1160 		return lx.pop()
   1161 	default:
   1162 		return lexComment
   1163 	}
   1164 }
   1165 
   1166 // lexSkip ignores all slurped input and moves on to the next state.
   1167 func lexSkip(lx *lexer, nextState stateFn) stateFn {
   1168 	lx.ignore()
   1169 	return nextState
   1170 }
   1171 
   1172 func (s stateFn) String() string {
   1173 	if s == nil {
   1174 		return "<nil>"
   1175 	}
   1176 	name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
   1177 	if i := strings.LastIndexByte(name, '.'); i > -1 {
   1178 		name = name[i+1:]
   1179 	}
   1180 	return name + "()"
   1181 }
   1182 
   1183 func (itype itemType) String() string {
   1184 	switch itype {
   1185 	case itemError:
   1186 		return "Error"
   1187 	case itemEOF:
   1188 		return "EOF"
   1189 	case itemText:
   1190 		return "Text"
   1191 	case itemString, itemStringEsc, itemRawString, itemMultilineString, itemRawMultilineString:
   1192 		return "String"
   1193 	case itemBool:
   1194 		return "Bool"
   1195 	case itemInteger:
   1196 		return "Integer"
   1197 	case itemFloat:
   1198 		return "Float"
   1199 	case itemDatetime:
   1200 		return "DateTime"
   1201 	case itemArray:
   1202 		return "Array"
   1203 	case itemArrayEnd:
   1204 		return "ArrayEnd"
   1205 	case itemTableStart:
   1206 		return "TableStart"
   1207 	case itemTableEnd:
   1208 		return "TableEnd"
   1209 	case itemArrayTableStart:
   1210 		return "ArrayTableStart"
   1211 	case itemArrayTableEnd:
   1212 		return "ArrayTableEnd"
   1213 	case itemKeyStart:
   1214 		return "KeyStart"
   1215 	case itemKeyEnd:
   1216 		return "KeyEnd"
   1217 	case itemCommentStart:
   1218 		return "CommentStart"
   1219 	case itemInlineTableStart:
   1220 		return "InlineTableStart"
   1221 	case itemInlineTableEnd:
   1222 		return "InlineTableEnd"
   1223 	}
   1224 	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
   1225 }
   1226 
   1227 func (item item) String() string {
   1228 	return fmt.Sprintf("(%s, %s)", item.typ, item.val)
   1229 }
   1230 
   1231 func isWhitespace(r rune) bool { return r == '\t' || r == ' ' }
   1232 func isNL(r rune) bool         { return r == '\n' || r == '\r' }
   1233 func isControl(r rune) bool { // Control characters except \t, \r, \n
   1234 	switch r {
   1235 	case '\t', '\r', '\n':
   1236 		return false
   1237 	default:
   1238 		return (r >= 0x00 && r <= 0x1f) || r == 0x7f
   1239 	}
   1240 }
   1241 func isDigit(r rune) bool  { return r >= '0' && r <= '9' }
   1242 func isBinary(r rune) bool { return r == '0' || r == '1' }
   1243 func isOctal(r rune) bool  { return r >= '0' && r <= '7' }
   1244 func isHex(r rune) bool    { return (r >= '0' && r <= '9') || (r|0x20 >= 'a' && r|0x20 <= 'f') }
   1245 func isBareKeyChar(r rune) bool {
   1246 	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') ||
   1247 		(r >= '0' && r <= '9') || r == '_' || r == '-'
   1248 }