taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

parser.go (13322B)


      1 // Copyright 2015 Unknwon
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License"): you may
      4 // not use this file except in compliance with the License. You may obtain
      5 // a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     12 // License for the specific language governing permissions and limitations
     13 // under the License.
     14 
     15 package ini
     16 
     17 import (
     18 	"bufio"
     19 	"bytes"
     20 	"fmt"
     21 	"io"
     22 	"regexp"
     23 	"strconv"
     24 	"strings"
     25 	"unicode"
     26 )
     27 
// minReaderBufferSize is the smallest buffer size, in bytes, that newParser
// gives its buffered reader; smaller requested sizes are raised to this value.
const minReaderBufferSize = 4096

// pythonMultiline matches a line that begins with at least one tab, form feed
// or space. Group 1 captures that leading whitespace and group 2 captures the
// rest of the line.
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
     31 
// parserOptions holds the settings that control how a parser reads values.
//
//   - IgnoreContinuation: do not treat a trailing backslash as joining the
//     value with the following line.
//   - IgnoreInlineComment: keep '#' and ';' inside a value instead of cutting
//     the value there and recording the rest as a comment.
//   - AllowPythonMultilineValues: let a value continue onto following lines
//     that begin with a tab, form feed or space.
//   - SpaceBeforeInlineComment: only start an inline comment at " #" or " ;".
//   - UnescapeValueDoubleQuotes: treat a value that starts with '"' as quoted
//     and, when the closing quote is on the same line, replace \" with ".
//   - UnescapeValueCommentSymbols: replace \; and \# with ; and # in a value.
//   - PreserveSurroundedQuote: keep a pair of single or double quotes that
//     wraps the whole value instead of stripping it.
//   - DebugFunc: optional callback that receives formatted debug messages.
//   - ReaderBufferSize: requested size of the read buffer; values below
//     minReaderBufferSize are raised to it.
type parserOptions struct {
	IgnoreContinuation          bool
	IgnoreInlineComment         bool
	AllowPythonMultilineValues  bool
	SpaceBeforeInlineComment    bool
	UnescapeValueDoubleQuotes   bool
	UnescapeValueCommentSymbols bool
	PreserveSurroundedQuote     bool
	DebugFunc                   DebugFunc
	ReaderBufferSize            int
}
     43 
// parser carries the state used while reading one input.
//
//   - buf: buffered reader over the input.
//   - options: the settings the parser was created with.
//   - isEOF: set by readUntil once the reader reports io.EOF.
//   - count: number given to the next auto-increment ("-") key; it starts at 1
//     and is reset to 1 whenever a new section begins.
//   - comment: comment text collected so far; it is attached to the next
//     section or key and then cleared.
type parser struct {
	buf     *bufio.Reader
	options parserOptions

	isEOF   bool
	count   int
	comment *bytes.Buffer
}
     52 
     53 func (p *parser) debug(format string, args ...interface{}) {
     54 	if p.options.DebugFunc != nil {
     55 		p.options.DebugFunc(fmt.Sprintf(format, args...))
     56 	}
     57 }
     58 
     59 func newParser(r io.Reader, opts parserOptions) *parser {
     60 	size := opts.ReaderBufferSize
     61 	if size < minReaderBufferSize {
     62 		size = minReaderBufferSize
     63 	}
     64 
     65 	return &parser{
     66 		buf:     bufio.NewReaderSize(r, size),
     67 		options: opts,
     68 		count:   1,
     69 		comment: &bytes.Buffer{},
     70 	}
     71 }
     72 
     73 // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
     74 // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
     75 func (p *parser) BOM() error {
     76 	mask, err := p.buf.Peek(2)
     77 	if err != nil && err != io.EOF {
     78 		return err
     79 	} else if len(mask) < 2 {
     80 		return nil
     81 	}
     82 
     83 	switch {
     84 	case mask[0] == 254 && mask[1] == 255:
     85 		fallthrough
     86 	case mask[0] == 255 && mask[1] == 254:
     87 		_, err = p.buf.Read(mask)
     88 		if err != nil {
     89 			return err
     90 		}
     91 	case mask[0] == 239 && mask[1] == 187:
     92 		mask, err := p.buf.Peek(3)
     93 		if err != nil && err != io.EOF {
     94 			return err
     95 		} else if len(mask) < 3 {
     96 			return nil
     97 		}
     98 		if mask[2] == 191 {
     99 			_, err = p.buf.Read(mask)
    100 			if err != nil {
    101 				return err
    102 			}
    103 		}
    104 	}
    105 	return nil
    106 }
    107 
    108 func (p *parser) readUntil(delim byte) ([]byte, error) {
    109 	data, err := p.buf.ReadBytes(delim)
    110 	if err != nil {
    111 		if err == io.EOF {
    112 			p.isEOF = true
    113 		} else {
    114 			return nil, err
    115 		}
    116 	}
    117 	return data, nil
    118 }
    119 
    120 func cleanComment(in []byte) ([]byte, bool) {
    121 	i := bytes.IndexAny(in, "#;")
    122 	if i == -1 {
    123 		return nil, false
    124 	}
    125 	return in[i:], true
    126 }
    127 
    128 func readKeyName(delimiters string, in []byte) (string, int, error) {
    129 	line := string(in)
    130 
    131 	// Check if key name surrounded by quotes.
    132 	var keyQuote string
    133 	switch line[0] {
    134 	case '"':
    135 		if len(line) > 6 && line[0:3] == `"""` {
    136 			keyQuote = `"""`
    137 		} else {
    138 			keyQuote = `"`
    139 		}
    140 	case '`':
    141 		keyQuote = "`"
    142 	}
    143 
    144 	// Get out key name
    145 	var endIdx int
    146 	if len(keyQuote) > 0 {
    147 		startIdx := len(keyQuote)
    148 		// FIXME: fail case -> """"""name"""=value
    149 		pos := strings.Index(line[startIdx:], keyQuote)
    150 		if pos == -1 {
    151 			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
    152 		}
    153 		pos += startIdx
    154 
    155 		// Find key-value delimiter
    156 		i := strings.IndexAny(line[pos+startIdx:], delimiters)
    157 		if i < 0 {
    158 			return "", -1, ErrDelimiterNotFound{line}
    159 		}
    160 		endIdx = pos + i
    161 		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
    162 	}
    163 
    164 	endIdx = strings.IndexAny(line, delimiters)
    165 	if endIdx < 0 {
    166 		return "", -1, ErrDelimiterNotFound{line}
    167 	}
    168 	if endIdx == 0 {
    169 		return "", -1, ErrEmptyKeyName{line}
    170 	}
    171 
    172 	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
    173 }
    174 
    175 func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
    176 	for {
    177 		data, err := p.readUntil('\n')
    178 		if err != nil {
    179 			return "", err
    180 		}
    181 		next := string(data)
    182 
    183 		pos := strings.LastIndex(next, valQuote)
    184 		if pos > -1 {
    185 			// Check if the line ends with backslash continuation after the quote
    186 			restOfLine := strings.TrimRight(next[pos+len(valQuote):], "\r\n")
    187 			if !p.options.IgnoreContinuation && strings.HasSuffix(strings.TrimSpace(restOfLine), `\`) {
    188 				val += next
    189 				continue
    190 			}
    191 
    192 			val += next[:pos]
    193 
    194 			comment, has := cleanComment([]byte(next[pos:]))
    195 			if has {
    196 				p.comment.Write(bytes.TrimSpace(comment))
    197 			}
    198 			break
    199 		}
    200 		val += next
    201 		if p.isEOF {
    202 			return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
    203 		}
    204 	}
    205 	return val, nil
    206 }
    207 
    208 func (p *parser) readContinuationLines(val string) (string, error) {
    209 	for {
    210 		data, err := p.readUntil('\n')
    211 		if err != nil {
    212 			return "", err
    213 		}
    214 		next := strings.TrimSpace(string(data))
    215 
    216 		if len(next) == 0 {
    217 			break
    218 		}
    219 		val += next
    220 		if val[len(val)-1] != '\\' {
    221 			break
    222 		}
    223 		val = val[:len(val)-1]
    224 	}
    225 	return val, nil
    226 }
    227 
    228 // hasSurroundedQuote check if and only if the first and last characters
    229 // are quotes \" or \'.
    230 // It returns false if any other parts also contain same kind of quotes.
    231 func hasSurroundedQuote(in string, quote byte) bool {
    232 	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
    233 		strings.IndexByte(in[1:], quote) == len(in)-2
    234 }
    235 
    236 func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
    237 
    238 	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
    239 	if len(line) == 0 {
    240 		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
    241 			return p.readPythonMultilines(line, bufferSize)
    242 		}
    243 		return "", nil
    244 	}
    245 
    246 	var valQuote string
    247 	if len(line) > 3 && line[0:3] == `"""` {
    248 		valQuote = `"""`
    249 	} else if line[0] == '`' {
    250 		valQuote = "`"
    251 	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
    252 		valQuote = `"`
    253 	}
    254 
    255 	if len(valQuote) > 0 {
    256 		startIdx := len(valQuote)
    257 		pos := strings.LastIndex(line[startIdx:], valQuote)
    258 		// Check for multi-line value
    259 		if pos == -1 {
    260 			return p.readMultilines(line, line[startIdx:], valQuote)
    261 		}
    262 
    263 		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
    264 			return strings.ReplaceAll(line[startIdx:pos+startIdx], `\"`, `"`), nil
    265 		}
    266 		return line[startIdx : pos+startIdx], nil
    267 	}
    268 
    269 	lastChar := line[len(line)-1]
    270 	// Won't be able to reach here if value only contains whitespace
    271 	line = strings.TrimSpace(line)
    272 	trimmedLastChar := line[len(line)-1]
    273 
    274 	// Check continuation lines when desired
    275 	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
    276 		return p.readContinuationLines(line[:len(line)-1])
    277 	}
    278 
    279 	// Check if ignore inline comment
    280 	if !p.options.IgnoreInlineComment {
    281 		var i int
    282 		if p.options.SpaceBeforeInlineComment {
    283 			i = strings.Index(line, " #")
    284 			if i == -1 {
    285 				i = strings.Index(line, " ;")
    286 			}
    287 
    288 		} else {
    289 			i = strings.IndexAny(line, "#;")
    290 		}
    291 
    292 		if i > -1 {
    293 			p.comment.WriteString(line[i:])
    294 			line = strings.TrimSpace(line[:i])
    295 		}
    296 
    297 	}
    298 
    299 	// Trim single and double quotes
    300 	if (hasSurroundedQuote(line, '\'') ||
    301 		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
    302 		line = line[1 : len(line)-1]
    303 	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
    304 		line = strings.ReplaceAll(line, `\;`, ";")
    305 		line = strings.ReplaceAll(line, `\#`, "#")
    306 	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
    307 		return p.readPythonMultilines(line, bufferSize)
    308 	}
    309 
    310 	return line, nil
    311 }
    312 
    313 func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
    314 	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
    315 	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
    316 
    317 	for {
    318 		peekData, peekErr := peekBuffer.ReadBytes('\n')
    319 		if peekErr != nil && peekErr != io.EOF {
    320 			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
    321 			return "", peekErr
    322 		}
    323 
    324 		p.debug("readPythonMultilines: parsing %q", string(peekData))
    325 
    326 		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
    327 		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
    328 		for n, v := range peekMatches {
    329 			p.debug("   %d: %q", n, v)
    330 		}
    331 
    332 		// Return if not a Python multiline value.
    333 		if len(peekMatches) != 3 {
    334 			p.debug("readPythonMultilines: end of value, got: %q", line)
    335 			return line, nil
    336 		}
    337 
    338 		// Advance the parser reader (buffer) in-sync with the peek buffer.
    339 		_, err := p.buf.Discard(len(peekData))
    340 		if err != nil {
    341 			p.debug("readPythonMultilines: failed to skip to the end, returning error")
    342 			return "", err
    343 		}
    344 
    345 		line += "\n" + peekMatches[0]
    346 	}
    347 }
    348 
    349 // parse parses data through an io.Reader.
    350 func (f *File) parse(reader io.Reader) (err error) {
    351 	p := newParser(reader, parserOptions{
    352 		IgnoreContinuation:          f.options.IgnoreContinuation,
    353 		IgnoreInlineComment:         f.options.IgnoreInlineComment,
    354 		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
    355 		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
    356 		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
    357 		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
    358 		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
    359 		DebugFunc:                   f.options.DebugFunc,
    360 		ReaderBufferSize:            f.options.ReaderBufferSize,
    361 	})
    362 	if err = p.BOM(); err != nil {
    363 		return fmt.Errorf("BOM: %v", err)
    364 	}
    365 
    366 	// Ignore error because default section name is never empty string.
    367 	name := DefaultSection
    368 	if f.options.Insensitive || f.options.InsensitiveSections {
    369 		name = strings.ToLower(DefaultSection)
    370 	}
    371 	section, _ := f.NewSection(name)
    372 
    373 	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
    374 	var isLastValueEmpty bool
    375 	var lastRegularKey *Key
    376 
    377 	var line []byte
    378 	var inUnparseableSection bool
    379 
    380 	// NOTE: Iterate and increase `currentPeekSize` until
    381 	// the size of the parser buffer is found.
    382 	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
    383 	parserBufferSize := 0
    384 	// NOTE: Peek 4kb at a time.
    385 	currentPeekSize := minReaderBufferSize
    386 
    387 	if f.options.AllowPythonMultilineValues {
    388 		for {
    389 			peekBytes, _ := p.buf.Peek(currentPeekSize)
    390 			peekBytesLength := len(peekBytes)
    391 
    392 			if parserBufferSize >= peekBytesLength {
    393 				break
    394 			}
    395 
    396 			currentPeekSize *= 2
    397 			parserBufferSize = peekBytesLength
    398 		}
    399 	}
    400 
    401 	for !p.isEOF {
    402 		line, err = p.readUntil('\n')
    403 		if err != nil {
    404 			return err
    405 		}
    406 
    407 		if f.options.AllowNestedValues &&
    408 			isLastValueEmpty && len(line) > 0 {
    409 			if line[0] == ' ' || line[0] == '\t' {
    410 				err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
    411 				if err != nil {
    412 					return err
    413 				}
    414 				continue
    415 			}
    416 		}
    417 
    418 		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
    419 		if len(line) == 0 {
    420 			continue
    421 		}
    422 
    423 		// Comments
    424 		if line[0] == '#' || line[0] == ';' {
    425 			// Note: we do not care ending line break,
    426 			// it is needed for adding second line,
    427 			// so just clean it once at the end when set to value.
    428 			p.comment.Write(line)
    429 			continue
    430 		}
    431 
    432 		// Section
    433 		if line[0] == '[' {
    434 			// Read to the next ']' (TODO: support quoted strings)
    435 			closeIdx := bytes.LastIndexByte(line, ']')
    436 			if closeIdx == -1 {
    437 				return fmt.Errorf("unclosed section: %s", line)
    438 			}
    439 
    440 			name := string(line[1:closeIdx])
    441 			section, err = f.NewSection(name)
    442 			if err != nil {
    443 				return err
    444 			}
    445 
    446 			comment, has := cleanComment(line[closeIdx+1:])
    447 			if has {
    448 				p.comment.Write(comment)
    449 			}
    450 
    451 			section.Comment = strings.TrimSpace(p.comment.String())
    452 
    453 			// Reset auto-counter and comments
    454 			p.comment.Reset()
    455 			p.count = 1
    456 			// Nested values can't span sections
    457 			isLastValueEmpty = false
    458 
    459 			inUnparseableSection = false
    460 			for i := range f.options.UnparseableSections {
    461 				if f.options.UnparseableSections[i] == name ||
    462 					((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
    463 					inUnparseableSection = true
    464 					continue
    465 				}
    466 			}
    467 			continue
    468 		}
    469 
    470 		if inUnparseableSection {
    471 			section.isRawSection = true
    472 			section.rawBody += string(line)
    473 			continue
    474 		}
    475 
    476 		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
    477 		if err != nil {
    478 			switch {
    479 			// Treat as boolean key when desired, and whole line is key name.
    480 			case IsErrDelimiterNotFound(err):
    481 				switch {
    482 				case f.options.AllowBooleanKeys:
    483 					kname, err := p.readValue(line, parserBufferSize)
    484 					if err != nil {
    485 						return err
    486 					}
    487 					key, err := section.NewBooleanKey(kname)
    488 					if err != nil {
    489 						return err
    490 					}
    491 					key.Comment = strings.TrimSpace(p.comment.String())
    492 					p.comment.Reset()
    493 					continue
    494 
    495 				case f.options.SkipUnrecognizableLines:
    496 					continue
    497 				}
    498 			case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines:
    499 				continue
    500 			}
    501 			return err
    502 		}
    503 
    504 		// Auto increment.
    505 		isAutoIncr := false
    506 		if kname == "-" {
    507 			isAutoIncr = true
    508 			kname = "#" + strconv.Itoa(p.count)
    509 			p.count++
    510 		}
    511 
    512 		value, err := p.readValue(line[offset:], parserBufferSize)
    513 		if err != nil {
    514 			return err
    515 		}
    516 		isLastValueEmpty = len(value) == 0
    517 
    518 		key, err := section.NewKey(kname, value)
    519 		if err != nil {
    520 			return err
    521 		}
    522 		key.isAutoIncrement = isAutoIncr
    523 		key.Comment = strings.TrimSpace(p.comment.String())
    524 		p.comment.Reset()
    525 		lastRegularKey = key
    526 	}
    527 	return nil
    528 }