// parser.go (13322B)
1 // Copyright 2015 Unknwon 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"): you may 4 // not use this file except in compliance with the License. You may obtain 5 // a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 // License for the specific language governing permissions and limitations 13 // under the License. 14 15 package ini 16 17 import ( 18 "bufio" 19 "bytes" 20 "fmt" 21 "io" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26 ) 27 28 const minReaderBufferSize = 4096 29 30 var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`) 31 32 type parserOptions struct { 33 IgnoreContinuation bool 34 IgnoreInlineComment bool 35 AllowPythonMultilineValues bool 36 SpaceBeforeInlineComment bool 37 UnescapeValueDoubleQuotes bool 38 UnescapeValueCommentSymbols bool 39 PreserveSurroundedQuote bool 40 DebugFunc DebugFunc 41 ReaderBufferSize int 42 } 43 44 type parser struct { 45 buf *bufio.Reader 46 options parserOptions 47 48 isEOF bool 49 count int 50 comment *bytes.Buffer 51 } 52 53 func (p *parser) debug(format string, args ...interface{}) { 54 if p.options.DebugFunc != nil { 55 p.options.DebugFunc(fmt.Sprintf(format, args...)) 56 } 57 } 58 59 func newParser(r io.Reader, opts parserOptions) *parser { 60 size := opts.ReaderBufferSize 61 if size < minReaderBufferSize { 62 size = minReaderBufferSize 63 } 64 65 return &parser{ 66 buf: bufio.NewReaderSize(r, size), 67 options: opts, 68 count: 1, 69 comment: &bytes.Buffer{}, 70 } 71 } 72 73 // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format. 
74 // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding 75 func (p *parser) BOM() error { 76 mask, err := p.buf.Peek(2) 77 if err != nil && err != io.EOF { 78 return err 79 } else if len(mask) < 2 { 80 return nil 81 } 82 83 switch { 84 case mask[0] == 254 && mask[1] == 255: 85 fallthrough 86 case mask[0] == 255 && mask[1] == 254: 87 _, err = p.buf.Read(mask) 88 if err != nil { 89 return err 90 } 91 case mask[0] == 239 && mask[1] == 187: 92 mask, err := p.buf.Peek(3) 93 if err != nil && err != io.EOF { 94 return err 95 } else if len(mask) < 3 { 96 return nil 97 } 98 if mask[2] == 191 { 99 _, err = p.buf.Read(mask) 100 if err != nil { 101 return err 102 } 103 } 104 } 105 return nil 106 } 107 108 func (p *parser) readUntil(delim byte) ([]byte, error) { 109 data, err := p.buf.ReadBytes(delim) 110 if err != nil { 111 if err == io.EOF { 112 p.isEOF = true 113 } else { 114 return nil, err 115 } 116 } 117 return data, nil 118 } 119 120 func cleanComment(in []byte) ([]byte, bool) { 121 i := bytes.IndexAny(in, "#;") 122 if i == -1 { 123 return nil, false 124 } 125 return in[i:], true 126 } 127 128 func readKeyName(delimiters string, in []byte) (string, int, error) { 129 line := string(in) 130 131 // Check if key name surrounded by quotes. 
132 var keyQuote string 133 switch line[0] { 134 case '"': 135 if len(line) > 6 && line[0:3] == `"""` { 136 keyQuote = `"""` 137 } else { 138 keyQuote = `"` 139 } 140 case '`': 141 keyQuote = "`" 142 } 143 144 // Get out key name 145 var endIdx int 146 if len(keyQuote) > 0 { 147 startIdx := len(keyQuote) 148 // FIXME: fail case -> """"""name"""=value 149 pos := strings.Index(line[startIdx:], keyQuote) 150 if pos == -1 { 151 return "", -1, fmt.Errorf("missing closing key quote: %s", line) 152 } 153 pos += startIdx 154 155 // Find key-value delimiter 156 i := strings.IndexAny(line[pos+startIdx:], delimiters) 157 if i < 0 { 158 return "", -1, ErrDelimiterNotFound{line} 159 } 160 endIdx = pos + i 161 return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil 162 } 163 164 endIdx = strings.IndexAny(line, delimiters) 165 if endIdx < 0 { 166 return "", -1, ErrDelimiterNotFound{line} 167 } 168 if endIdx == 0 { 169 return "", -1, ErrEmptyKeyName{line} 170 } 171 172 return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil 173 } 174 175 func (p *parser) readMultilines(line, val, valQuote string) (string, error) { 176 for { 177 data, err := p.readUntil('\n') 178 if err != nil { 179 return "", err 180 } 181 next := string(data) 182 183 pos := strings.LastIndex(next, valQuote) 184 if pos > -1 { 185 // Check if the line ends with backslash continuation after the quote 186 restOfLine := strings.TrimRight(next[pos+len(valQuote):], "\r\n") 187 if !p.options.IgnoreContinuation && strings.HasSuffix(strings.TrimSpace(restOfLine), `\`) { 188 val += next 189 continue 190 } 191 192 val += next[:pos] 193 194 comment, has := cleanComment([]byte(next[pos:])) 195 if has { 196 p.comment.Write(bytes.TrimSpace(comment)) 197 } 198 break 199 } 200 val += next 201 if p.isEOF { 202 return "", fmt.Errorf("missing closing key quote from %q to %q", line, next) 203 } 204 } 205 return val, nil 206 } 207 208 func (p *parser) readContinuationLines(val string) (string, error) { 209 for { 210 
data, err := p.readUntil('\n') 211 if err != nil { 212 return "", err 213 } 214 next := strings.TrimSpace(string(data)) 215 216 if len(next) == 0 { 217 break 218 } 219 val += next 220 if val[len(val)-1] != '\\' { 221 break 222 } 223 val = val[:len(val)-1] 224 } 225 return val, nil 226 } 227 228 // hasSurroundedQuote check if and only if the first and last characters 229 // are quotes \" or \'. 230 // It returns false if any other parts also contain same kind of quotes. 231 func hasSurroundedQuote(in string, quote byte) bool { 232 return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote && 233 strings.IndexByte(in[1:], quote) == len(in)-2 234 } 235 236 func (p *parser) readValue(in []byte, bufferSize int) (string, error) { 237 238 line := strings.TrimLeftFunc(string(in), unicode.IsSpace) 239 if len(line) == 0 { 240 if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' { 241 return p.readPythonMultilines(line, bufferSize) 242 } 243 return "", nil 244 } 245 246 var valQuote string 247 if len(line) > 3 && line[0:3] == `"""` { 248 valQuote = `"""` 249 } else if line[0] == '`' { 250 valQuote = "`" 251 } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' { 252 valQuote = `"` 253 } 254 255 if len(valQuote) > 0 { 256 startIdx := len(valQuote) 257 pos := strings.LastIndex(line[startIdx:], valQuote) 258 // Check for multi-line value 259 if pos == -1 { 260 return p.readMultilines(line, line[startIdx:], valQuote) 261 } 262 263 if p.options.UnescapeValueDoubleQuotes && valQuote == `"` { 264 return strings.ReplaceAll(line[startIdx:pos+startIdx], `\"`, `"`), nil 265 } 266 return line[startIdx : pos+startIdx], nil 267 } 268 269 lastChar := line[len(line)-1] 270 // Won't be able to reach here if value only contains whitespace 271 line = strings.TrimSpace(line) 272 trimmedLastChar := line[len(line)-1] 273 274 // Check continuation lines when desired 275 if !p.options.IgnoreContinuation && trimmedLastChar == '\\' { 276 return 
p.readContinuationLines(line[:len(line)-1]) 277 } 278 279 // Check if ignore inline comment 280 if !p.options.IgnoreInlineComment { 281 var i int 282 if p.options.SpaceBeforeInlineComment { 283 i = strings.Index(line, " #") 284 if i == -1 { 285 i = strings.Index(line, " ;") 286 } 287 288 } else { 289 i = strings.IndexAny(line, "#;") 290 } 291 292 if i > -1 { 293 p.comment.WriteString(line[i:]) 294 line = strings.TrimSpace(line[:i]) 295 } 296 297 } 298 299 // Trim single and double quotes 300 if (hasSurroundedQuote(line, '\'') || 301 hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote { 302 line = line[1 : len(line)-1] 303 } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols { 304 line = strings.ReplaceAll(line, `\;`, ";") 305 line = strings.ReplaceAll(line, `\#`, "#") 306 } else if p.options.AllowPythonMultilineValues && lastChar == '\n' { 307 return p.readPythonMultilines(line, bufferSize) 308 } 309 310 return line, nil 311 } 312 313 func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) { 314 parserBufferPeekResult, _ := p.buf.Peek(bufferSize) 315 peekBuffer := bytes.NewBuffer(parserBufferPeekResult) 316 317 for { 318 peekData, peekErr := peekBuffer.ReadBytes('\n') 319 if peekErr != nil && peekErr != io.EOF { 320 p.debug("readPythonMultilines: failed to peek with error: %v", peekErr) 321 return "", peekErr 322 } 323 324 p.debug("readPythonMultilines: parsing %q", string(peekData)) 325 326 peekMatches := pythonMultiline.FindStringSubmatch(string(peekData)) 327 p.debug("readPythonMultilines: matched %d parts", len(peekMatches)) 328 for n, v := range peekMatches { 329 p.debug(" %d: %q", n, v) 330 } 331 332 // Return if not a Python multiline value. 333 if len(peekMatches) != 3 { 334 p.debug("readPythonMultilines: end of value, got: %q", line) 335 return line, nil 336 } 337 338 // Advance the parser reader (buffer) in-sync with the peek buffer. 
339 _, err := p.buf.Discard(len(peekData)) 340 if err != nil { 341 p.debug("readPythonMultilines: failed to skip to the end, returning error") 342 return "", err 343 } 344 345 line += "\n" + peekMatches[0] 346 } 347 } 348 349 // parse parses data through an io.Reader. 350 func (f *File) parse(reader io.Reader) (err error) { 351 p := newParser(reader, parserOptions{ 352 IgnoreContinuation: f.options.IgnoreContinuation, 353 IgnoreInlineComment: f.options.IgnoreInlineComment, 354 AllowPythonMultilineValues: f.options.AllowPythonMultilineValues, 355 SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment, 356 UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes, 357 UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols, 358 PreserveSurroundedQuote: f.options.PreserveSurroundedQuote, 359 DebugFunc: f.options.DebugFunc, 360 ReaderBufferSize: f.options.ReaderBufferSize, 361 }) 362 if err = p.BOM(); err != nil { 363 return fmt.Errorf("BOM: %v", err) 364 } 365 366 // Ignore error because default section name is never empty string. 367 name := DefaultSection 368 if f.options.Insensitive || f.options.InsensitiveSections { 369 name = strings.ToLower(DefaultSection) 370 } 371 section, _ := f.NewSection(name) 372 373 // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key 374 var isLastValueEmpty bool 375 var lastRegularKey *Key 376 377 var line []byte 378 var inUnparseableSection bool 379 380 // NOTE: Iterate and increase `currentPeekSize` until 381 // the size of the parser buffer is found. 382 // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. 383 parserBufferSize := 0 384 // NOTE: Peek 4kb at a time. 
385 currentPeekSize := minReaderBufferSize 386 387 if f.options.AllowPythonMultilineValues { 388 for { 389 peekBytes, _ := p.buf.Peek(currentPeekSize) 390 peekBytesLength := len(peekBytes) 391 392 if parserBufferSize >= peekBytesLength { 393 break 394 } 395 396 currentPeekSize *= 2 397 parserBufferSize = peekBytesLength 398 } 399 } 400 401 for !p.isEOF { 402 line, err = p.readUntil('\n') 403 if err != nil { 404 return err 405 } 406 407 if f.options.AllowNestedValues && 408 isLastValueEmpty && len(line) > 0 { 409 if line[0] == ' ' || line[0] == '\t' { 410 err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) 411 if err != nil { 412 return err 413 } 414 continue 415 } 416 } 417 418 line = bytes.TrimLeftFunc(line, unicode.IsSpace) 419 if len(line) == 0 { 420 continue 421 } 422 423 // Comments 424 if line[0] == '#' || line[0] == ';' { 425 // Note: we do not care ending line break, 426 // it is needed for adding second line, 427 // so just clean it once at the end when set to value. 
428 p.comment.Write(line) 429 continue 430 } 431 432 // Section 433 if line[0] == '[' { 434 // Read to the next ']' (TODO: support quoted strings) 435 closeIdx := bytes.LastIndexByte(line, ']') 436 if closeIdx == -1 { 437 return fmt.Errorf("unclosed section: %s", line) 438 } 439 440 name := string(line[1:closeIdx]) 441 section, err = f.NewSection(name) 442 if err != nil { 443 return err 444 } 445 446 comment, has := cleanComment(line[closeIdx+1:]) 447 if has { 448 p.comment.Write(comment) 449 } 450 451 section.Comment = strings.TrimSpace(p.comment.String()) 452 453 // Reset auto-counter and comments 454 p.comment.Reset() 455 p.count = 1 456 // Nested values can't span sections 457 isLastValueEmpty = false 458 459 inUnparseableSection = false 460 for i := range f.options.UnparseableSections { 461 if f.options.UnparseableSections[i] == name || 462 ((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) { 463 inUnparseableSection = true 464 continue 465 } 466 } 467 continue 468 } 469 470 if inUnparseableSection { 471 section.isRawSection = true 472 section.rawBody += string(line) 473 continue 474 } 475 476 kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line) 477 if err != nil { 478 switch { 479 // Treat as boolean key when desired, and whole line is key name. 480 case IsErrDelimiterNotFound(err): 481 switch { 482 case f.options.AllowBooleanKeys: 483 kname, err := p.readValue(line, parserBufferSize) 484 if err != nil { 485 return err 486 } 487 key, err := section.NewBooleanKey(kname) 488 if err != nil { 489 return err 490 } 491 key.Comment = strings.TrimSpace(p.comment.String()) 492 p.comment.Reset() 493 continue 494 495 case f.options.SkipUnrecognizableLines: 496 continue 497 } 498 case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines: 499 continue 500 } 501 return err 502 } 503 504 // Auto increment. 
505 isAutoIncr := false 506 if kname == "-" { 507 isAutoIncr = true 508 kname = "#" + strconv.Itoa(p.count) 509 p.count++ 510 } 511 512 value, err := p.readValue(line[offset:], parserBufferSize) 513 if err != nil { 514 return err 515 } 516 isLastValueEmpty = len(value) == 0 517 518 key, err := section.NewKey(kname, value) 519 if err != nil { 520 return err 521 } 522 key.isAutoIncrement = isAutoIncr 523 key.Comment = strings.TrimSpace(p.comment.String()) 524 p.comment.Reset() 525 lastRegularKey = key 526 } 527 return nil 528 }