taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

pattern.go (12292B)


      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package number
      6 
      7 import (
      8 	"errors"
      9 	"unicode/utf8"
     10 )
     11 
     12 // This file contains a parser for the CLDR number patterns as described in
     13 // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
     14 //
     15 // The following BNF is derived from this standard.
     16 //
     17 // pattern    := subpattern (';' subpattern)?
     18 // subpattern := affix? number exponent? affix?
     19 // number     := decimal | sigDigits
     20 // decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
     21 // fraction   := '0'* '#'*
     22 // sigDigits  := '#'* '@' '@'* '#'*
     23 // exponent   := 'E' '+'? '0'* '0'
     24 // padSpec    := '*' \L
     25 //
     26 // Notes:
     27 // - An affix pattern may contain any runes, but runes with special meaning
     28 //   should be escaped.
     29 // - Sequences of digits, '#', and '@' in decimal and sigDigits may have
     30 //   interstitial commas.
     31 
     32 // TODO: replace special characters in affixes (-, +, ¤) with control codes.
     33 
     34 // Pattern holds information for formatting numbers. It is designed to hold
     35 // information from CLDR number patterns.
     36 //
     37 // This pattern is precompiled  for all patterns for all languages. Even though
     38 // the number of patterns is not very large, we want to keep this small.
     39 //
     40 // This type is only intended for internal use.
     41 type Pattern struct {
     42 	RoundingContext
     43 
     44 	Affix       string // includes prefix and suffix. First byte is prefix length.
     45 	Offset      uint16 // Offset into Affix for prefix and suffix
     46 	NegOffset   uint16 // Offset into Affix for negative prefix and suffix or 0.
     47 	PadRune     rune
     48 	FormatWidth uint16
     49 
     50 	GroupingSize [2]uint8
     51 	Flags        PatternFlag
     52 }
     53 
     54 // A RoundingContext indicates how a number should be converted to digits.
     55 // It contains all information needed to determine the "visible digits" as
     56 // required by the pluralization rules.
     57 type RoundingContext struct {
     58 	// TODO: unify these two fields so that there is a more unambiguous meaning
     59 	// of how precision is handled.
     60 	MaxSignificantDigits int16 // -1 is unlimited
     61 	MaxFractionDigits    int16 // -1 is unlimited
     62 
     63 	Increment      uint32
     64 	IncrementScale uint8 // May differ from printed scale.
     65 
     66 	Mode RoundingMode
     67 
     68 	DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
     69 
     70 	// Number of digits.
     71 	MinIntegerDigits uint8
     72 
     73 	MaxIntegerDigits     uint8
     74 	MinFractionDigits    uint8
     75 	MinSignificantDigits uint8
     76 
     77 	MinExponentDigits uint8
     78 }
     79 
     80 // RoundSignificantDigits returns the number of significant digits an
     81 // implementation of Convert may round to or n < 0 if there is no maximum or
     82 // a maximum is not recommended.
     83 func (r *RoundingContext) RoundSignificantDigits() (n int) {
     84 	if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
     85 		return int(r.MaxSignificantDigits)
     86 	} else if r.isScientific() && r.MaxIntegerDigits == 1 {
     87 		if r.MaxSignificantDigits == 0 ||
     88 			int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
     89 			// Note: don't add DigitShift: it is only used for decimals.
     90 			return int(r.MaxFractionDigits) + 1
     91 		}
     92 	}
     93 	return -1
     94 }
     95 
     96 // RoundFractionDigits returns the number of fraction digits an implementation
     97 // of Convert may round to or n < 0 if there is no maximum or a maximum is not
     98 // recommended.
     99 func (r *RoundingContext) RoundFractionDigits() (n int) {
    100 	if r.MinExponentDigits == 0 &&
    101 		r.MaxSignificantDigits == 0 &&
    102 		r.MaxFractionDigits >= 0 {
    103 		return int(r.MaxFractionDigits) + int(r.DigitShift)
    104 	}
    105 	return -1
    106 }
    107 
    108 // SetScale fixes the RoundingContext to a fixed number of fraction digits.
    109 func (r *RoundingContext) SetScale(scale int) {
    110 	r.MinFractionDigits = uint8(scale)
    111 	r.MaxFractionDigits = int16(scale)
    112 }
    113 
    114 func (r *RoundingContext) SetPrecision(prec int) {
    115 	r.MaxSignificantDigits = int16(prec)
    116 }
    117 
    118 func (r *RoundingContext) isScientific() bool {
    119 	return r.MinExponentDigits > 0
    120 }
    121 
    122 func (f *Pattern) needsSep(pos int) bool {
    123 	p := pos - 1
    124 	size := int(f.GroupingSize[0])
    125 	if size == 0 || p == 0 {
    126 		return false
    127 	}
    128 	if p == size {
    129 		return true
    130 	}
    131 	if p -= size; p < 0 {
    132 		return false
    133 	}
    134 	// TODO: make second groupingsize the same as first if 0 so that we can
    135 	// avoid this check.
    136 	if x := int(f.GroupingSize[1]); x != 0 {
    137 		size = x
    138 	}
    139 	return p%size == 0
    140 }
    141 
    142 // A PatternFlag is a bit mask for the flag field of a Pattern.
    143 type PatternFlag uint8
    144 
    145 const (
    146 	AlwaysSign PatternFlag = 1 << iota
    147 	ElideSign              // Use space instead of plus sign. AlwaysSign must be true.
    148 	AlwaysExpSign
    149 	AlwaysDecimalSeparator
    150 	ParenthesisForNegative // Common pattern. Saves space.
    151 
    152 	PadAfterNumber
    153 	PadAfterAffix
    154 
    155 	PadBeforePrefix = 0 // Default
    156 	PadAfterPrefix  = PadAfterAffix
    157 	PadBeforeSuffix = PadAfterNumber
    158 	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
    159 	PadMask         = PadAfterNumber | PadAfterAffix
    160 )
    161 
    162 type parser struct {
    163 	*Pattern
    164 
    165 	leadingSharps int
    166 
    167 	pos            int
    168 	err            error
    169 	doNotTerminate bool
    170 	groupingCount  uint
    171 	hasGroup       bool
    172 	buf            []byte
    173 }
    174 
    175 func (p *parser) setError(err error) {
    176 	if p.err == nil {
    177 		p.err = err
    178 	}
    179 }
    180 
    181 func (p *parser) updateGrouping() {
    182 	if p.hasGroup &&
    183 		0 < p.groupingCount && p.groupingCount < 255 {
    184 		p.GroupingSize[1] = p.GroupingSize[0]
    185 		p.GroupingSize[0] = uint8(p.groupingCount)
    186 	}
    187 	p.groupingCount = 0
    188 	p.hasGroup = true
    189 }
    190 
    191 var (
    192 	// TODO: more sensible and localizeable error messages.
    193 	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
    194 	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
    195 	errInvalidQuote          = errors.New("format: invalid quote")
    196 	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
    197 	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
    198 	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
    199 	errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
    200 )
    201 
    202 // ParsePattern extracts formatting information from a CLDR number pattern.
    203 //
    204 // See https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
    205 func ParsePattern(s string) (f *Pattern, err error) {
    206 	p := parser{Pattern: &Pattern{}}
    207 
    208 	s = p.parseSubPattern(s)
    209 
    210 	if s != "" {
    211 		// Parse negative sub pattern.
    212 		if s[0] != ';' {
    213 			p.setError(errors.New("format: error parsing first sub pattern"))
    214 			return nil, p.err
    215 		}
    216 		neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
    217 		s = neg.parseSubPattern(s[len(";"):])
    218 		p.NegOffset = uint16(len(p.buf))
    219 		p.buf = append(p.buf, neg.buf...)
    220 	}
    221 	if s != "" {
    222 		p.setError(errors.New("format: spurious characters at end of pattern"))
    223 	}
    224 	if p.err != nil {
    225 		return nil, p.err
    226 	}
    227 	if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
    228 		// No prefix or suffixes.
    229 		p.NegOffset = 0
    230 	} else {
    231 		p.Affix = affix
    232 	}
    233 	if p.Increment == 0 {
    234 		p.IncrementScale = 0
    235 	}
    236 	return p.Pattern, nil
    237 }
    238 
    239 func (p *parser) parseSubPattern(s string) string {
    240 	s = p.parsePad(s, PadBeforePrefix)
    241 	s = p.parseAffix(s)
    242 	s = p.parsePad(s, PadAfterPrefix)
    243 
    244 	s = p.parse(p.number, s)
    245 	p.updateGrouping()
    246 
    247 	s = p.parsePad(s, PadBeforeSuffix)
    248 	s = p.parseAffix(s)
    249 	s = p.parsePad(s, PadAfterSuffix)
    250 	return s
    251 }
    252 
    253 func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
    254 	if len(s) >= 2 && s[0] == '*' {
    255 		r, sz := utf8.DecodeRuneInString(s[1:])
    256 		if p.PadRune != 0 {
    257 			p.err = errMultiplePadSpecifiers
    258 		} else {
    259 			p.Flags |= f
    260 			p.PadRune = r
    261 		}
    262 		return s[1+sz:]
    263 	}
    264 	return s
    265 }
    266 
    267 func (p *parser) parseAffix(s string) string {
    268 	x := len(p.buf)
    269 	p.buf = append(p.buf, 0) // placeholder for affix length
    270 
    271 	s = p.parse(p.affix, s)
    272 
    273 	n := len(p.buf) - x - 1
    274 	if n > 0xFF {
    275 		p.setError(errAffixTooLarge)
    276 	}
    277 	p.buf[x] = uint8(n)
    278 	return s
    279 }
    280 
    281 // state implements a state transition. It returns the new state. A state
    282 // function may set an error on the parser or may simply return on an incorrect
    283 // token and let the next phase fail.
    284 type state func(r rune) state
    285 
    286 // parse repeatedly applies a state function on the given string until a
    287 // termination condition is reached.
    288 func (p *parser) parse(fn state, s string) (tail string) {
    289 	for i, r := range s {
    290 		p.doNotTerminate = false
    291 		if fn = fn(r); fn == nil || p.err != nil {
    292 			return s[i:]
    293 		}
    294 		p.FormatWidth++
    295 	}
    296 	if p.doNotTerminate {
    297 		p.setError(errUnexpectedEnd)
    298 	}
    299 	return ""
    300 }
    301 
    302 func (p *parser) affix(r rune) state {
    303 	switch r {
    304 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    305 		'#', '@', '.', '*', ',', ';':
    306 		return nil
    307 	case '\'':
    308 		p.FormatWidth--
    309 		return p.escapeFirst
    310 	case '%':
    311 		if p.DigitShift != 0 {
    312 			p.setError(errDuplicatePercentSign)
    313 		}
    314 		p.DigitShift = 2
    315 	case '\u2030': // ‰ Per mille
    316 		if p.DigitShift != 0 {
    317 			p.setError(errDuplicatePermilleSign)
    318 		}
    319 		p.DigitShift = 3
    320 		// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
    321 	}
    322 	p.buf = append(p.buf, string(r)...)
    323 	return p.affix
    324 }
    325 
    326 func (p *parser) escapeFirst(r rune) state {
    327 	switch r {
    328 	case '\'':
    329 		p.buf = append(p.buf, "\\'"...)
    330 		return p.affix
    331 	default:
    332 		p.buf = append(p.buf, '\'')
    333 		p.buf = append(p.buf, string(r)...)
    334 	}
    335 	return p.escape
    336 }
    337 
    338 func (p *parser) escape(r rune) state {
    339 	switch r {
    340 	case '\'':
    341 		p.FormatWidth--
    342 		p.buf = append(p.buf, '\'')
    343 		return p.affix
    344 	default:
    345 		p.buf = append(p.buf, string(r)...)
    346 	}
    347 	return p.escape
    348 }
    349 
    350 // number parses a number. The BNF says the integer part should always have
    351 // a '0', but that does not appear to be the case according to the rest of the
    352 // documentation. We will allow having only '#' numbers.
    353 func (p *parser) number(r rune) state {
    354 	switch r {
    355 	case '#':
    356 		p.groupingCount++
    357 		p.leadingSharps++
    358 	case '@':
    359 		p.groupingCount++
    360 		p.leadingSharps = 0
    361 		p.MaxFractionDigits = -1
    362 		return p.sigDigits(r)
    363 	case ',':
    364 		if p.leadingSharps == 0 { // no leading commas
    365 			return nil
    366 		}
    367 		p.updateGrouping()
    368 	case 'E':
    369 		p.MaxIntegerDigits = uint8(p.leadingSharps)
    370 		return p.exponent
    371 	case '.': // allow ".##" etc.
    372 		p.updateGrouping()
    373 		return p.fraction
    374 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    375 		return p.integer(r)
    376 	default:
    377 		return nil
    378 	}
    379 	return p.number
    380 }
    381 
    382 func (p *parser) integer(r rune) state {
    383 	if !('0' <= r && r <= '9') {
    384 		var next state
    385 		switch r {
    386 		case 'E':
    387 			if p.leadingSharps > 0 {
    388 				p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
    389 			}
    390 			next = p.exponent
    391 		case '.':
    392 			next = p.fraction
    393 		case ',':
    394 			next = p.integer
    395 		}
    396 		p.updateGrouping()
    397 		return next
    398 	}
    399 	p.Increment = p.Increment*10 + uint32(r-'0')
    400 	p.groupingCount++
    401 	p.MinIntegerDigits++
    402 	return p.integer
    403 }
    404 
    405 func (p *parser) sigDigits(r rune) state {
    406 	switch r {
    407 	case '@':
    408 		p.groupingCount++
    409 		p.MaxSignificantDigits++
    410 		p.MinSignificantDigits++
    411 	case '#':
    412 		return p.sigDigitsFinal(r)
    413 	case 'E':
    414 		p.updateGrouping()
    415 		return p.normalizeSigDigitsWithExponent()
    416 	default:
    417 		p.updateGrouping()
    418 		return nil
    419 	}
    420 	return p.sigDigits
    421 }
    422 
    423 func (p *parser) sigDigitsFinal(r rune) state {
    424 	switch r {
    425 	case '#':
    426 		p.groupingCount++
    427 		p.MaxSignificantDigits++
    428 	case 'E':
    429 		p.updateGrouping()
    430 		return p.normalizeSigDigitsWithExponent()
    431 	default:
    432 		p.updateGrouping()
    433 		return nil
    434 	}
    435 	return p.sigDigitsFinal
    436 }
    437 
    438 func (p *parser) normalizeSigDigitsWithExponent() state {
    439 	p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
    440 	p.MinFractionDigits = p.MinSignificantDigits - 1
    441 	p.MaxFractionDigits = p.MaxSignificantDigits - 1
    442 	p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
    443 	return p.exponent
    444 }
    445 
    446 func (p *parser) fraction(r rune) state {
    447 	switch r {
    448 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    449 		p.Increment = p.Increment*10 + uint32(r-'0')
    450 		p.IncrementScale++
    451 		p.MinFractionDigits++
    452 		p.MaxFractionDigits++
    453 	case '#':
    454 		p.MaxFractionDigits++
    455 	case 'E':
    456 		if p.leadingSharps > 0 {
    457 			p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
    458 		}
    459 		return p.exponent
    460 	default:
    461 		return nil
    462 	}
    463 	return p.fraction
    464 }
    465 
    466 func (p *parser) exponent(r rune) state {
    467 	switch r {
    468 	case '+':
    469 		// Set mode and check it wasn't already set.
    470 		if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
    471 			break
    472 		}
    473 		p.Flags |= AlwaysExpSign
    474 		p.doNotTerminate = true
    475 		return p.exponent
    476 	case '0':
    477 		p.MinExponentDigits++
    478 		return p.exponent
    479 	}
    480 	// termination condition
    481 	if p.MinExponentDigits == 0 {
    482 		p.setError(errors.New("format: need at least one digit"))
    483 	}
    484 	return nil
    485 }