taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

catmsg.go (13906B)


      1 // Copyright 2017 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package catmsg contains support types for package x/text/message/catalog.
      6 //
      7 // This package contains the low-level implementations of Message used by the
      8 // catalog package and provides primitives for other packages to implement their
      9 // own. For instance, the plural package provides functionality for selecting
     10 // translation strings based on the plural category of substitution arguments.
     11 //
     12 // # Encoding and Decoding
     13 //
     14 // Catalogs store Messages encoded as a single string. Compiling a message into
     15 // a string both results in a more compact representation and speeds up evaluation.
     16 //
     17 // A Message must implement a Compile method to convert its arbitrary
     18 // representation to a string. The Compile method takes an Encoder which
     19 // facilitates serializing the message. Encoders also provide more context of
     20 // the message's creation (such as for which language the message is intended),
     21 // which may not be known at the time of the creation of the message.
     22 //
     23 // Each message type must also have an accompanying decoder registered to decode
     24 // the message. This decoder takes a Decoder argument which provides the
     25 // counterparts for the decoding.
     26 //
     27 // # Renderers
     28 //
     29 // A Decoder must be initialized with a Renderer implementation. These
     30 // implementations must be provided by packages that use Catalogs, typically
     31 // formatting packages such as x/text/message. A typical user will not need to
     32 // worry about this type; it is only relevant to packages that do string
     33 // formatting and want to use the catalog package to handle localized strings.
     34 //
     35 // A package that uses catalogs for selecting strings receives selection results
     36 // as a sequence of substrings passed to the Renderer. The following snippet shows
     37 // how to express the above example using the message package.
     38 //
     39 //	message.Set(language.English, "You are %d minute(s) late.",
     40 //		catalog.Var("minutes", plural.Select(1, "one", "minute")),
     41 //		catalog.String("You are %[1]d ${minutes} late."))
     42 //
     43 //	p := message.NewPrinter(language.English)
     44 //	p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
     45 //
     46 // To evaluate the Printf, package message wraps the arguments in a Renderer
     47 // that is passed to the catalog for message decoding. The call sequence that
     48 // results from evaluating the above message, assuming the person is rather
     49 // tardy, is:
     50 //
     51 //	Render("You are %[1]d ")
     52 //	Arg(1)
     53 //	Render("minutes")
     54 //	Render(" late.")
     55 //
     56 // The call to Arg is caused by the plural.Select execution, which evaluates
     57 // the argument to determine whether the singular or plural message form should
     58 // be selected. The calls to Render report the partial results to the message
     59 // package for further evaluation.
     60 package catmsg
     61 
     62 import (
     63 	"errors"
     64 	"fmt"
     65 	"strconv"
     66 	"strings"
     67 	"sync"
     68 
     69 	"golang.org/x/text/language"
     70 )
     71 
// A Handle refers to a registered message type.
//
// A Handle is the index of the message type's Handler in the package-level
// handler table. New handles are obtained from Register.
type Handle int

// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
type Handler func(d *Decoder) bool
     80 
     81 // Register records the existence of a message type and returns a Handle that
     82 // can be used in the Encoder's EncodeMessageType method to create such
     83 // messages. The prefix of the name should be the package path followed by
     84 // an optional disambiguating string.
     85 // Register will panic if a handle for the same name was already registered.
     86 func Register(name string, handler Handler) Handle {
     87 	mutex.Lock()
     88 	defer mutex.Unlock()
     89 
     90 	if _, ok := names[name]; ok {
     91 		panic(fmt.Errorf("catmsg: handler for %q already exists", name))
     92 	}
     93 	h := Handle(len(handlers))
     94 	names[name] = h
     95 	handlers = append(handlers, handler)
     96 	return h
     97 }
     98 
// These handlers require fixed positions in the handlers slice.
//
// Each constant is the index at which init installs the handler for the
// corresponding built-in message type.
const (
	msgVars Handle = iota
	msgFirst
	msgRaw
	msgString
	msgAffix
	// Leave some arbitrary room for future expansion: 20 should suffice.
	// numInternal is the initial length of the handlers slice, so handles
	// returned by Register start at this value.
	numInternal = 20
)

// prefix is prepended to the names under which the built-in message types are
// recorded in names.
const prefix = "golang.org/x/text/internal/catmsg."

var (
	// TODO: find a more stable way to link handles to message types.

	// mutex is held by Register while it reads and updates names and handlers.
	mutex sync.Mutex
	// names maps a registered message type name to its Handle. The built-in
	// types are pre-populated, so Register panics if called with one of
	// their names.
	names = map[string]Handle{
		prefix + "Vars":   msgVars,
		prefix + "First":  msgFirst,
		prefix + "Raw":    msgRaw,
		prefix + "String": msgString,
		prefix + "Affix":  msgAffix,
	}
	// handlers is indexed by Handle. The first numInternal slots are reserved
	// for built-in types; init fills in the ones that are defined and the
	// rest stay nil.
	handlers = make([]Handler, numInternal)
)
    124 
    125 func init() {
    126 	// This handler is a message type wrapper that initializes a decoder
    127 	// with a variable block. This message type, if present, is always at the
    128 	// start of an encoded message.
    129 	handlers[msgVars] = func(d *Decoder) bool {
    130 		blockSize := int(d.DecodeUint())
    131 		d.vars = d.data[:blockSize]
    132 		d.data = d.data[blockSize:]
    133 		return d.executeMessage()
    134 	}
    135 
    136 	// First takes the first message in a sequence that results in a match for
    137 	// the given substitution arguments.
    138 	handlers[msgFirst] = func(d *Decoder) bool {
    139 		for !d.Done() {
    140 			if d.ExecuteMessage() {
    141 				return true
    142 			}
    143 		}
    144 		return false
    145 	}
    146 
    147 	handlers[msgRaw] = func(d *Decoder) bool {
    148 		d.Render(d.data)
    149 		return true
    150 	}
    151 
    152 	// A String message alternates between a string constant and a variable
    153 	// substitution.
    154 	handlers[msgString] = func(d *Decoder) bool {
    155 		for !d.Done() {
    156 			if str := d.DecodeString(); str != "" {
    157 				d.Render(str)
    158 			}
    159 			if d.Done() {
    160 				break
    161 			}
    162 			d.ExecuteSubstitution()
    163 		}
    164 		return true
    165 	}
    166 
    167 	handlers[msgAffix] = func(d *Decoder) bool {
    168 		// TODO: use an alternative method for common cases.
    169 		prefix := d.DecodeString()
    170 		suffix := d.DecodeString()
    171 		if prefix != "" {
    172 			d.Render(prefix)
    173 		}
    174 		ret := d.ExecuteMessage()
    175 		if suffix != "" {
    176 			d.Render(suffix)
    177 		}
    178 		return ret
    179 	}
    180 }
    181 
var (
	// ErrIncomplete indicates a compiled message does not define translations
	// for all possible argument values. If this error is returned when
	// compiling a message, evaluating that message may result in the
	// ErrNoMatch error.
	ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")

	// ErrNoMatch indicates no translation message matched the given input
	// parameters when evaluating a message.
	ErrNoMatch = errors.New("catmsg: no translation for inputs")
)
    192 
// A Message holds a collection of translations for the same phrase that may
// vary based on the values of substitution arguments.
type Message interface {
	// Compile encodes the format string(s) of the message as a string for later
	// evaluation.
	//
	// The first call Compile makes on the encoder must be EncodeMessageType.
	// The handle passed to this call may either be a handle returned by
	// Register to encode a single custom message, or HandleFirst followed by
	// a sequence of calls to EncodeMessage.
	//
	// Compile must return ErrIncomplete if it is possible for evaluation to
	// not match any translation for a given set of formatting parameters.
	// For example, selecting a translation based on plural form may not yield
	// a match if the form "Other" is not one of the selectors.
	//
	// Compile may return any other application-specific error. For backwards
	// compatibility with packages like fmt, which often do not do sanity
	// checking of format strings ahead of time, Compile should still make an
	// effort to have some sensible fallback in case of an error.
	Compile(e *Encoder) error
}
    215 
    216 // Compile converts a Message to a data string that can be stored in a Catalog.
    217 // The resulting string can subsequently be decoded by passing to the Execute
    218 // method of a Decoder.
    219 func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
    220 	// TODO: pass macros so they can be used for validation.
    221 	v := &Encoder{inBody: true} // encoder for variables
    222 	v.root = v
    223 	e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
    224 	err = m.Compile(e)
    225 	// This package serves te message package, which in turn is meant to be a
    226 	// drop-in replacement for fmt.  With the fmt package, format strings are
    227 	// evaluated lazily and errors are handled by substituting strings in the
    228 	// result, rather then returning an error. Dealing with multiple languages
    229 	// makes it more important to check errors ahead of time. We chose to be
    230 	// consistent and compatible and allow graceful degradation in case of
    231 	// errors.
    232 	buf := e.buf[stripPrefix(e.buf):]
    233 	if len(v.buf) > 0 {
    234 		// Prepend variable block.
    235 		b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
    236 		b[0] = byte(msgVars)
    237 		b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
    238 		b = append(b, v.buf...)
    239 		b = append(b, buf...)
    240 		buf = b
    241 	}
    242 	if err == nil {
    243 		err = v.err
    244 	}
    245 	return string(buf), err
    246 }
    247 
    248 // FirstOf is a message type that prints the first message in the sequence that
    249 // resolves to a match for the given substitution arguments.
    250 type FirstOf []Message
    251 
    252 // Compile implements Message.
    253 func (s FirstOf) Compile(e *Encoder) error {
    254 	e.EncodeMessageType(msgFirst)
    255 	err := ErrIncomplete
    256 	for i, m := range s {
    257 		if err == nil {
    258 			return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
    259 		}
    260 		err = e.EncodeMessage(m)
    261 	}
    262 	return err
    263 }
    264 
    265 // Var defines a message that can be substituted for a placeholder of the same
    266 // name. If an expression does not result in a string after evaluation, Name is
    267 // used as the substitution. For example:
    268 //
    269 //	Var{
    270 //	  Name:    "minutes",
    271 //	  Message: plural.Select(1, "one", "minute"),
    272 //	}
    273 //
    274 // will resolve to minute for singular and minutes for plural forms.
    275 type Var struct {
    276 	Name    string
    277 	Message Message
    278 }
    279 
    280 var errIsVar = errors.New("catmsg: variable used as message")
    281 
    282 // Compile implements Message.
    283 //
    284 // Note that this method merely registers a variable; it does not create an
    285 // encoded message.
    286 func (v *Var) Compile(e *Encoder) error {
    287 	if err := e.addVar(v.Name, v.Message); err != nil {
    288 		return err
    289 	}
    290 	// Using a Var by itself is an error. If it is in a sequence followed by
    291 	// other messages referring to it, this error will be ignored.
    292 	return errIsVar
    293 }
    294 
    295 // Raw is a message consisting of a single format string that is passed as is
    296 // to the Renderer.
    297 //
    298 // Note that a Renderer may still do its own variable substitution.
    299 type Raw string
    300 
    301 // Compile implements Message.
    302 func (r Raw) Compile(e *Encoder) (err error) {
    303 	e.EncodeMessageType(msgRaw)
    304 	// Special case: raw strings don't have a size encoding and so don't use
    305 	// EncodeString.
    306 	e.buf = append(e.buf, r...)
    307 	return nil
    308 }
    309 
    310 // String is a message consisting of a single format string which contains
    311 // placeholders that may be substituted with variables.
    312 //
    313 // Variable substitutions are marked with placeholders and a variable name of
    314 // the form ${name}. Any other substitutions such as Go templates or
    315 // printf-style substitutions are left to be done by the Renderer.
    316 //
    317 // When evaluation a string interpolation, a Renderer will receive separate
    318 // calls for each placeholder and interstitial string. For example, for the
    319 // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
    320 // is:
    321 //
    322 //	d.Render("%[1]v ")
    323 //	d.Arg(1)
    324 //	d.Render(resultOfInvites)
    325 //	d.Render(" %[2]v to ")
    326 //	d.Arg(2)
    327 //	d.Render(resultOfTheir)
    328 //	d.Render(" party.")
    329 //
    330 // where the messages for "invites" and "their" both use a plural.Select
    331 // referring to the first argument.
    332 //
    333 // Strings may also invoke macros. Macros are essentially variables that can be
    334 // reused. Macros may, for instance, be used to make selections between
    335 // different conjugations of a verb. See the catalog package description for an
    336 // overview of macros.
    337 type String string
    338 
    339 // Compile implements Message. It parses the placeholder formats and returns
    340 // any error.
    341 func (s String) Compile(e *Encoder) (err error) {
    342 	msg := string(s)
    343 	const subStart = "${"
    344 	hasHeader := false
    345 	p := 0
    346 	b := []byte{}
    347 	for {
    348 		i := strings.Index(msg[p:], subStart)
    349 		if i == -1 {
    350 			break
    351 		}
    352 		b = append(b, msg[p:p+i]...)
    353 		p += i + len(subStart)
    354 		if i = strings.IndexByte(msg[p:], '}'); i == -1 {
    355 			b = append(b, "$!(MISSINGBRACE)"...)
    356 			err = fmt.Errorf("catmsg: missing '}'")
    357 			p = len(msg)
    358 			break
    359 		}
    360 		name := strings.TrimSpace(msg[p : p+i])
    361 		if q := strings.IndexByte(name, '('); q == -1 {
    362 			if !hasHeader {
    363 				hasHeader = true
    364 				e.EncodeMessageType(msgString)
    365 			}
    366 			e.EncodeString(string(b))
    367 			e.EncodeSubstitution(name)
    368 			b = b[:0]
    369 		} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
    370 			// TODO: what should the error be?
    371 			b = append(b, "$!(MISSINGPAREN)"...)
    372 			err = fmt.Errorf("catmsg: missing ')'")
    373 		} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
    374 			// TODO: handle more than one argument
    375 			b = append(b, "$!(BADNUM)"...)
    376 			err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
    377 		} else {
    378 			if !hasHeader {
    379 				hasHeader = true
    380 				e.EncodeMessageType(msgString)
    381 			}
    382 			e.EncodeString(string(b))
    383 			e.EncodeSubstitution(name[:q], int(x))
    384 			b = b[:0]
    385 		}
    386 		p += i + 1
    387 	}
    388 	b = append(b, msg[p:]...)
    389 	if !hasHeader {
    390 		// Simplify string to a raw string.
    391 		Raw(string(b)).Compile(e)
    392 	} else if len(b) > 0 {
    393 		e.EncodeString(string(b))
    394 	}
    395 	return err
    396 }
    397 
    398 // Affix is a message that adds a prefix and suffix to another message.
    399 // This is mostly used add back whitespace to a translation that was stripped
    400 // before sending it out.
    401 type Affix struct {
    402 	Message Message
    403 	Prefix  string
    404 	Suffix  string
    405 }
    406 
    407 // Compile implements Message.
    408 func (a Affix) Compile(e *Encoder) (err error) {
    409 	// TODO: consider adding a special message type that just adds a single
    410 	// return. This is probably common enough to handle the majority of cases.
    411 	// Get some stats first, though.
    412 	e.EncodeMessageType(msgAffix)
    413 	e.EncodeString(a.Prefix)
    414 	e.EncodeString(a.Suffix)
    415 	e.EncodeMessage(a.Message)
    416 	return nil
    417 }