taldir

Directory service to resolve wallet mailboxes by messenger addresses
Log | Files | Refs | Submodules | README | LICENSE

encoder.go (12249B)


      1 // go-qrcode
      2 // Copyright 2014 Tom Harwood
      3 
      4 package qrcode
      5 
      6 import (
      7 	"errors"
      8 	"log"
      9 
     10 	bitset "github.com/skip2/go-qrcode/bitset"
     11 )
     12 
     13 // Data encoding.
     14 //
     15 // The main data portion of a QR Code consists of one or more segments of data.
     16 // A segment consists of:
     17 //
     18 // - The segment Data Mode: numeric, alphanumeric, or byte.
     19 // - The length of segment in bits.
     20 // - Encoded data.
     21 //
     22 // For example, the string "123ZZ#!#!" may be represented as:
     23 //
     24 // [numeric, 3, "123"] [alphanumeric, 2, "ZZ"] [byte, 4, "#!#!"]
     25 //
     26 // Multiple data modes exist to minimise the size of encoded data. For example,
     27 // 8-bit bytes require 8 bits to encode each, but base 10 numeric data can be
     28 // encoded at a higher density of 3 numbers (e.g. 123) per 10 bits.
     29 //
     30 // Some data can be represented in multiple modes. Numeric data can be
     31 // represented in all three modes, whereas alphanumeric data (e.g. 'A') can be
     32 // represented in alphanumeric and byte mode.
     33 //
     34 // Starting a new segment (to use a different Data Mode) has a cost, the bits to
     35 // state the new segment Data Mode and length. To minimise each QR Code's symbol
     36 // size, an optimisation routine coalesces segment types where possible, to
     37 // reduce the encoded data length.
     38 //
     39 // There are several other data modes available (e.g. Kanji mode) which are not
     40 // implemented here.
     41 
     42 // A segment encoding mode.
     43 type dataMode uint8
     44 
     45 const (
     46 	// Each dataMode is a subset of the subsequent dataMode:
     47 	// dataModeNone < dataModeNumeric < dataModeAlphanumeric < dataModeByte
     48 	//
     49 	// This ordering is important for determining which data modes a character can
     50 	// be encoded with. E.g. 'E' can be encoded in both dataModeAlphanumeric and
     51 	// dataModeByte.
     52 	dataModeNone dataMode = 1 << iota
     53 	dataModeNumeric
     54 	dataModeAlphanumeric
     55 	dataModeByte
     56 )
     57 
     58 // dataModeString returns d as a short printable string.
     59 func dataModeString(d dataMode) string {
     60 	switch d {
     61 	case dataModeNone:
     62 		return "none"
     63 	case dataModeNumeric:
     64 		return "numeric"
     65 	case dataModeAlphanumeric:
     66 		return "alphanumeric"
     67 	case dataModeByte:
     68 		return "byte"
     69 	}
     70 
     71 	return "unknown"
     72 }
     73 
     74 type dataEncoderType uint8
     75 
     76 const (
     77 	dataEncoderType1To9 dataEncoderType = iota
     78 	dataEncoderType10To26
     79 	dataEncoderType27To40
     80 )
     81 
     82 // segment is a single segment of data.
     83 type segment struct {
     84 	// Data Mode (e.g. numeric).
     85 	dataMode dataMode
     86 
     87 	// segment data (e.g. "abc").
     88 	data []byte
     89 }
     90 
     91 // A dataEncoder encodes data for a particular QR Code version.
     92 type dataEncoder struct {
     93 	// Minimum & maximum versions supported.
     94 	minVersion int
     95 	maxVersion int
     96 
     97 	// Mode indicator bit sequences.
     98 	numericModeIndicator      *bitset.Bitset
     99 	alphanumericModeIndicator *bitset.Bitset
    100 	byteModeIndicator         *bitset.Bitset
    101 
    102 	// Character count lengths.
    103 	numNumericCharCountBits      int
    104 	numAlphanumericCharCountBits int
    105 	numByteCharCountBits         int
    106 
    107 	// The raw input data.
    108 	data []byte
    109 
    110 	// The data classified into unoptimised segments.
    111 	actual []segment
    112 
    113 	// The data classified into optimised segments.
    114 	optimised []segment
    115 }
    116 
    117 // newDataEncoder constructs a dataEncoder.
    118 func newDataEncoder(t dataEncoderType) *dataEncoder {
    119 	d := &dataEncoder{}
    120 
    121 	switch t {
    122 	case dataEncoderType1To9:
    123 		d = &dataEncoder{
    124 			minVersion:                   1,
    125 			maxVersion:                   9,
    126 			numericModeIndicator:         bitset.New(b0, b0, b0, b1),
    127 			alphanumericModeIndicator:    bitset.New(b0, b0, b1, b0),
    128 			byteModeIndicator:            bitset.New(b0, b1, b0, b0),
    129 			numNumericCharCountBits:      10,
    130 			numAlphanumericCharCountBits: 9,
    131 			numByteCharCountBits:         8,
    132 		}
    133 	case dataEncoderType10To26:
    134 		d = &dataEncoder{
    135 			minVersion:                   10,
    136 			maxVersion:                   26,
    137 			numericModeIndicator:         bitset.New(b0, b0, b0, b1),
    138 			alphanumericModeIndicator:    bitset.New(b0, b0, b1, b0),
    139 			byteModeIndicator:            bitset.New(b0, b1, b0, b0),
    140 			numNumericCharCountBits:      12,
    141 			numAlphanumericCharCountBits: 11,
    142 			numByteCharCountBits:         16,
    143 		}
    144 	case dataEncoderType27To40:
    145 		d = &dataEncoder{
    146 			minVersion:                   27,
    147 			maxVersion:                   40,
    148 			numericModeIndicator:         bitset.New(b0, b0, b0, b1),
    149 			alphanumericModeIndicator:    bitset.New(b0, b0, b1, b0),
    150 			byteModeIndicator:            bitset.New(b0, b1, b0, b0),
    151 			numNumericCharCountBits:      14,
    152 			numAlphanumericCharCountBits: 13,
    153 			numByteCharCountBits:         16,
    154 		}
    155 	default:
    156 		log.Panic("Unknown dataEncoderType")
    157 	}
    158 
    159 	return d
    160 }
    161 
    162 // encode data as one or more segments and return the encoded data.
    163 //
    164 // The returned data does not include the terminator bit sequence.
    165 func (d *dataEncoder) encode(data []byte) (*bitset.Bitset, error) {
    166 	d.data = data
    167 	d.actual = nil
    168 	d.optimised = nil
    169 
    170 	if len(data) == 0 {
    171 		return nil, errors.New("no data to encode")
    172 	}
    173 
    174 	// Classify data into unoptimised segments.
    175 	highestRequiredMode := d.classifyDataModes()
    176 
    177 	// Optimise segments.
    178 	err := d.optimiseDataModes()
    179 	if err != nil {
    180 		return nil, err
    181 	}
    182 
    183 	// Check if a single byte encoded segment would be more efficient.
    184 	optimizedLength := 0
    185 	for _, s := range d.optimised {
    186 		length, err := d.encodedLength(s.dataMode, len(s.data))
    187 		if err != nil {
    188 			return nil, err
    189 		}
    190 		optimizedLength += length
    191 	}
    192 
    193 	singleByteSegmentLength, err := d.encodedLength(highestRequiredMode, len(d.data))
    194 	if err != nil {
    195 		return nil, err
    196 	}
    197 
    198 	if singleByteSegmentLength <= optimizedLength {
    199 		d.optimised = []segment{segment{dataMode: highestRequiredMode, data: d.data}}
    200 	}
    201 
    202 	// Encode data.
    203 	encoded := bitset.New()
    204 	for _, s := range d.optimised {
    205 		d.encodeDataRaw(s.data, s.dataMode, encoded)
    206 	}
    207 
    208 	return encoded, nil
    209 }
    210 
    211 // classifyDataModes classifies the raw data into unoptimised segments.
    212 // e.g. "123ZZ#!#!" =>
    213 // [numeric, 3, "123"] [alphanumeric, 2, "ZZ"] [byte, 4, "#!#!"].
    214 //
    215 // Returns the highest data mode needed to encode the data. e.g. for a mixed
    216 // numeric/alphanumeric input, the highest is alphanumeric.
    217 //
    218 // dataModeNone < dataModeNumeric < dataModeAlphanumeric < dataModeByte
    219 func (d *dataEncoder) classifyDataModes() dataMode {
    220 	var start int
    221 	mode := dataModeNone
    222 	highestRequiredMode := mode
    223 
    224 	for i, v := range d.data {
    225 		newMode := dataModeNone
    226 		switch {
    227 		case v >= 0x30 && v <= 0x39:
    228 			newMode = dataModeNumeric
    229 		case v == 0x20 || v == 0x24 || v == 0x25 || v == 0x2a || v == 0x2b || v ==
    230 			0x2d || v == 0x2e || v == 0x2f || v == 0x3a || (v >= 0x41 && v <= 0x5a):
    231 			newMode = dataModeAlphanumeric
    232 		default:
    233 			newMode = dataModeByte
    234 		}
    235 
    236 		if newMode != mode {
    237 			if i > 0 {
    238 				d.actual = append(d.actual, segment{dataMode: mode, data: d.data[start:i]})
    239 
    240 				start = i
    241 			}
    242 
    243 			mode = newMode
    244 		}
    245 
    246 		if newMode > highestRequiredMode {
    247 			highestRequiredMode = newMode
    248 		}
    249 	}
    250 
    251 	d.actual = append(d.actual, segment{dataMode: mode, data: d.data[start:len(d.data)]})
    252 
    253 	return highestRequiredMode
    254 }
    255 
    256 // optimiseDataModes optimises the list of segments to reduce the overall output
    257 // encoded data length.
    258 //
    259 // The algorithm coalesces adjacent segments. segments are only coalesced when
    260 // the Data Modes are compatible, and when the coalesced segment has a shorter
    261 // encoded length than separate segments.
    262 //
    263 // Multiple segments may be coalesced. For example a string of alternating
    264 // alphanumeric/numeric segments ANANANANA can be optimised to just A.
    265 func (d *dataEncoder) optimiseDataModes() error {
    266 	for i := 0; i < len(d.actual); {
    267 		mode := d.actual[i].dataMode
    268 		numChars := len(d.actual[i].data)
    269 
    270 		j := i + 1
    271 		for j < len(d.actual) {
    272 			nextNumChars := len(d.actual[j].data)
    273 			nextMode := d.actual[j].dataMode
    274 
    275 			if nextMode > mode {
    276 				break
    277 			}
    278 
    279 			coalescedLength, err := d.encodedLength(mode, numChars+nextNumChars)
    280 
    281 			if err != nil {
    282 				return err
    283 			}
    284 
    285 			seperateLength1, err := d.encodedLength(mode, numChars)
    286 
    287 			if err != nil {
    288 				return err
    289 			}
    290 
    291 			seperateLength2, err := d.encodedLength(nextMode, nextNumChars)
    292 
    293 			if err != nil {
    294 				return err
    295 			}
    296 
    297 			if coalescedLength < seperateLength1+seperateLength2 {
    298 				j++
    299 				numChars += nextNumChars
    300 			} else {
    301 				break
    302 			}
    303 		}
    304 
    305 		optimised := segment{dataMode: mode,
    306 			data: make([]byte, 0, numChars)}
    307 
    308 		for k := i; k < j; k++ {
    309 			optimised.data = append(optimised.data, d.actual[k].data...)
    310 		}
    311 
    312 		d.optimised = append(d.optimised, optimised)
    313 
    314 		i = j
    315 	}
    316 
    317 	return nil
    318 }
    319 
    320 // encodeDataRaw encodes data in dataMode. The encoded data is appended to
    321 // encoded.
    322 func (d *dataEncoder) encodeDataRaw(data []byte, dataMode dataMode, encoded *bitset.Bitset) {
    323 	modeIndicator := d.modeIndicator(dataMode)
    324 	charCountBits := d.charCountBits(dataMode)
    325 
    326 	// Append mode indicator.
    327 	encoded.Append(modeIndicator)
    328 
    329 	// Append character count.
    330 	encoded.AppendUint32(uint32(len(data)), charCountBits)
    331 
    332 	// Append data.
    333 	switch dataMode {
    334 	case dataModeNumeric:
    335 		for i := 0; i < len(data); i += 3 {
    336 			charsRemaining := len(data) - i
    337 
    338 			var value uint32
    339 			bitsUsed := 1
    340 
    341 			for j := 0; j < charsRemaining && j < 3; j++ {
    342 				value *= 10
    343 				value += uint32(data[i+j] - 0x30)
    344 				bitsUsed += 3
    345 			}
    346 			encoded.AppendUint32(value, bitsUsed)
    347 		}
    348 	case dataModeAlphanumeric:
    349 		for i := 0; i < len(data); i += 2 {
    350 			charsRemaining := len(data) - i
    351 
    352 			var value uint32
    353 			for j := 0; j < charsRemaining && j < 2; j++ {
    354 				value *= 45
    355 				value += encodeAlphanumericCharacter(data[i+j])
    356 			}
    357 
    358 			bitsUsed := 6
    359 			if charsRemaining > 1 {
    360 				bitsUsed = 11
    361 			}
    362 
    363 			encoded.AppendUint32(value, bitsUsed)
    364 		}
    365 	case dataModeByte:
    366 		for _, b := range data {
    367 			encoded.AppendByte(b, 8)
    368 		}
    369 	}
    370 }
    371 
    372 // modeIndicator returns the segment header bits for a segment of type dataMode.
    373 func (d *dataEncoder) modeIndicator(dataMode dataMode) *bitset.Bitset {
    374 	switch dataMode {
    375 	case dataModeNumeric:
    376 		return d.numericModeIndicator
    377 	case dataModeAlphanumeric:
    378 		return d.alphanumericModeIndicator
    379 	case dataModeByte:
    380 		return d.byteModeIndicator
    381 	default:
    382 		log.Panic("Unknown data mode")
    383 	}
    384 
    385 	return nil
    386 }
    387 
    388 // charCountBits returns the number of bits used to encode the length of a data
    389 // segment of type dataMode.
    390 func (d *dataEncoder) charCountBits(dataMode dataMode) int {
    391 	switch dataMode {
    392 	case dataModeNumeric:
    393 		return d.numNumericCharCountBits
    394 	case dataModeAlphanumeric:
    395 		return d.numAlphanumericCharCountBits
    396 	case dataModeByte:
    397 		return d.numByteCharCountBits
    398 	default:
    399 		log.Panic("Unknown data mode")
    400 	}
    401 
    402 	return 0
    403 }
    404 
    405 // encodedLength returns the number of bits required to encode n symbols in
    406 // dataMode.
    407 //
    408 // The number of bits required is affected by:
    409 //	- QR code type - Mode Indicator length.
    410 //	- Data mode - number of bits used to represent data length.
    411 //	- Data mode - how the data is encoded.
    412 //	- Number of symbols encoded.
    413 //
    414 // An error is returned if the mode is not supported, or the length requested is
    415 // too long to be represented.
    416 func (d *dataEncoder) encodedLength(dataMode dataMode, n int) (int, error) {
    417 	modeIndicator := d.modeIndicator(dataMode)
    418 	charCountBits := d.charCountBits(dataMode)
    419 
    420 	if modeIndicator == nil {
    421 		return 0, errors.New("mode not supported")
    422 	}
    423 
    424 	maxLength := (1 << uint8(charCountBits)) - 1
    425 
    426 	if n > maxLength {
    427 		return 0, errors.New("length too long to be represented")
    428 	}
    429 
    430 	length := modeIndicator.Len() + charCountBits
    431 
    432 	switch dataMode {
    433 	case dataModeNumeric:
    434 		length += 10 * (n / 3)
    435 
    436 		if n%3 != 0 {
    437 			length += 1 + 3*(n%3)
    438 		}
    439 	case dataModeAlphanumeric:
    440 		length += 11 * (n / 2)
    441 		length += 6 * (n % 2)
    442 	case dataModeByte:
    443 		length += 8 * n
    444 	}
    445 
    446 	return length, nil
    447 }
    448 
    449 // encodeAlphanumericChar returns the QR Code encoded value of v.
    450 //
    451 // v must be a QR Code defined alphanumeric character: 0-9, A-Z, SP, $%*+-./ or
    452 // :. The characters are mapped to values in the range 0-44 respectively.
    453 func encodeAlphanumericCharacter(v byte) uint32 {
    454 	c := uint32(v)
    455 
    456 	switch {
    457 	case c >= '0' && c <= '9':
    458 		// 0-9 encoded as 0-9.
    459 		return c - '0'
    460 	case c >= 'A' && c <= 'Z':
    461 		// A-Z encoded as 10-35.
    462 		return c - 'A' + 10
    463 	case c == ' ':
    464 		return 36
    465 	case c == '$':
    466 		return 37
    467 	case c == '%':
    468 		return 38
    469 	case c == '*':
    470 		return 39
    471 	case c == '+':
    472 		return 40
    473 	case c == '-':
    474 		return 41
    475 	case c == '.':
    476 		return 42
    477 	case c == '/':
    478 		return 43
    479 	case c == ':':
    480 		return 44
    481 	default:
    482 		log.Panicf("encodeAlphanumericCharacter() with non alphanumeric char %v.", v)
    483 	}
    484 
    485 	return 0
    486 }