encoder.go (12249B)
1 // go-qrcode 2 // Copyright 2014 Tom Harwood 3 4 package qrcode 5 6 import ( 7 "errors" 8 "log" 9 10 bitset "github.com/skip2/go-qrcode/bitset" 11 ) 12 13 // Data encoding. 14 // 15 // The main data portion of a QR Code consists of one or more segments of data. 16 // A segment consists of: 17 // 18 // - The segment Data Mode: numeric, alphanumeric, or byte. 19 // - The length of segment in bits. 20 // - Encoded data. 21 // 22 // For example, the string "123ZZ#!#!" may be represented as: 23 // 24 // [numeric, 3, "123"] [alphanumeric, 2, "ZZ"] [byte, 4, "#!#!"] 25 // 26 // Multiple data modes exist to minimise the size of encoded data. For example, 27 // 8-bit bytes require 8 bits to encode each, but base 10 numeric data can be 28 // encoded at a higher density of 3 numbers (e.g. 123) per 10 bits. 29 // 30 // Some data can be represented in multiple modes. Numeric data can be 31 // represented in all three modes, whereas alphanumeric data (e.g. 'A') can be 32 // represented in alphanumeric and byte mode. 33 // 34 // Starting a new segment (to use a different Data Mode) has a cost, the bits to 35 // state the new segment Data Mode and length. To minimise each QR Code's symbol 36 // size, an optimisation routine coalesces segment types where possible, to 37 // reduce the encoded data length. 38 // 39 // There are several other data modes available (e.g. Kanji mode) which are not 40 // implemented here. 41 42 // A segment encoding mode. 43 type dataMode uint8 44 45 const ( 46 // Each dataMode is a subset of the subsequent dataMode: 47 // dataModeNone < dataModeNumeric < dataModeAlphanumeric < dataModeByte 48 // 49 // This ordering is important for determining which data modes a character can 50 // be encoded with. E.g. 'E' can be encoded in both dataModeAlphanumeric and 51 // dataModeByte. 52 dataModeNone dataMode = 1 << iota 53 dataModeNumeric 54 dataModeAlphanumeric 55 dataModeByte 56 ) 57 58 // dataModeString returns d as a short printable string. 59 func dataModeString(d dataMode) string { 60 switch d { 61 case dataModeNone: 62 return "none" 63 case dataModeNumeric: 64 return "numeric" 65 case dataModeAlphanumeric: 66 return "alphanumeric" 67 case dataModeByte: 68 return "byte" 69 } 70 71 return "unknown" 72 } 73 74 type dataEncoderType uint8 75 76 const ( 77 dataEncoderType1To9 dataEncoderType = iota 78 dataEncoderType10To26 79 dataEncoderType27To40 80 ) 81 82 // segment is a single segment of data. 83 type segment struct { 84 // Data Mode (e.g. numeric). 85 dataMode dataMode 86 87 // segment data (e.g. "abc"). 88 data []byte 89 } 90 91 // A dataEncoder encodes data for a particular QR Code version. 92 type dataEncoder struct { 93 // Minimum & maximum versions supported. 94 minVersion int 95 maxVersion int 96 97 // Mode indicator bit sequences. 98 numericModeIndicator *bitset.Bitset 99 alphanumericModeIndicator *bitset.Bitset 100 byteModeIndicator *bitset.Bitset 101 102 // Character count lengths. 103 numNumericCharCountBits int 104 numAlphanumericCharCountBits int 105 numByteCharCountBits int 106 107 // The raw input data. 108 data []byte 109 110 // The data classified into unoptimised segments. 111 actual []segment 112 113 // The data classified into optimised segments. 114 optimised []segment 115 } 116 117 // newDataEncoder constructs a dataEncoder. 118 func newDataEncoder(t dataEncoderType) *dataEncoder { 119 d := &dataEncoder{} 120 121 switch t { 122 case dataEncoderType1To9: 123 d = &dataEncoder{ 124 minVersion: 1, 125 maxVersion: 9, 126 numericModeIndicator: bitset.New(b0, b0, b0, b1), 127 alphanumericModeIndicator: bitset.New(b0, b0, b1, b0), 128 byteModeIndicator: bitset.New(b0, b1, b0, b0), 129 numNumericCharCountBits: 10, 130 numAlphanumericCharCountBits: 9, 131 numByteCharCountBits: 8, 132 } 133 case dataEncoderType10To26: 134 d = &dataEncoder{ 135 minVersion: 10, 136 maxVersion: 26, 137 numericModeIndicator: bitset.New(b0, b0, b0, b1), 138 alphanumericModeIndicator: bitset.New(b0, b0, b1, b0), 139 byteModeIndicator: bitset.New(b0, b1, b0, b0), 140 numNumericCharCountBits: 12, 141 numAlphanumericCharCountBits: 11, 142 numByteCharCountBits: 16, 143 } 144 case dataEncoderType27To40: 145 d = &dataEncoder{ 146 minVersion: 27, 147 maxVersion: 40, 148 numericModeIndicator: bitset.New(b0, b0, b0, b1), 149 alphanumericModeIndicator: bitset.New(b0, b0, b1, b0), 150 byteModeIndicator: bitset.New(b0, b1, b0, b0), 151 numNumericCharCountBits: 14, 152 numAlphanumericCharCountBits: 13, 153 numByteCharCountBits: 16, 154 } 155 default: 156 log.Panic("Unknown dataEncoderType") 157 } 158 159 return d 160 } 161 162 // encode data as one or more segments and return the encoded data. 163 // 164 // The returned data does not include the terminator bit sequence. 165 func (d *dataEncoder) encode(data []byte) (*bitset.Bitset, error) { 166 d.data = data 167 d.actual = nil 168 d.optimised = nil 169 170 if len(data) == 0 { 171 return nil, errors.New("no data to encode") 172 } 173 174 // Classify data into unoptimised segments. 175 highestRequiredMode := d.classifyDataModes() 176 177 // Optimise segments. 178 err := d.optimiseDataModes() 179 if err != nil { 180 return nil, err 181 } 182 183 // Check if a single byte encoded segment would be more efficient. 184 optimizedLength := 0 185 for _, s := range d.optimised { 186 length, err := d.encodedLength(s.dataMode, len(s.data)) 187 if err != nil { 188 return nil, err 189 } 190 optimizedLength += length 191 } 192 193 singleByteSegmentLength, err := d.encodedLength(highestRequiredMode, len(d.data)) 194 if err != nil { 195 return nil, err 196 } 197 198 if singleByteSegmentLength <= optimizedLength { 199 d.optimised = []segment{segment{dataMode: highestRequiredMode, data: d.data}} 200 } 201 202 // Encode data. 203 encoded := bitset.New() 204 for _, s := range d.optimised { 205 d.encodeDataRaw(s.data, s.dataMode, encoded) 206 } 207 208 return encoded, nil 209 } 210 211 // classifyDataModes classifies the raw data into unoptimised segments. 212 // e.g. "123ZZ#!#!" => 213 // [numeric, 3, "123"] [alphanumeric, 2, "ZZ"] [byte, 4, "#!#!"]. 214 // 215 // Returns the highest data mode needed to encode the data. e.g. for a mixed 216 // numeric/alphanumeric input, the highest is alphanumeric. 217 // 218 // dataModeNone < dataModeNumeric < dataModeAlphanumeric < dataModeByte 219 func (d *dataEncoder) classifyDataModes() dataMode { 220 var start int 221 mode := dataModeNone 222 highestRequiredMode := mode 223 224 for i, v := range d.data { 225 newMode := dataModeNone 226 switch { 227 case v >= 0x30 && v <= 0x39: 228 newMode = dataModeNumeric 229 case v == 0x20 || v == 0x24 || v == 0x25 || v == 0x2a || v == 0x2b || v == 230 0x2d || v == 0x2e || v == 0x2f || v == 0x3a || (v >= 0x41 && v <= 0x5a): 231 newMode = dataModeAlphanumeric 232 default: 233 newMode = dataModeByte 234 } 235 236 if newMode != mode { 237 if i > 0 { 238 d.actual = append(d.actual, segment{dataMode: mode, data: d.data[start:i]}) 239 240 start = i 241 } 242 243 mode = newMode 244 } 245 246 if newMode > highestRequiredMode { 247 highestRequiredMode = newMode 248 } 249 } 250 251 d.actual = append(d.actual, segment{dataMode: mode, data: d.data[start:len(d.data)]}) 252 253 return highestRequiredMode 254 } 255 256 // optimiseDataModes optimises the list of segments to reduce the overall output 257 // encoded data length. 258 // 259 // The algorithm coalesces adjacent segments. segments are only coalesced when 260 // the Data Modes are compatible, and when the coalesced segment has a shorter 261 // encoded length than separate segments. 262 // 263 // Multiple segments may be coalesced. For example a string of alternating 264 // alphanumeric/numeric segments ANANANANA can be optimised to just A. 265 func (d *dataEncoder) optimiseDataModes() error { 266 for i := 0; i < len(d.actual); { 267 mode := d.actual[i].dataMode 268 numChars := len(d.actual[i].data) 269 270 j := i + 1 271 for j < len(d.actual) { 272 nextNumChars := len(d.actual[j].data) 273 nextMode := d.actual[j].dataMode 274 275 if nextMode > mode { 276 break 277 } 278 279 coalescedLength, err := d.encodedLength(mode, numChars+nextNumChars) 280 281 if err != nil { 282 return err 283 } 284 285 seperateLength1, err := d.encodedLength(mode, numChars) 286 287 if err != nil { 288 return err 289 } 290 291 seperateLength2, err := d.encodedLength(nextMode, nextNumChars) 292 293 if err != nil { 294 return err 295 } 296 297 if coalescedLength < seperateLength1+seperateLength2 { 298 j++ 299 numChars += nextNumChars 300 } else { 301 break 302 } 303 } 304 305 optimised := segment{dataMode: mode, 306 data: make([]byte, 0, numChars)} 307 308 for k := i; k < j; k++ { 309 optimised.data = append(optimised.data, d.actual[k].data...) 310 } 311 312 d.optimised = append(d.optimised, optimised) 313 314 i = j 315 } 316 317 return nil 318 } 319 320 // encodeDataRaw encodes data in dataMode. The encoded data is appended to 321 // encoded. 322 func (d *dataEncoder) encodeDataRaw(data []byte, dataMode dataMode, encoded *bitset.Bitset) { 323 modeIndicator := d.modeIndicator(dataMode) 324 charCountBits := d.charCountBits(dataMode) 325 326 // Append mode indicator. 327 encoded.Append(modeIndicator) 328 329 // Append character count. 330 encoded.AppendUint32(uint32(len(data)), charCountBits) 331 332 // Append data. 333 switch dataMode { 334 case dataModeNumeric: 335 for i := 0; i < len(data); i += 3 { 336 charsRemaining := len(data) - i 337 338 var value uint32 339 bitsUsed := 1 340 341 for j := 0; j < charsRemaining && j < 3; j++ { 342 value *= 10 343 value += uint32(data[i+j] - 0x30) 344 bitsUsed += 3 345 } 346 encoded.AppendUint32(value, bitsUsed) 347 } 348 case dataModeAlphanumeric: 349 for i := 0; i < len(data); i += 2 { 350 charsRemaining := len(data) - i 351 352 var value uint32 353 for j := 0; j < charsRemaining && j < 2; j++ { 354 value *= 45 355 value += encodeAlphanumericCharacter(data[i+j]) 356 } 357 358 bitsUsed := 6 359 if charsRemaining > 1 { 360 bitsUsed = 11 361 } 362 363 encoded.AppendUint32(value, bitsUsed) 364 } 365 case dataModeByte: 366 for _, b := range data { 367 encoded.AppendByte(b, 8) 368 } 369 } 370 } 371 372 // modeIndicator returns the segment header bits for a segment of type dataMode. 373 func (d *dataEncoder) modeIndicator(dataMode dataMode) *bitset.Bitset { 374 switch dataMode { 375 case dataModeNumeric: 376 return d.numericModeIndicator 377 case dataModeAlphanumeric: 378 return d.alphanumericModeIndicator 379 case dataModeByte: 380 return d.byteModeIndicator 381 default: 382 log.Panic("Unknown data mode") 383 } 384 385 return nil 386 } 387 388 // charCountBits returns the number of bits used to encode the length of a data 389 // segment of type dataMode. 390 func (d *dataEncoder) charCountBits(dataMode dataMode) int { 391 switch dataMode { 392 case dataModeNumeric: 393 return d.numNumericCharCountBits 394 case dataModeAlphanumeric: 395 return d.numAlphanumericCharCountBits 396 case dataModeByte: 397 return d.numByteCharCountBits 398 default: 399 log.Panic("Unknown data mode") 400 } 401 402 return 0 403 } 404 405 // encodedLength returns the number of bits required to encode n symbols in 406 // dataMode. 407 // 408 // The number of bits required is affected by: 409 // - QR code type - Mode Indicator length. 410 // - Data mode - number of bits used to represent data length. 411 // - Data mode - how the data is encoded. 412 // - Number of symbols encoded. 413 // 414 // An error is returned if the mode is not supported, or the length requested is 415 // too long to be represented. 416 func (d *dataEncoder) encodedLength(dataMode dataMode, n int) (int, error) { 417 modeIndicator := d.modeIndicator(dataMode) 418 charCountBits := d.charCountBits(dataMode) 419 420 if modeIndicator == nil { 421 return 0, errors.New("mode not supported") 422 } 423 424 maxLength := (1 << uint8(charCountBits)) - 1 425 426 if n > maxLength { 427 return 0, errors.New("length too long to be represented") 428 } 429 430 length := modeIndicator.Len() + charCountBits 431 432 switch dataMode { 433 case dataModeNumeric: 434 length += 10 * (n / 3) 435 436 if n%3 != 0 { 437 length += 1 + 3*(n%3) 438 } 439 case dataModeAlphanumeric: 440 length += 11 * (n / 2) 441 length += 6 * (n % 2) 442 case dataModeByte: 443 length += 8 * n 444 } 445 446 return length, nil 447 } 448 449 // encodeAlphanumericChar returns the QR Code encoded value of v. 450 // 451 // v must be a QR Code defined alphanumeric character: 0-9, A-Z, SP, $%*+-./ or 452 // :. The characters are mapped to values in the range 0-44 respectively. 453 func encodeAlphanumericCharacter(v byte) uint32 { 454 c := uint32(v) 455 456 switch { 457 case c >= '0' && c <= '9': 458 // 0-9 encoded as 0-9. 459 return c - '0' 460 case c >= 'A' && c <= 'Z': 461 // A-Z encoded as 10-35. 462 return c - 'A' + 10 463 case c == ' ': 464 return 36 465 case c == '$': 466 return 37 467 case c == '%': 468 return 38 469 case c == '*': 470 return 39 471 case c == '+': 472 return 40 473 case c == '-': 474 return 41 475 case c == '.': 476 return 42 477 case c == '/': 478 return 43 479 case c == ':': 480 return 44 481 default: 482 log.Panicf("encodeAlphanumericCharacter() with non alphanumeric char %v.", v) 483 } 484 485 return 0 486 }