h2_huffman_codec.c (48091B)
1 /* SPDX-License-Identifier: LGPL-2.1-or-later OR (GPL-2.0-or-later WITH eCos-exception-2.0) */ 2 /* 3 This file is part of GNU libmicrohttpd. 4 Copyright (C) 2025 Evgeny Grin (Karlson2k) 5 6 GNU libmicrohttpd is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 11 GNU libmicrohttpd is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 Alternatively, you can redistribute GNU libmicrohttpd and/or 17 modify it under the terms of the GNU General Public License as 18 published by the Free Software Foundation; either version 2 of 19 the License, or (at your option) any later version, together 20 with the eCos exception, as follows: 21 22 As a special exception, if other files instantiate templates or 23 use macros or inline functions from this file, or you compile this 24 file and link it with other works to produce a work based on this 25 file, this file does not by itself cause the resulting work to be 26 covered by the GNU General Public License. However the source code 27 for this file must still be made available in accordance with 28 section (3) of the GNU General Public License v2. 29 30 This exception does not invalidate any other reasons why a work 31 based on this file might be covered by the GNU General Public 32 License. 33 34 You should have received copies of the GNU Lesser General Public 35 License and the GNU General Public License along with this library; 36 if not, see <https://www.gnu.org/licenses/>. 37 */ 38 39 /** 40 * @file src/mhd2/h2/hpack/h2_huffman_codec.c 41 * @brief The implementation of HTTP/2 Huffman encoding and decoding functions 42 * @author Karlson2k (Evgeny Grin) 43 */ 44 45 46 #include "mhd_sys_options.h" 47 48 #include "sys_bool_type.h" 49 #include "sys_base_types.h" 50 #include <string.h> 51 52 #include "mhd_assert.h" 53 54 #include "mhd_constexpr.h" 55 #include "mhd_predict.h" 56 #include "mhd_align.h" 57 58 #include "mhd_bithelpers.h" 59 60 #include "h2_huffman_codec.h" 61 62 /** 63 * @def MHD_USE_CODE_HARDENING 64 * Enable additional code-hardening checks. 65 * 66 * Automatically enabled when unit-testing. 67 */ 68 #if defined(MHD_UNIT_TESTING) 69 # if ! defined(MHD_USE_CODE_HARDENING) 70 # define MHD_USE_CODE_HARDENING 1 71 # endif 72 #endif 73 74 /** 75 * HTTP/2 static Huffman codes from RFC 7541 76 * 77 * Index is the character value (0..255). 78 * The table values are codes that are left-algined (MSB-aligned) to 79 * the 32 bit value. 80 * See https://www.rfc-editor.org/rfc/rfc7541.html#appendix-B 81 * 82 * @note The table does not include EOS code. 83 */ 84 mhd_constexpr uint_least32_t mhd_h2huff_code_by_sym[256] = { 85 /* ( 0) */ 0xFFC00000u, 86 /* ( 1) */ 0xFFFFB000u, 87 /* ( 2) */ 0xFFFFFE20u, 88 /* ( 3) */ 0xFFFFFE30u, 89 /* ( 4) */ 0xFFFFFE40u, 90 /* ( 5) */ 0xFFFFFE50u, 91 /* ( 6) */ 0xFFFFFE60u, 92 /* ( 7) */ 0xFFFFFE70u, 93 /* ( 8) */ 0xFFFFFE80u, 94 /* ( 9) */ 0xFFFFEA00u, 95 /* ( 10) */ 0xFFFFFFF0u, 96 /* ( 11) */ 0xFFFFFE90u, 97 /* ( 12) */ 0xFFFFFEA0u, 98 /* ( 13) */ 0xFFFFFFF4u, 99 /* ( 14) */ 0xFFFFFEB0u, 100 /* ( 15) */ 0xFFFFFEC0u, 101 /* ( 16) */ 0xFFFFFED0u, 102 /* ( 17) */ 0xFFFFFEE0u, 103 /* ( 18) */ 0xFFFFFEF0u, 104 /* ( 19) */ 0xFFFFFF00u, 105 /* ( 20) */ 0xFFFFFF10u, 106 /* ( 21) */ 0xFFFFFF20u, 107 /* ( 22) */ 0xFFFFFFF8u, 108 /* ( 23) */ 0xFFFFFF30u, 109 /* ( 24) */ 0xFFFFFF40u, 110 /* ( 25) */ 0xFFFFFF50u, 111 /* ( 26) */ 0xFFFFFF60u, 112 /* ( 27) */ 0xFFFFFF70u, 113 /* ( 28) */ 0xFFFFFF80u, 114 /* ( 29) */ 0xFFFFFF90u, 115 /* ( 30) */ 0xFFFFFFA0u, 116 /* ( 31) */ 0xFFFFFFB0u, 117 /* ' ' ( 32) */ 0x50000000u, 118 /* '!' ( 33) */ 0xFE000000u, 119 /* '"' ( 34) */ 0xFE400000u, 120 /* '#' ( 35) */ 0xFFA00000u, 121 /* '$' ( 36) */ 0xFFC80000u, 122 /* '%' ( 37) */ 0x54000000u, 123 /* '&' ( 38) */ 0xF8000000u, 124 /* ''' ( 39) */ 0xFF400000u, 125 /* '(' ( 40) */ 0xFE800000u, 126 /* ')' ( 41) */ 0xFEC00000u, 127 /* '*' ( 42) */ 0xF9000000u, 128 /* '+' ( 43) */ 0xFF600000u, 129 /* ',' ( 44) */ 0xFA000000u, 130 /* '-' ( 45) */ 0x58000000u, 131 /* '.' ( 46) */ 0x5C000000u, 132 /* '/' ( 47) */ 0x60000000u, 133 /* '0' ( 48) */ 0x00000000u, 134 /* '1' ( 49) */ 0x08000000u, 135 /* '2' ( 50) */ 0x10000000u, 136 /* '3' ( 51) */ 0x64000000u, 137 /* '4' ( 52) */ 0x68000000u, 138 /* '5' ( 53) */ 0x6C000000u, 139 /* '6' ( 54) */ 0x70000000u, 140 /* '7' ( 55) */ 0x74000000u, 141 /* '8' ( 56) */ 0x78000000u, 142 /* '9' ( 57) */ 0x7C000000u, 143 /* ':' ( 58) */ 0xB8000000u, 144 /* ';' ( 59) */ 0xFB000000u, 145 /* '<' ( 60) */ 0xFFF80000u, 146 /* '=' ( 61) */ 0x80000000u, 147 /* '>' ( 62) */ 0xFFB00000u, 148 /* '?' ( 63) */ 0xFF000000u, 149 /* '@' ( 64) */ 0xFFD00000u, 150 /* 'A' ( 65) */ 0x84000000u, 151 /* 'B' ( 66) */ 0xBA000000u, 152 /* 'C' ( 67) */ 0xBC000000u, 153 /* 'D' ( 68) */ 0xBE000000u, 154 /* 'E' ( 69) */ 0xC0000000u, 155 /* 'F' ( 70) */ 0xC2000000u, 156 /* 'G' ( 71) */ 0xC4000000u, 157 /* 'H' ( 72) */ 0xC6000000u, 158 /* 'I' ( 73) */ 0xC8000000u, 159 /* 'J' ( 74) */ 0xCA000000u, 160 /* 'K' ( 75) */ 0xCC000000u, 161 /* 'L' ( 76) */ 0xCE000000u, 162 /* 'M' ( 77) */ 0xD0000000u, 163 /* 'N' ( 78) */ 0xD2000000u, 164 /* 'O' ( 79) */ 0xD4000000u, 165 /* 'P' ( 80) */ 0xD6000000u, 166 /* 'Q' ( 81) */ 0xD8000000u, 167 /* 'R' ( 82) */ 0xDA000000u, 168 /* 'S' ( 83) */ 0xDC000000u, 169 /* 'T' ( 84) */ 0xDE000000u, 170 /* 'U' ( 85) */ 0xE0000000u, 171 /* 'V' ( 86) */ 0xE2000000u, 172 /* 'W' ( 87) */ 0xE4000000u, 173 /* 'X' ( 88) */ 0xFC000000u, 174 /* 'Y' ( 89) */ 0xE6000000u, 175 /* 'Z' ( 90) */ 0xFD000000u, 176 /* '[' ( 91) */ 0xFFD80000u, 177 /* '\' ( 92) */ 0xFFFE0000u, 178 /* ']' ( 93) */ 0xFFE00000u, 179 /* '^' ( 94) */ 0xFFF00000u, 180 /* '_' ( 95) */ 0x88000000u, 181 /* '`' ( 96) */ 0xFFFA0000u, 182 /* 'a' ( 97) */ 0x18000000u, 183 /* 'b' ( 98) */ 0x8C000000u, 184 /* 'c' ( 99) */ 0x20000000u, 185 /* 'd' (100) */ 0x90000000u, 186 /* 'e' (101) */ 0x28000000u, 187 /* 'f' (102) */ 0x94000000u, 188 /* 'g' (103) */ 0x98000000u, 189 /* 'h' (104) */ 0x9C000000u, 190 /* 'i' (105) */ 0x30000000u, 191 /* 'j' (106) */ 0xE8000000u, 192 /* 'k' (107) */ 0xEA000000u, 193 /* 'l' (108) */ 0xA0000000u, 194 /* 'm' (109) */ 0xA4000000u, 195 /* 'n' (110) */ 0xA8000000u, 196 /* 'o' (111) */ 0x38000000u, 197 /* 'p' (112) */ 0xAC000000u, 198 /* 'q' (113) */ 0xEC000000u, 199 /* 'r' (114) */ 0xB0000000u, 200 /* 's' (115) */ 0x40000000u, 201 /* 't' (116) */ 0x48000000u, 202 /* 'u' (117) */ 0xB4000000u, 203 /* 'v' (118) */ 0xEE000000u, 204 /* 'w' (119) */ 0xF0000000u, 205 /* 'x' (120) */ 0xF2000000u, 206 /* 'y' (121) */ 0xF4000000u, 207 /* 'z' (122) */ 0xF6000000u, 208 /* '{' (123) */ 0xFFFC0000u, 209 /* '|' (124) */ 0xFF800000u, 210 /* '}' (125) */ 0xFFF40000u, 211 /* '~' (126) */ 0xFFE80000u, 212 /* (127) */ 0xFFFFFFC0u, 213 /* (128) */ 0xFFFE6000u, 214 /* (129) */ 0xFFFF4800u, 215 /* (130) */ 0xFFFE7000u, 216 /* (131) */ 0xFFFE8000u, 217 /* (132) */ 0xFFFF4C00u, 218 /* (133) */ 0xFFFF5000u, 219 /* (134) */ 0xFFFF5400u, 220 /* (135) */ 0xFFFFB200u, 221 /* (136) */ 0xFFFF5800u, 222 /* (137) */ 0xFFFFB400u, 223 /* (138) */ 0xFFFFB600u, 224 /* (139) */ 0xFFFFB800u, 225 /* (140) */ 0xFFFFBA00u, 226 /* (141) */ 0xFFFFBC00u, 227 /* (142) */ 0xFFFFEB00u, 228 /* (143) */ 0xFFFFBE00u, 229 /* (144) */ 0xFFFFEC00u, 230 /* (145) */ 0xFFFFED00u, 231 /* (146) */ 0xFFFF5C00u, 232 /* (147) */ 0xFFFFC000u, 233 /* (148) */ 0xFFFFEE00u, 234 /* (149) */ 0xFFFFC200u, 235 /* (150) */ 0xFFFFC400u, 236 /* (151) */ 0xFFFFC600u, 237 /* (152) */ 0xFFFFC800u, 238 /* (153) */ 0xFFFEE000u, 239 /* (154) */ 0xFFFF6000u, 240 /* (155) */ 0xFFFFCA00u, 241 /* (156) */ 0xFFFF6400u, 242 /* (157) */ 0xFFFFCC00u, 243 /* (158) */ 0xFFFFCE00u, 244 /* (159) */ 0xFFFFEF00u, 245 /* (160) */ 0xFFFF6800u, 246 /* (161) */ 0xFFFEE800u, 247 /* (162) */ 0xFFFE9000u, 248 /* (163) */ 0xFFFF6C00u, 249 /* (164) */ 0xFFFF7000u, 250 /* (165) */ 0xFFFFD000u, 251 /* (166) */ 0xFFFFD200u, 252 /* (167) */ 0xFFFEF000u, 253 /* (168) */ 0xFFFFD400u, 254 /* (169) */ 0xFFFF7400u, 255 /* (170) */ 0xFFFF7800u, 256 /* (171) */ 0xFFFFF000u, 257 /* (172) */ 0xFFFEF800u, 258 /* (173) */ 0xFFFF7C00u, 259 /* (174) */ 0xFFFFD600u, 260 /* (175) */ 0xFFFFD800u, 261 /* (176) */ 0xFFFF0000u, 262 /* (177) */ 0xFFFF0800u, 263 /* (178) */ 0xFFFF8000u, 264 /* (179) */ 0xFFFF1000u, 265 /* (180) */ 0xFFFFDA00u, 266 /* (181) */ 0xFFFF8400u, 267 /* (182) */ 0xFFFFDC00u, 268 /* (183) */ 0xFFFFDE00u, 269 /* (184) */ 0xFFFEA000u, 270 /* (185) */ 0xFFFF8800u, 271 /* (186) */ 0xFFFF8C00u, 272 /* (187) */ 0xFFFF9000u, 273 /* (188) */ 0xFFFFE000u, 274 /* (189) */ 0xFFFF9400u, 275 /* (190) */ 0xFFFF9800u, 276 /* (191) */ 0xFFFFE200u, 277 /* (192) */ 0xFFFFF800u, 278 /* (193) */ 0xFFFFF840u, 279 /* (194) */ 0xFFFEB000u, 280 /* (195) */ 0xFFFE2000u, 281 /* (196) */ 0xFFFF9C00u, 282 /* (197) */ 0xFFFFE400u, 283 /* (198) */ 0xFFFFA000u, 284 /* (199) */ 0xFFFFF600u, 285 /* (200) */ 0xFFFFF880u, 286 /* (201) */ 0xFFFFF8C0u, 287 /* (202) */ 0xFFFFF900u, 288 /* (203) */ 0xFFFFFBC0u, 289 /* (204) */ 0xFFFFFBE0u, 290 /* (205) */ 0xFFFFF940u, 291 /* (206) */ 0xFFFFF100u, 292 /* (207) */ 0xFFFFF680u, 293 /* (208) */ 0xFFFE4000u, 294 /* (209) */ 0xFFFF1800u, 295 /* (210) */ 0xFFFFF980u, 296 /* (211) */ 0xFFFFFC00u, 297 /* (212) */ 0xFFFFFC20u, 298 /* (213) */ 0xFFFFF9C0u, 299 /* (214) */ 0xFFFFFC40u, 300 /* (215) */ 0xFFFFF200u, 301 /* (216) */ 0xFFFF2000u, 302 /* (217) */ 0xFFFF2800u, 303 /* (218) */ 0xFFFFFA00u, 304 /* (219) */ 0xFFFFFA40u, 305 /* (220) */ 0xFFFFFFD0u, 306 /* (221) */ 0xFFFFFC60u, 307 /* (222) */ 0xFFFFFC80u, 308 /* (223) */ 0xFFFFFCA0u, 309 /* (224) */ 0xFFFEC000u, 310 /* (225) */ 0xFFFFF300u, 311 /* (226) */ 0xFFFED000u, 312 /* (227) */ 0xFFFF3000u, 313 /* (228) */ 0xFFFFA400u, 314 /* (229) */ 0xFFFF3800u, 315 /* (230) */ 0xFFFF4000u, 316 /* (231) */ 0xFFFFE600u, 317 /* (232) */ 0xFFFFA800u, 318 /* (233) */ 0xFFFFAC00u, 319 /* (234) */ 0xFFFFF700u, 320 /* (235) */ 0xFFFFF780u, 321 /* (236) */ 0xFFFFF400u, 322 /* (237) */ 0xFFFFF500u, 323 /* (238) */ 0xFFFFFA80u, 324 /* (239) */ 0xFFFFE800u, 325 /* (240) */ 0xFFFFFAC0u, 326 /* (241) */ 0xFFFFFCC0u, 327 /* (242) */ 0xFFFFFB00u, 328 /* (243) */ 0xFFFFFB40u, 329 /* (244) */ 0xFFFFFCE0u, 330 /* (245) */ 0xFFFFFD00u, 331 /* (246) */ 0xFFFFFD20u, 332 /* (247) */ 0xFFFFFD40u, 333 /* (248) */ 0xFFFFFD60u, 334 /* (249) */ 0xFFFFFFE0u, 335 /* (250) */ 0xFFFFFD80u, 336 /* (251) */ 0xFFFFFDA0u, 337 /* (252) */ 0xFFFFFDC0u, 338 /* (253) */ 0xFFFFFDE0u, 339 /* (254) */ 0xFFFFFE00u, 340 /* (255) */ 0xFFFFFB80u 341 }; 342 343 344 /** 345 * HTTP/2 static Huffman codes length in bits from RFC 7541 346 * See https://www.rfc-editor.org/rfc/rfc7541.html#appendix-B 347 * 348 * Index is the character value (0..255). 349 * The table values are lengths of the code in bits. 350 * See https://www.rfc-editor.org/rfc/rfc7541.html#appendix-B 351 * 352 * @note The table does not include EOS code. 353 */ 354 mhd_constexpr uint8_t mhd_h2huff_bitlen_by_sym[256] = { 355 /* ( 0) */ 13u, 356 /* ( 1) */ 23u, 357 /* ( 2) */ 28u, 358 /* ( 3) */ 28u, 359 /* ( 4) */ 28u, 360 /* ( 5) */ 28u, 361 /* ( 6) */ 28u, 362 /* ( 7) */ 28u, 363 /* ( 8) */ 28u, 364 /* ( 9) */ 24u, 365 /* ( 10) */ 30u, 366 /* ( 11) */ 28u, 367 /* ( 12) */ 28u, 368 /* ( 13) */ 30u, 369 /* ( 14) */ 28u, 370 /* ( 15) */ 28u, 371 /* ( 16) */ 28u, 372 /* ( 17) */ 28u, 373 /* ( 18) */ 28u, 374 /* ( 19) */ 28u, 375 /* ( 20) */ 28u, 376 /* ( 21) */ 28u, 377 /* ( 22) */ 30u, 378 /* ( 23) */ 28u, 379 /* ( 24) */ 28u, 380 /* ( 25) */ 28u, 381 /* ( 26) */ 28u, 382 /* ( 27) */ 28u, 383 /* ( 28) */ 28u, 384 /* ( 29) */ 28u, 385 /* ( 30) */ 28u, 386 /* ( 31) */ 28u, 387 /* ' ' ( 32) */ 6u, 388 /* '!' ( 33) */ 10u, 389 /* '"' ( 34) */ 10u, 390 /* '#' ( 35) */ 12u, 391 /* '$' ( 36) */ 13u, 392 /* '%' ( 37) */ 6u, 393 /* '&' ( 38) */ 8u, 394 /* ''' ( 39) */ 11u, 395 /* '(' ( 40) */ 10u, 396 /* ')' ( 41) */ 10u, 397 /* '*' ( 42) */ 8u, 398 /* '+' ( 43) */ 11u, 399 /* ',' ( 44) */ 8u, 400 /* '-' ( 45) */ 6u, 401 /* '.' ( 46) */ 6u, 402 /* '/' ( 47) */ 6u, 403 /* '0' ( 48) */ 5u, 404 /* '1' ( 49) */ 5u, 405 /* '2' ( 50) */ 5u, 406 /* '3' ( 51) */ 6u, 407 /* '4' ( 52) */ 6u, 408 /* '5' ( 53) */ 6u, 409 /* '6' ( 54) */ 6u, 410 /* '7' ( 55) */ 6u, 411 /* '8' ( 56) */ 6u, 412 /* '9' ( 57) */ 6u, 413 /* ':' ( 58) */ 7u, 414 /* ';' ( 59) */ 8u, 415 /* '<' ( 60) */ 15u, 416 /* '=' ( 61) */ 6u, 417 /* '>' ( 62) */ 12u, 418 /* '?' ( 63) */ 10u, 419 /* '@' ( 64) */ 13u, 420 /* 'A' ( 65) */ 6u, 421 /* 'B' ( 66) */ 7u, 422 /* 'C' ( 67) */ 7u, 423 /* 'D' ( 68) */ 7u, 424 /* 'E' ( 69) */ 7u, 425 /* 'F' ( 70) */ 7u, 426 /* 'G' ( 71) */ 7u, 427 /* 'H' ( 72) */ 7u, 428 /* 'I' ( 73) */ 7u, 429 /* 'J' ( 74) */ 7u, 430 /* 'K' ( 75) */ 7u, 431 /* 'L' ( 76) */ 7u, 432 /* 'M' ( 77) */ 7u, 433 /* 'N' ( 78) */ 7u, 434 /* 'O' ( 79) */ 7u, 435 /* 'P' ( 80) */ 7u, 436 /* 'Q' ( 81) */ 7u, 437 /* 'R' ( 82) */ 7u, 438 /* 'S' ( 83) */ 7u, 439 /* 'T' ( 84) */ 7u, 440 /* 'U' ( 85) */ 7u, 441 /* 'V' ( 86) */ 7u, 442 /* 'W' ( 87) */ 7u, 443 /* 'X' ( 88) */ 8u, 444 /* 'Y' ( 89) */ 7u, 445 /* 'Z' ( 90) */ 8u, 446 /* '[' ( 91) */ 13u, 447 /* '\' ( 92) */ 19u, 448 /* ']' ( 93) */ 13u, 449 /* '^' ( 94) */ 14u, 450 /* '_' ( 95) */ 6u, 451 /* '`' ( 96) */ 15u, 452 /* 'a' ( 97) */ 5u, 453 /* 'b' ( 98) */ 6u, 454 /* 'c' ( 99) */ 5u, 455 /* 'd' (100) */ 6u, 456 /* 'e' (101) */ 5u, 457 /* 'f' (102) */ 6u, 458 /* 'g' (103) */ 6u, 459 /* 'h' (104) */ 6u, 460 /* 'i' (105) */ 5u, 461 /* 'j' (106) */ 7u, 462 /* 'k' (107) */ 7u, 463 /* 'l' (108) */ 6u, 464 /* 'm' (109) */ 6u, 465 /* 'n' (110) */ 6u, 466 /* 'o' (111) */ 5u, 467 /* 'p' (112) */ 6u, 468 /* 'q' (113) */ 7u, 469 /* 'r' (114) */ 6u, 470 /* 's' (115) */ 5u, 471 /* 't' (116) */ 5u, 472 /* 'u' (117) */ 6u, 473 /* 'v' (118) */ 7u, 474 /* 'w' (119) */ 7u, 475 /* 'x' (120) */ 7u, 476 /* 'y' (121) */ 7u, 477 /* 'z' (122) */ 7u, 478 /* '{' (123) */ 15u, 479 /* '|' (124) */ 11u, 480 /* '}' (125) */ 14u, 481 /* '~' (126) */ 13u, 482 /* (127) */ 28u, 483 /* (128) */ 20u, 484 /* (129) */ 22u, 485 /* (130) */ 20u, 486 /* (131) */ 20u, 487 /* (132) */ 22u, 488 /* (133) */ 22u, 489 /* (134) */ 22u, 490 /* (135) */ 23u, 491 /* (136) */ 22u, 492 /* (137) */ 23u, 493 /* (138) */ 23u, 494 /* (139) */ 23u, 495 /* (140) */ 23u, 496 /* (141) */ 23u, 497 /* (142) */ 24u, 498 /* (143) */ 23u, 499 /* (144) */ 24u, 500 /* (145) */ 24u, 501 /* (146) */ 22u, 502 /* (147) */ 23u, 503 /* (148) */ 24u, 504 /* (149) */ 23u, 505 /* (150) */ 23u, 506 /* (151) */ 23u, 507 /* (152) */ 23u, 508 /* (153) */ 21u, 509 /* (154) */ 22u, 510 /* (155) */ 23u, 511 /* (156) */ 22u, 512 /* (157) */ 23u, 513 /* (158) */ 23u, 514 /* (159) */ 24u, 515 /* (160) */ 22u, 516 /* (161) */ 21u, 517 /* (162) */ 20u, 518 /* (163) */ 22u, 519 /* (164) */ 22u, 520 /* (165) */ 23u, 521 /* (166) */ 23u, 522 /* (167) */ 21u, 523 /* (168) */ 23u, 524 /* (169) */ 22u, 525 /* (170) */ 22u, 526 /* (171) */ 24u, 527 /* (172) */ 21u, 528 /* (173) */ 22u, 529 /* (174) */ 23u, 530 /* (175) */ 23u, 531 /* (176) */ 21u, 532 /* (177) */ 21u, 533 /* (178) */ 22u, 534 /* (179) */ 21u, 535 /* (180) */ 23u, 536 /* (181) */ 22u, 537 /* (182) */ 23u, 538 /* (183) */ 23u, 539 /* (184) */ 20u, 540 /* (185) */ 22u, 541 /* (186) */ 22u, 542 /* (187) */ 22u, 543 /* (188) */ 23u, 544 /* (189) */ 22u, 545 /* (190) */ 22u, 546 /* (191) */ 23u, 547 /* (192) */ 26u, 548 /* (193) */ 26u, 549 /* (194) */ 20u, 550 /* (195) */ 19u, 551 /* (196) */ 22u, 552 /* (197) */ 23u, 553 /* (198) */ 22u, 554 /* (199) */ 25u, 555 /* (200) */ 26u, 556 /* (201) */ 26u, 557 /* (202) */ 26u, 558 /* (203) */ 27u, 559 /* (204) */ 27u, 560 /* (205) */ 26u, 561 /* (206) */ 24u, 562 /* (207) */ 25u, 563 /* (208) */ 19u, 564 /* (209) */ 21u, 565 /* (210) */ 26u, 566 /* (211) */ 27u, 567 /* (212) */ 27u, 568 /* (213) */ 26u, 569 /* (214) */ 27u, 570 /* (215) */ 24u, 571 /* (216) */ 21u, 572 /* (217) */ 21u, 573 /* (218) */ 26u, 574 /* (219) */ 26u, 575 /* (220) */ 28u, 576 /* (221) */ 27u, 577 /* (222) */ 27u, 578 /* (223) */ 27u, 579 /* (224) */ 20u, 580 /* (225) */ 24u, 581 /* (226) */ 20u, 582 /* (227) */ 21u, 583 /* (228) */ 22u, 584 /* (229) */ 21u, 585 /* (230) */ 21u, 586 /* (231) */ 23u, 587 /* (232) */ 22u, 588 /* (233) */ 22u, 589 /* (234) */ 25u, 590 /* (235) */ 25u, 591 /* (236) */ 24u, 592 /* (237) */ 24u, 593 /* (238) */ 26u, 594 /* (239) */ 23u, 595 /* (240) */ 26u, 596 /* (241) */ 27u, 597 /* (242) */ 26u, 598 /* (243) */ 26u, 599 /* (244) */ 27u, 600 /* (245) */ 27u, 601 /* (246) */ 27u, 602 /* (247) */ 27u, 603 /* (248) */ 27u, 604 /* (249) */ 28u, 605 /* (250) */ 27u, 606 /* (251) */ 27u, 607 /* (252) */ 27u, 608 /* (253) */ 27u, 609 /* (254) */ 27u, 610 /* (255) */ 26u 611 }; 612 613 /** 614 * The symbol with the longest H2 Huffman code 615 */ 616 #define mhd_H2HUFF_LONGEST_CODE_SYM 22u 617 618 /** 619 * Get MSB-aligned 32-bit code and widen to @c uint_fast32_t, keeping it 620 * MSB-aligned on the wider type. 621 * @param idx Symbol (0..255) 622 */ 623 #define mhd_GET_H2HUFF_CODE_AS_UINTFAST32_ALG(idx) \ 624 (((uint_fast32_t) mhd_h2huff_code_by_sym[(idx)]) << \ 625 ((sizeof(uint_fast32_t) - 4u) * 8u)) 626 627 628 MHD_INTERNAL MHD_FN_PAR_NONNULL_ALL_ 629 MHD_FN_PAR_IN_SIZE_ (2,1) MHD_FN_PAR_OUT_SIZE_ (4,3) size_t 630 mhd_h2_huffman_encode (size_t str_len, 631 const char *restrict str, 632 size_t out_buf_size, 633 void *restrict out_buf) 634 { 635 mhd_constexpr uint_fast32_t uif32_all_ones = 636 (uint_fast32_t) (~((uint_fast32_t) 0u)); 637 /** The width of the @a acc_out. */ 638 mhd_constexpr uint_fast8_t acc_out_bits_width = 639 (uint_fast8_t) (sizeof(uint_fast32_t) * 8u); 640 uint8_t *const out_b = (uint8_t *) out_buf; 641 /** 32-bit (or wider) output accumulator. 642 Filled completely with encoded data and then the data is pushed to the 643 output buffer. */ 644 uint_fast32_t acc_out; 645 /** The number of MSB bits currently used in the @a acc_out */ 646 uint_fast8_t acc_out_used_bits; 647 size_t str_idx; 648 size_t out_idx; 649 650 #ifndef MHD_UNIT_TESTING 651 /* DO check the input for validity if NOT unit-testing */ 652 mhd_assert (0 != str_len); 653 mhd_assert (0 != out_buf_size); 654 #endif /* MHD_UNIT_TESTING */ 655 #ifdef BUILDING_MHD_LIB 656 mhd_assert (out_buf_size <= str_len); 657 #endif /* ! BUILDING_MHD_LIB */ 658 mhd_assert (out_buf_size <= ((~((size_t) (0))) - sizeof(acc_out))); 659 660 for (str_idx = 0u, out_idx = 0u, 661 acc_out = 0u, acc_out_used_bits = 0u; str_idx < str_len; ++str_idx) 662 { 663 const uint_fast8_t sym = (uint_fast8_t) (unsigned char) str[str_idx]; 664 const uint_fast32_t sym_code = 665 mhd_GET_H2HUFF_CODE_AS_UINTFAST32_ALG (sym); 666 const uint_fast8_t sym_code_bits = 667 (uint_fast8_t) mhd_h2huff_bitlen_by_sym[sym]; 668 669 mhd_assert (acc_out_bits_width > acc_out_used_bits); 670 mhd_assert (0 == (acc_out & (uif32_all_ones >> acc_out_used_bits))); 671 mhd_assert (5u <= sym_code_bits); 672 mhd_assert (30u >= sym_code_bits); 673 mhd_assert (0 == (sym_code & (uif32_all_ones >> sym_code_bits))); 674 675 acc_out |= (sym_code >> acc_out_used_bits); 676 acc_out_used_bits += sym_code_bits; 677 678 if (mhd_COND_PRED_FALSE_P (acc_out_used_bits >= acc_out_bits_width, 0.81)) 679 { /* The accumulator 'acc_out' must be written to the output buffer. */ 680 uint_fast32_t data_out; 681 size_t next_out_idx; 682 /* Number of bits in the 'sym_code' that does not fit into 'acc_out' */ 683 const uint_fast8_t sym_code_bits_left = 684 acc_out_used_bits - acc_out_bits_width; 685 /* The number of bits from 'sym_code' to be flushed to the output buffer */ 686 const uint_fast8_t sym_code_bits_to_flush = 687 sym_code_bits - sym_code_bits_left; 688 689 mhd_assert (29 >= sym_code_bits_left); 690 691 next_out_idx = out_idx + sizeof(acc_out); 692 if (mhd_COND_HARDLY_EVER (sizeof(acc_out) > next_out_idx)) 693 return 0; /* 'out_idx' overflow */ 694 if (mhd_COND_ALMOST_NEVER (out_buf_size < next_out_idx)) 695 return 0; /* Not enough space in the "out_buf" */ 696 697 mhd_PUT_UINTFAST32_BE (&data_out, acc_out); 698 memcpy (out_b + out_idx, &data_out, sizeof(data_out)); 699 out_idx = next_out_idx; 700 701 mhd_assert (0 != sym_code_bits_to_flush); 702 /* The shift performed in two steps to avoid branches and 703 undefined behaviour */ 704 mhd_assert (sym_code_bits >= sym_code_bits_to_flush); 705 acc_out = (sym_code << 1u) << (sym_code_bits_to_flush - 1u); 706 mhd_assert (0 == (acc_out & (uif32_all_ones >> sym_code_bits_left))); 707 acc_out_used_bits = sym_code_bits_left; 708 } 709 } 710 mhd_assert (acc_out_bits_width > acc_out_used_bits); 711 mhd_assert (0 == (acc_out & (uif32_all_ones >> acc_out_used_bits))); 712 713 if (mhd_COND_PRED_TRUE_P (0 != acc_out_used_bits, \ 714 (sizeof(uint_fast32_t) - 1.0) \ 715 / ((double) sizeof(uint_fast32_t)))) 716 { /* The accumulator 'acc_out' must be flushed to the output buffer. */ 717 const uint_fast8_t tail_bytes = 718 (uint_fast8_t) (acc_out_used_bits + 7u) >> 3u; 719 size_t next_out_idx; 720 uint_fast32_t data_out; 721 722 mhd_assert (sizeof(acc_out) >= tail_bytes); 723 mhd_assert (0 != tail_bytes); 724 725 next_out_idx = out_idx + tail_bytes; 726 if (mhd_COND_HARDLY_EVER (tail_bytes > next_out_idx)) 727 return 0; /* 'out_idx' overflow */ 728 if (mhd_COND_ALMOST_NEVER (out_buf_size < next_out_idx)) 729 return 0; /* Not enough space in the "out_buf" */ 730 731 acc_out |= uif32_all_ones >> acc_out_used_bits; 732 mhd_PUT_UINTFAST32_BE (&data_out, acc_out); 733 memcpy (out_b + out_idx, &data_out, tail_bytes); 734 out_idx = next_out_idx; 735 } 736 737 mhd_assert (out_buf_size >= out_idx); 738 739 return out_idx; 740 } 741 742 743 #ifndef NDEBUG 744 /** 745 * Track decoding table initialisation. 746 */ 747 static bool h2huff_decode_inited = false; 748 #endif 749 750 /** 751 * @def MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP 752 * Omit the unreachable tail part of some decode tables to save memory. 753 * 754 * When enabled, a decode table may be allocated with a shortened size that 755 * excludes entries that are never addressed by valid codes (given the 756 * prefix/next-prefix boundaries). 757 * 758 * If not defined then enabled for code hardening and for debug builds. 759 */ 760 #ifndef MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP 761 # if defined(MHD_USE_CODE_HARDENING) || ! defined(NDEBUG) 762 # define MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP 1 763 # else 764 # define MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP 0 765 # endif 766 #endif 767 768 769 /** 770 * @def HUFF_DTBL_SIZE_ROUNDED 771 * Full table size for a given index width @a twidth (no tail gap). 772 */ 773 #define HUFF_DTBL_SIZE_ROUNDED(twidth) (1u << (twidth)) 774 775 /** 776 * @def HUFF_DTBL_ENTRIES 777 * Number of actually used entries for a table with prefix width 778 * @a pwidth, table index width @a twidth, and the next table prefix width 779 * @a next_pwidth. 780 * 781 * This corresponds to the addressable range of codes for that table, 782 * excluding the part that belongs to the next table deeper in the chain. 783 */ 784 #define HUFF_DTBL_ENTRIES(pwidth,twidth,next_pwidth) \ 785 (HUFF_DTBL_SIZE_ROUNDED (twidth) \ 786 - (1u << ((pwidth) + (twidth) - (next_pwidth))) ) 787 788 /** 789 * @def HUFF_DTBL_SIZE 790 * Actual allocation size for the decode table (may include extra space). 791 */ 792 #if MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP 793 # define HUFF_DTBL_SIZE(pwidth,twidth,next_pwidth) \ 794 HUFF_DTBL_ENTRIES (pwidth,twidth,next_pwidth) 795 #else 796 # define HUFF_DTBL_SIZE(pwidth,twidth,next_pwidth) \ 797 HUFF_DTBL_SIZE_ROUNDED (twidth) 798 #endif 799 800 801 /* Decode table for prefix 0 bits, table width 8 bits. 802 Codes: 803 '0' ( 48) 00000xxx [ 5] 804 '1' ( 49) 00001xxx [ 5] 805 '2' ( 50) 00010xxx [ 5] 806 ... 807 ... 808 ';' ( 59) 11111011 [ 8] 809 'X' ( 88) 11111100 [ 8] 810 'Z' ( 90) 11111101 [ 8] 811 */ 812 mhd_ALIGNED_64 static 813 uint_least16_t mhd_h2huff_SL_by_code_0p8c[HUFF_DTBL_SIZE (0, 8, 7)]; 814 815 /* Decode table for prefix 7 bits, table width 6 bits. 816 Codes: 817 '!' ( 33) 1111111 000xxx [10] 818 '"' ( 34) 1111111 001xxx [10] 819 '(' ( 40) 1111111 010xxx [10] 820 ... 821 ... 822 '[' ( 91) 1111111 111011 [13] 823 ']' ( 93) 1111111 111100 [13] 824 '~' (126) 1111111 111101 [13] 825 */ 826 mhd_ALIGNED_64 static 827 uint_least16_t mhd_h2huff_SL_by_code_7p6c[HUFF_DTBL_SIZE (7, 6, 12)]; 828 829 /* Decode table for prefix 12 bits, table width 3 bits. 830 Codes: 831 '^' ( 94) 111111111111 00x [14] 832 '}' (125) 111111111111 01x [14] 833 '<' ( 60) 111111111111 100 [15] 834 '`' ( 96) 111111111111 101 [15] 835 '{' (123) 111111111111 110 [15] 836 */ 837 mhd_ALIGNED_8 static 838 uint_least16_t mhd_h2huff_SL_by_code_12p3c[HUFF_DTBL_SIZE (12,3,15)]; 839 840 /* Decode table for prefix 15 bits, table width 7 bits. 841 Codes: 842 '\' ( 92) 111111111111111 0000xxx [19] 843 (195) 111111111111111 0001xxx [19] 844 (208) 111111111111111 0010xxx [19] 845 ... 846 ... 847 (169) 111111111111111 1011101 [22] 848 (170) 111111111111111 1011110 [22] 849 (173) 111111111111111 1011111 [22] 850 */ 851 mhd_ALIGNED_16 static 852 uint_least16_t mhd_h2huff_SL_by_code_15p7c[HUFF_DTBL_SIZE (15,7,17)]; 853 854 /* Decode table for prefix 17 bits, table width 6 bits. 855 Codes: 856 (188) 1111111111111111111 0000xx [23] 857 (191) 1111111111111111111 0001xx [23] 858 (197) 1111111111111111111 0010xx [23] 859 ... 860 ... 861 (207) 1111111111111111111 101101 [25] 862 (234) 1111111111111111111 101110 [25] 863 (235) 1111111111111111111 101111 [25] 864 */ 865 mhd_ALIGNED_16 static 866 uint_least16_t mhd_h2huff_SL_by_code_17p6c[HUFF_DTBL_SIZE (17,6,19)]; 867 868 /* Decode table for prefix 19 bits, table width 6 bits. 869 Codes: 870 (192) 111111111111111111111 00000x [26] 871 (193) 111111111111111111111 00001x [26] 872 (200) 111111111111111111111 00010x [26] 873 ... 874 ... 875 (251) 111111111111111111111 101101 [27] 876 (252) 111111111111111111111 101110 [27] 877 (253) 111111111111111111111 101111 [27] 878 */ 879 mhd_ALIGNED_16 static 880 uint_least16_t mhd_h2huff_SL_by_code_19p6c[HUFF_DTBL_SIZE (19,6,21)]; 881 882 /* Decode table for prefix 21 bits, table width 6 bits. 883 Codes: 884 (192) 111111111111111111111 00000x [26] 885 (193) 111111111111111111111 00001x [26] 886 (200) 111111111111111111111 00010x [26] 887 ... 888 ... 889 (251) 111111111111111111111 101101 [27] 890 (252) 111111111111111111111 101110 [27] 891 (253) 111111111111111111111 101111 [27] 892 */ 893 mhd_ALIGNED_16 static 894 uint_least16_t mhd_h2huff_SL_by_code_21p6c[HUFF_DTBL_SIZE (21,6,23)]; 895 896 /* Decode table for prefix 23 bits, table width 5 bits. 897 Codes: 898 (254) 11111111111111111111111 0000x [27] 899 ( 2) 11111111111111111111111 00010 [28] 900 ( 3) 11111111111111111111111 00011 [28] 901 ... 902 ... 903 (127) 11111111111111111111111 11100 [28] 904 (220) 11111111111111111111111 11101 [28] 905 (249) 11111111111111111111111 11110 [28] 906 */ 907 mhd_ALIGNED_16 static 908 uint_least16_t mhd_h2huff_SL_by_code_23p5c[HUFF_DTBL_SIZE (23,5,28)]; 909 910 /* Decode table for prefix 28 bits, table width 2 bits. 911 Codes: 912 ( 10) 1111111111111111111111111111 00 [30] 913 ( 13) 1111111111111111111111111111 01 [30] 914 ( 22) 1111111111111111111111111111 10 [30] 915 */ 916 mhd_ALIGNED_8 static 917 uint_least16_t mhd_h2huff_SL_by_code_28p2c[HUFF_DTBL_SIZE (28,2,30)]; 918 919 920 /** 921 * Populate entries for a single symbol within a particular decode table. 922 * 923 * A decode table covers a window of codes that share a fixed @a pbits prefix 924 * (all-ones in MSB-aligned representation) and differ in up to @a tbits 925 * following bits. This writes the appropriate range of entries for 926 * a particular @a code / @a codebits pair into @a table_to_fill. 927 * 928 * @param sym the symbol (0..255) 929 * @param code the MSB-aligned 32-bit code 930 * @param codebits the @a code length in bits (5..30) 931 * @param pbits the prefix width in bits (all-one MSB prefix) 932 * @param tbits the table-index width in bits 933 * @param next_pbits the next table prefix width 934 * @param[out] table_to_fill the pointer to the target decode table. 935 */ 936 MHD_FN_PAR_INOUT_ (7) MHD_FN_PAR_NONNULL_ALL_ static inline void 937 init_decode_table_entries_per_sym ( 938 const uint_fast8_t sym, 939 const uint_least32_t code, 940 const uint_fast8_t codebits, 941 const uint_fast8_t pbits, 942 const uint_fast8_t tbits, 943 const uint_fast8_t next_pbits, 944 uint_least16_t table_to_fill[ 945 MHD_FN_PAR_DYN_ARR_SIZE_ (HUFF_DTBL_SIZE (pbits,tbits,next_pbits))]) 946 { 947 mhd_constexpr uint_least32_t b32ones = 0xFFFFFFFFu; /**< 32 all-ones bits */ 948 const uint_least16_t sym_and_blen = 949 (uint_least16_t) (((uint_least16_t) codebits) << 8u | sym); 950 uint_least32_t i_tbl; 951 uint_least32_t i_tbl_end; 952 953 #ifdef NDEBUG 954 (void) next_pbits; /* Mute compiler warning */ 955 #endif /* NDEBUG */ 956 957 mhd_assert ((codebits <= 30u) && "The longest code is 30 bits"); 958 mhd_assert ((pbits < codebits) && \ 959 "The prefix must not be wider than the code"); 960 mhd_assert ((pbits + tbits) >= codebits && "The width of the code cannot " \ 961 "be larger than the total width of the prefix plus the " \ 962 "width of the table"); 963 mhd_assert (code == (code & b32ones) && \ 964 "The code cannot have bits higher than 32 bits"); 965 mhd_assert (0u == (code & (b32ones >> codebits)) && \ 966 "The unused part of the code must be zero"); 967 mhd_assert ((0u == pbits) || \ 968 ((b32ones >> (32u - pbits)) == (code >> (32u - pbits)) && \ 969 "The prefix bits in the code must be all set")); 970 mhd_assert ((b32ones >> (32u - codebits)) != (code >> (32u - codebits)) && \ 971 "The bits in the code after the prefix cannot be all set"); 972 973 i_tbl = ((code & (b32ones >> pbits)) >> (32u - (pbits + tbits))); 974 mhd_assert (HUFF_DTBL_ENTRIES (pbits, tbits, next_pbits) > i_tbl); 975 i_tbl_end = i_tbl + (1u << ((pbits + tbits) - codebits)) - 1u; 976 mhd_assert (i_tbl <= i_tbl_end); 977 mhd_assert (HUFF_DTBL_ENTRIES (pbits, tbits, next_pbits) > i_tbl_end); 978 979 do 980 { 981 mhd_assert (HUFF_DTBL_ENTRIES (pbits, tbits, next_pbits) > i_tbl); 982 mhd_assert (0 == table_to_fill[i_tbl] && \ 983 "No entry must be written twice"); 984 table_to_fill[i_tbl] = sym_and_blen; 985 } while (i_tbl++ < i_tbl_end); 986 } 987 988 989 /** 990 * Build one decode table for a particular (prefix, width, next-prefix) triple. 991 * 992 * @param pbits the number of bits in prefix, i.e. the static part common 993 * to all codes in the table (all-one bits) 994 * @param tbits the width of the table index in bits, i.e. the variable part 995 * of the code 996 * @param next_pbits the next table prefix width 997 * @param[out] table_to_fill the pointer to the table to fill 998 */ 999 MHD_FN_PAR_INOUT_ (4) MHD_FN_PAR_NONNULL_ALL_ static void 1000 init_decode_table ( 1001 const uint_fast8_t pbits, 1002 const uint_fast8_t tbits, 1003 const uint_fast8_t next_pbits, 1004 uint_least16_t table_to_fill[ 1005 MHD_FN_PAR_DYN_ARR_SIZE_ (HUFF_DTBL_SIZE (pbits,tbits,next_pbits))]) 1006 { 1007 mhd_constexpr uint_least32_t b32ones = 0xFFFFFFFFu; /**< 32 all-ones bits */ 1008 uint_fast8_t sym; 1009 1010 mhd_assert (30 >= pbits + tbits); 1011 mhd_assert (5 <= pbits + tbits); 1012 mhd_assert (next_pbits > pbits); 1013 mhd_assert (next_pbits <= (pbits + tbits)); 1014 1015 memset (table_to_fill, 1016 0, 1017 HUFF_DTBL_SIZE (pbits,tbits,next_pbits) * sizeof(*table_to_fill)); /* Zero the table completely, including unused entries */ 1018 1019 sym = 0; 1020 do 1021 { 1022 uint_least32_t code = mhd_h2huff_code_by_sym[sym]; 1023 const uint_fast8_t codebits = mhd_h2huff_bitlen_by_sym[sym]; 1024 1025 mhd_assert ((code <= mhd_h2huff_code_by_sym[mhd_H2HUFF_LONGEST_CODE_SYM]) \ 1026 && "Huffman code must be less than or equal to the largest " \ 1027 "Huffman code in the table."); 1028 mhd_assert (codebits <= 30); 1029 mhd_assert (0 == (code & (b32ones >> codebits)) && \ 1030 "The unused part of the code must be zero"); 1031 1032 if ((0 != pbits) && 1033 (((b32ones << (32u - pbits)) & b32ones) > code)) 1034 continue; 1035 1036 if (((b32ones << (32u - next_pbits)) & b32ones) <= code) 1037 continue; 1038 1039 mhd_assert ((pbits + tbits) >= codebits); 1040 1041 init_decode_table_entries_per_sym (sym, 1042 code, 1043 codebits, 1044 pbits, 1045 tbits, 1046 next_pbits, 1047 table_to_fill); 1048 } while (255 > sym++); 1049 1050 #ifndef NDEBUG 1051 if (1) 1052 { /* Check the table */ 1053 uint_least32_t i; 1054 for (i = 0; 1055 i < HUFF_DTBL_ENTRIES (pbits,tbits,next_pbits); 1056 ++i) 1057 mhd_assert ((0u != table_to_fill[i]) && \ 1058 "All valid entries must be used"); 1059 # if MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP 1060 for (i = HUFF_DTBL_ENTRIES (pbits,tbits,next_pbits); 1061 i < HUFF_DTBL_SIZE (pbits,tbits,next_pbits); 1062 ++i) 1063 mhd_assert ((0u == table_to_fill[i]) && \ 1064 "All unused entries must be zero"); 1065 # endif /* MHD_USE_HUFFMAN_DECODE_TABLES_TAIL_GAP */ 1066 } 1067 #endif /* ! NDEBUG */ 1068 } 1069 1070 1071 MHD_INTERNAL void 1072 mhd_h2_huffman_init (void) 1073 { 1074 init_decode_table (0, 8, 7, mhd_h2huff_SL_by_code_0p8c); 1075 init_decode_table (7, 6, 12, mhd_h2huff_SL_by_code_7p6c); 1076 init_decode_table (12, 3, 15, mhd_h2huff_SL_by_code_12p3c); 1077 init_decode_table (15, 7, 17, mhd_h2huff_SL_by_code_15p7c); 1078 init_decode_table (17, 6, 19, mhd_h2huff_SL_by_code_17p6c); 1079 init_decode_table (19, 6, 21, mhd_h2huff_SL_by_code_19p6c); 1080 init_decode_table (21, 6, 23, mhd_h2huff_SL_by_code_21p6c); 1081 init_decode_table (23, 5, 28, mhd_h2huff_SL_by_code_23p5c); 1082 init_decode_table (28, 2, 30, mhd_h2huff_SL_by_code_28p2c); 1083 1084 #ifndef NDEBUG 1085 h2huff_decode_inited = true; 1086 #endif 1087 } 1088 1089 1090 /* 1091 * Additional decoding optimisation ideas. 1092 * Decode by two symbols at once (limited subset of the most used symbols). 1093 * To keep the decoding table small, decode only 5-7 bits codes, put symbols 1094 * value to the 'uint16_t' value, where 14 bits encode symbols value (all 1095 * symbols have high bits zero), zero value means missing second symbol; 1096 * top two bits encode length of the code pair: 10-13 bits length if first 1097 * code bit is zero or 11-14 bits if first code bit is zero; for missing 1098 * second symbol: 5-7 bits length of the symbol. 1099 */ 1100 1101 /** 1102 * @def mhd_HF_DEC64 1103 * Use 64-bit accumulator for decoding on 64-bit platforms. 1104 */ 1105 #if SIZEOF_UINT_FAST32_T >= 8 || SIZEOF_VOIDP >= 8 1106 # define mhd_HF_DEC64 1 1107 #endif 1108 1109 MHD_INTERNAL MHD_FN_PAR_NONNULL_ALL_ 1110 MHD_FN_PAR_IN_SIZE_ (2,1) MHD_FN_PAR_OUT_SIZE_ (4,3) size_t 1111 mhd_h2_huffman_decode (size_t encoded_size, 1112 const void *restrict encoded, 1113 size_t out_buf_size, 1114 char *restrict out_buf, 1115 enum mhd_H2HuffDecodeRes *restrict decode_result) 1116 { 1117 mhd_constexpr uint_least32_t b32ones = 0xFFFFFFFFu; /**< 32 all-ones bits */ 1118 #ifdef mhd_HF_DEC64 1119 mhd_constexpr uint_least64_t b64ones = 0xFFFFFFFFFFFFFFFFu; /**< 64 bits all set to one */ 1120 /** The width of the @a enc_chunk */ 1121 mhd_constexpr uint_fast8_t enc_chunk_width = 64u; 1122 /** The chunk of the encoded data to be decoded. */ 1123 uint_least64_t enc_chunk; 1124 /** The next bytes of the encoded data. 1125 Moved as needed to the @a enc_chunk by 32-bits portions. */ 1126 uint_least64_t enc_shadow; 1127 /**< The number of valid bits in the @a enc_shadow */ 1128 uint_fast8_t enc_shadow_bits; 1129 #else /* ! mhd_HF_DEC64 */ 1130 /** The width of the @a enc_chunk */ 1131 mhd_constexpr uint_fast8_t enc_chunk_width = 32u; 1132 /** The chunk of the encoded data to be decoded. */ 1133 uint_least32_t enc_chunk; 1134 /** The next bytes of the encoded data. 1135 Moved as needed to the @a enc_chunk by bits. */ 1136 uint_least32_t enc_chunk_next; 1137 #endif /* ! mhd_HF_DEC64 */ 1138 /** The total number of valid bits in @a enc_chunk and @a enc_chunk_next */ 1139 uint_fast8_t enc_chunk_bits; 1140 const uint8_t *const enc_b = (const uint8_t *) encoded; 1141 size_t enc_idx; 1142 size_t out_idx; 1143 1144 mhd_assert (h2huff_decode_inited); 1145 #ifndef MHD_UNIT_TESTING 1146 /* DO check the input for validity if NOT unit-testing */ 1147 mhd_assert (0 != encoded_size); 1148 mhd_assert (0 != out_buf_size); 1149 #endif /* ! MHD_UNIT_TESTING */ 1150 mhd_assert (out_buf_size <= ((~((size_t) (0))) - sizeof(uint_least32_t))); 1151 1152 /* Pre-load data */ 1153 if (1) 1154 { 1155 uint_least64_t data_in; 1156 size_t load_bytes; 1157 uint_least64_t enc_chunk64; 1158 data_in = 0; 1159 load_bytes = ((8u > encoded_size) ? encoded_size : 8u); 1160 memcpy (&data_in, enc_b, load_bytes); 1161 enc_chunk64 = mhd_GET_64BIT_BE (&data_in); 1162 #ifdef mhd_HF_DEC64 1163 enc_chunk = enc_chunk64; 1164 #else /* ! mhd_HF_DEC64 */ 1165 enc_chunk = ((uint_least32_t) (enc_chunk64 >> 32u)) & b32ones; 1166 enc_chunk_next = ((uint_least32_t) enc_chunk64) & b32ones; 1167 #endif /* ! mhd_HF_DEC64 */ 1168 enc_chunk_bits = (uint_fast8_t) (load_bytes << 3u); 1169 enc_idx = load_bytes; 1170 #ifdef mhd_HF_DEC64 1171 if (enc_idx < encoded_size) 1172 { 1173 data_in = 0; 1174 load_bytes = encoded_size - enc_idx; 1175 if (8 < load_bytes) 1176 load_bytes = 8u; 1177 memcpy (&data_in, enc_b + enc_idx, load_bytes); 1178 enc_shadow = mhd_GET_64BIT_BE (&data_in); 1179 enc_shadow_bits = (uint_fast8_t) (load_bytes << 3u); 1180 enc_idx += load_bytes; 1181 } 1182 else 1183 { 1184 enc_shadow = 0u; 1185 enc_shadow_bits = 0u; 1186 } 1187 #endif /* mhd_HF_DEC64 */ 1188 } 1189 out_idx = 0; 1190 1191 while (7 < enc_chunk_bits) 1192 { 1193 /* 32 bit MSB-aligned current decoding code */ 1194 const uint_least32_t cur_code = 1195 #ifdef mhd_HF_DEC64 1196 (uint_least32_t) (((uint_least32_t) (enc_chunk >> 32u)) & b32ones) 1197 #else /* ! mhd_HF_DEC64 */ 1198 enc_chunk 1199 #endif /* ! mhd_HF_DEC64 */ 1200 ; 1201 uint_fast8_t pbits; 1202 uint_fast8_t tbits; 1203 uint_fast8_t next_pbits; 1204 const uint_least16_t *dec_table; 1205 uint_least16_t sym_and_blen; 1206 uint8_t sym; 1207 uint_fast8_t sym_bits; 1208 uint_least32_t tbl_pos; 1209 1210 mhd_assert (encoded_size >= enc_idx); 1211 mhd_assert (64u >= enc_chunk_bits); 1212 1213 #ifdef mhd_HF_DEC64 1214 mhd_assert ((64u == enc_chunk_bits) || \ 1215 (0 == ((enc_chunk << enc_chunk_bits) & b64ones))); 1216 mhd_assert (enc_chunk == (enc_chunk & b64ones)); 1217 #else /* ! mhd_HF_DEC64 */ 1218 mhd_assert ((32u <= enc_chunk_bits) || \ 1219 (0 == ((enc_chunk << enc_chunk_bits) & b32ones))); 1220 mhd_assert ((32u > enc_chunk_bits) || (64u == enc_chunk_bits) || \ 1221 (0 == ((enc_chunk_next << (enc_chunk_bits - 32u)) & b32ones))); 1222 mhd_assert (enc_chunk == (enc_chunk & b32ones)); 1223 #endif /* ! mhd_HF_DEC64 */ 1224 1225 /** 1226 * Build an the starting code for the table as 32-bit MSB-aligned value. 1227 * @param pbits the table prefix width in bits. 1228 */ 1229 #define HUFF_DTBL_START(pbits) ((b32ones << (32u - (pbits)))&b32ones) 1230 1231 pbits = 0u; 1232 tbits = 8u; 1233 dec_table = mhd_h2huff_SL_by_code_0p8c; 1234 next_pbits = 7u; 1235 if (mhd_COND_PRED_FALSE_P (HUFF_DTBL_START (next_pbits) <= cur_code, 0.8)) 1236 { 1237 pbits = next_pbits; 1238 tbits = 6u; 1239 dec_table = mhd_h2huff_SL_by_code_7p6c; 1240 next_pbits = 12u; 1241 if (mhd_COND_RARELY (HUFF_DTBL_START (next_pbits) <= cur_code)) 1242 { 1243 pbits = next_pbits; 1244 tbits = 3u; 1245 dec_table = mhd_h2huff_SL_by_code_12p3c; 1246 next_pbits = 15u; 1247 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1248 { 1249 pbits = next_pbits; 1250 tbits = 7u; 1251 dec_table = mhd_h2huff_SL_by_code_15p7c; 1252 next_pbits = 17u; 1253 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1254 { 1255 pbits = next_pbits; 1256 tbits = 6u; 1257 dec_table = mhd_h2huff_SL_by_code_17p6c; 1258 next_pbits = 19u; 1259 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1260 { 1261 pbits = next_pbits; 1262 tbits = 6u; 1263 dec_table = mhd_h2huff_SL_by_code_19p6c; 1264 next_pbits = 21u; 1265 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1266 { 1267 pbits = next_pbits; 1268 tbits = 6u; 1269 dec_table = mhd_h2huff_SL_by_code_21p6c; 1270 next_pbits = 23u; 1271 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1272 { 1273 pbits = next_pbits; 1274 tbits = 5u; 1275 dec_table = mhd_h2huff_SL_by_code_23p5c; 1276 next_pbits = 28u; 1277 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1278 { 1279 pbits = next_pbits; 1280 tbits = 2u; 1281 dec_table = mhd_h2huff_SL_by_code_28p2c; 1282 next_pbits = 30u; 1283 if (HUFF_DTBL_START (next_pbits) <= cur_code) 1284 { 1285 *decode_result = MHD_H2_HUFF_DEC_RES_BROKEN_DATA; 1286 return 0; 1287 } 1288 } 1289 } 1290 } 1291 } 1292 } 1293 } 1294 } 1295 #ifndef MHD_FAVOR_SMALL_CODE 1296 tbl_pos = ((cur_code << pbits) & b32ones) >> (32u - tbits); 1297 #endif /* ! MHD_FAVOR_SMALL_CODE */ 1298 } 1299 #ifndef MHD_FAVOR_SMALL_CODE 1300 else 1301 { 1302 tbl_pos = cur_code >> (32u - tbits); 1303 (void) pbits; 1304 } 1305 #endif /* ! MHD_FAVOR_SMALL_CODE */ 1306 1307 #undef HUFF_DTBL_START 1308 1309 mhd_assert (5u <= (pbits + tbits)); 1310 mhd_assert (30u >= (pbits + tbits)); 1311 1312 #ifdef MHD_FAVOR_SMALL_CODE 1313 tbl_pos = ((cur_code << pbits) & b32ones) >> (32u - tbits); 1314 #endif /* MHD_FAVOR_SMALL_CODE */ 1315 1316 mhd_assert (HUFF_DTBL_ENTRIES (pbits, tbits, next_pbits) > tbl_pos); 1317 sym_and_blen = dec_table[tbl_pos]; 1318 1319 mhd_assert (0u != sym_and_blen); 1320 1321 sym = (uint8_t) (sym_and_blen & 0xFFu); 1322 sym_bits = (uint_fast8_t) (sym_and_blen >> 8u); 1323 1324 mhd_assert (0u != sym_bits); 1325 mhd_assert (5u <= sym_bits); 1326 mhd_assert (30u >= sym_bits); 1327 mhd_assert (pbits <= sym_bits); 1328 mhd_assert ((pbits + tbits) >= sym_bits); 1329 mhd_assert (mhd_h2huff_bitlen_by_sym[sym] == sym_bits); 1330 1331 if (mhd_COND_VERY_RARELY (enc_chunk_bits < sym_bits)) 1332 { 1333 *decode_result = MHD_H2_HUFF_DEC_RES_BROKEN_DATA; 1334 return 0; 1335 } 1336 enc_chunk_bits -= sym_bits; 1337 #ifdef mhd_HF_DEC64 1338 enc_chunk = (enc_chunk << sym_bits) & b64ones; 1339 #else /* ! mhd_HF_DEC64 */ 1340 enc_chunk = ((enc_chunk << sym_bits) & b32ones); 1341 enc_chunk |= enc_chunk_next >> (32u - sym_bits); 1342 enc_chunk_next = ((enc_chunk_next << sym_bits) & b32ones); 1343 #endif /* ! mhd_HF_DEC64 */ 1344 1345 if (mhd_COND_VERY_RARELY (out_buf_size == out_idx)) 1346 { 1347 *decode_result = MHD_H2_HUFF_DEC_RES_NO_SPACE; 1348 return 0; 1349 } 1350 out_buf[out_idx++] = (char) sym; 1351 1352 if (mhd_COND_PRED_FALSE_P (32u > enc_chunk_bits, 0.84)) 1353 { 1354 /* Load new chunk of encoded data */ 1355 #ifdef mhd_HF_DEC64 1356 if (enc_shadow_bits != 0) 1357 { 1358 const uint_fast8_t free_bits = (uint_fast8_t) (64u - enc_chunk_bits); 1359 const uint_fast8_t move_bits = 1360 (enc_shadow_bits > free_bits) ? free_bits : enc_shadow_bits; 1361 mhd_assert (0u != enc_chunk_bits && \ 1362 "A single symbol cannot be more than 30 bits. If on " \ 1363 "the previous step 'enc_chunk' has not been filled to " \ 1364 "more than 32 bits it means that 'enc_shadow' is " \ 1365 "already empty."); 1366 mhd_assert (0 != move_bits); 1367 1368 enc_chunk |= enc_shadow >> enc_chunk_bits; 1369 enc_chunk_bits += move_bits; 1370 enc_shadow <<= move_bits; 1371 enc_shadow_bits -= move_bits; 1372 mhd_assert ((64u == enc_chunk_bits) || \ 1373 (0 == ((enc_chunk << enc_chunk_bits) & b64ones))); 1374 } 1375 if ((0u == enc_shadow_bits) \ 1376 && (encoded_size > enc_idx)) 1377 { 1378 uint_least64_t data_in; 1379 size_t load_bytes; 1380 1381 data_in = 0; 1382 load_bytes = encoded_size - enc_idx; 1383 if (8u < load_bytes) 1384 load_bytes = 8u; 1385 memcpy (&data_in, enc_b + enc_idx, load_bytes); 1386 enc_idx += load_bytes; 1387 enc_shadow = mhd_GET_64BIT_BE (&data_in); 1388 enc_shadow_bits = (uint_fast8_t) (load_bytes << 3u); 1389 1390 if (mhd_COND_RARELY (32u > enc_chunk_bits)) 1391 { 1392 const uint_fast8_t free_bits = (uint_fast8_t) (64u - enc_chunk_bits); 1393 const uint_fast8_t use_bits = 1394 (enc_shadow_bits > free_bits) ? free_bits : enc_shadow_bits; 1395 1396 mhd_assert (0u != enc_chunk_bits); 1397 enc_chunk |= enc_shadow >> enc_chunk_bits; 1398 enc_chunk_bits += use_bits; 1399 enc_shadow <<= use_bits; 1400 enc_shadow_bits -= use_bits; 1401 } 1402 } 1403 #else /* ! mhd_HF_DEC64 */ 1404 if (encoded_size > enc_idx) 1405 { 1406 uint_least32_t data_in; 1407 size_t load_bytes; 1408 uint_least32_t enc_data_load; 1409 1410 mhd_assert (0u != enc_chunk_bits && \ 1411 "A single symbol cannot be more than 30 bits. If more " \ 1412 "data to decode is present, then buffer would be filled " \ 1413 "on the previous step."); 1414 mhd_assert (0 == (enc_chunk & (b32ones >> enc_chunk_bits))); 1415 mhd_assert (0 == enc_chunk_next); 1416 1417 data_in = 0; 1418 load_bytes = (encoded_size - enc_idx); 1419 if (4 < load_bytes) 1420 load_bytes = 4; 1421 memcpy (&data_in, enc_b + enc_idx, load_bytes); 1422 enc_idx += load_bytes; 1423 enc_data_load = mhd_GET_32BIT_BE (&data_in); 1424 1425 enc_chunk |= ((enc_data_load >> enc_chunk_bits) & b32ones); 1426 enc_chunk_next = ((enc_data_load << (32u - enc_chunk_bits)) & b32ones); 1427 enc_chunk_bits += (uint_fast8_t) (load_bytes << 3u); 1428 } 1429 #endif /* mhd_HF_DEC64 */ 1430 } 1431 } 1432 1433 if (0 != enc_chunk_bits) 1434 { /* Decode the last portion of the encoded data */ 1435 1436 /** The last portion of the data one-padded on the right */ 1437 const uint_least8_t last_byte = 1438 (uint_least8_t) ((enc_chunk >> (enc_chunk_width - 8u)) 1439 | (0xFFu >> enc_chunk_bits)); 1440 mhd_assert (7 >= enc_chunk_bits); 1441 1442 if (0xFFu != last_byte) 1443 { /* The encoded data is not pure EOS */ 1444 uint_least16_t sym_and_blen; 1445 uint8_t sym; 1446 uint_fast8_t sym_bits; 1447 1448 sym_and_blen = mhd_h2huff_SL_by_code_0p8c[last_byte]; 1449 1450 mhd_assert (0u != sym_and_blen); 1451 1452 sym = (uint8_t) (sym_and_blen & 0xFFu); 1453 sym_bits = (uint_fast8_t) (sym_and_blen >> 8u); 1454 mhd_assert (sym_bits <= 8u); 1455 1456 if (mhd_COND_VERY_RARELY (enc_chunk_bits < sym_bits)) 1457 { 1458 *decode_result = MHD_H2_HUFF_DEC_RES_BROKEN_DATA; 1459 return 0; 1460 } 1461 1462 if (sym_bits != enc_chunk_bits) 1463 { /* Check whether the tail is EOS */ 1464 if (mhd_COND_VERY_RARELY (((uint_least8_t) (0xFFu >> sym_bits)) != 1465 (last_byte 1466 & (uint_least8_t) (0xFFu >> sym_bits)))) 1467 { 1468 *decode_result = MHD_H2_HUFF_DEC_RES_BROKEN_DATA; 1469 return 0; 1470 } 1471 } 1472 1473 if (mhd_COND_VERY_RARELY (out_buf_size == out_idx)) 1474 { 1475 *decode_result = MHD_H2_HUFF_DEC_RES_NO_SPACE; 1476 return 0; 1477 } 1478 out_buf[out_idx++] = (char) sym; 1479 } 1480 } 1481 *decode_result = MHD_H2_HUFF_DEC_RES_OK; 1482 return out_idx; 1483 }