commit b50bed1269be493f453c24de196bf89229abd2fd
parent 0c9323a9e64ca88a6f11d007e6f31c609762e950
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
Date: Wed, 14 Sep 2022 15:58:09 +0300
sha{256,512_256}: improved performance of the first steps on BE arches
Diffstat:
2 files changed, 126 insertions(+), 70 deletions(-)
diff --git a/src/microhttpd/sha256.c b/src/microhttpd/sha256.c
@@ -145,9 +145,7 @@ sha256_transform (uint32_t H[SHA256_DIGEST_SIZE_WORDS],
+ (w)[((t) - 7) & 0xf] + sig0 ((w)[((t) - 15) & 0xf]) )
#ifndef MHD_FAVOR_SMALL_CODE
- /* During first 16 steps, before making any calculations on each step,
- the W element is read from input data buffer as big-endian value and
- stored in array of W elements. */
+
/* Note: instead of using K constants as array, all K values are specified
individually for each step, see FIPS PUB 180-4 paragraph 4.2.2 for
K values. */
@@ -157,38 +155,68 @@ sha256_transform (uint32_t H[SHA256_DIGEST_SIZE_WORDS],
SHA2STEP32(h, a, b, c, d, e, f, g, K[1], data[1]);
so current 'vD' will be used as 'vE' on next step,
current 'vH' will be used as 'vA' on next step. */
- SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0x428a2f98), W[0] = \
- GET_W_FROM_DATA (data, 0));
- SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x71374491), W[1] = \
- GET_W_FROM_DATA (data, 1));
- SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0xb5c0fbcf), W[2] = \
- GET_W_FROM_DATA (data, 2));
- SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0xe9b5dba5), W[3] = \
- GET_W_FROM_DATA (data, 3));
- SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x3956c25b), W[4] = \
- GET_W_FROM_DATA (data, 4));
- SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x59f111f1), W[5] = \
- GET_W_FROM_DATA (data, 5));
- SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x923f82a4), W[6] = \
- GET_W_FROM_DATA (data, 6));
- SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xab1c5ed5), W[7] = \
- GET_W_FROM_DATA (data, 7));
- SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0xd807aa98), W[8] = \
- GET_W_FROM_DATA (data, 8));
- SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x12835b01), W[9] = \
- GET_W_FROM_DATA (data, 9));
- SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0x243185be), W[10] = \
- GET_W_FROM_DATA (data, 10));
- SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0x550c7dc3), W[11] = \
- GET_W_FROM_DATA (data, 11));
- SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x72be5d74), W[12] = \
- GET_W_FROM_DATA (data, 12));
- SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x80deb1fe), W[13] = \
- GET_W_FROM_DATA (data, 13));
- SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x9bdc06a7), W[14] = \
- GET_W_FROM_DATA (data, 14));
- SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xc19bf174), W[15] = \
- GET_W_FROM_DATA (data, 15));
+#if _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN
+ if ((const void *) W == data)
+ {
+ /* The input data is already in the cyclic data buffer W[] in the correct
+ byte order. */
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0x428a2f98), W[0]);
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x71374491), W[1]);
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0xb5c0fbcf), W[2]);
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0xe9b5dba5), W[3]);
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x3956c25b), W[4]);
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x59f111f1), W[5]);
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x923f82a4), W[6]);
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xab1c5ed5), W[7]);
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0xd807aa98), W[8]);
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x12835b01), W[9]);
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0x243185be), W[10]);
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0x550c7dc3), W[11]);
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x72be5d74), W[12]);
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x80deb1fe), W[13]);
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x9bdc06a7), W[14]);
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xc19bf174), W[15]);
+ }
+ else /* Combined with the next 'if' */
+#endif /* _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN */
+ if (1)
+ {
+ /* During first 16 steps, before making any calculations on each step,
+ the W element is read from the input data buffer as big-endian value and
+ stored in the array of W elements. */
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0x428a2f98), W[0] = \
+ GET_W_FROM_DATA (data, 0));
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x71374491), W[1] = \
+ GET_W_FROM_DATA (data, 1));
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0xb5c0fbcf), W[2] = \
+ GET_W_FROM_DATA (data, 2));
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0xe9b5dba5), W[3] = \
+ GET_W_FROM_DATA (data, 3));
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x3956c25b), W[4] = \
+ GET_W_FROM_DATA (data, 4));
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x59f111f1), W[5] = \
+ GET_W_FROM_DATA (data, 5));
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x923f82a4), W[6] = \
+ GET_W_FROM_DATA (data, 6));
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xab1c5ed5), W[7] = \
+ GET_W_FROM_DATA (data, 7));
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0xd807aa98), W[8] = \
+ GET_W_FROM_DATA (data, 8));
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x12835b01), W[9] = \
+ GET_W_FROM_DATA (data, 9));
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0x243185be), W[10] = \
+ GET_W_FROM_DATA (data, 10));
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0x550c7dc3), W[11] = \
+ GET_W_FROM_DATA (data, 11));
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x72be5d74), W[12] = \
+ GET_W_FROM_DATA (data, 12));
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x80deb1fe), W[13] = \
+ GET_W_FROM_DATA (data, 13));
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x9bdc06a7), W[14] = \
+ GET_W_FROM_DATA (data, 14));
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xc19bf174), W[15] = \
+ GET_W_FROM_DATA (data, 15));
+ }
/* During last 48 steps, before making any calculations on each step,
current W element is generated from other W elements of the cyclic buffer
diff --git a/src/microhttpd/sha512_256.c b/src/microhttpd/sha512_256.c
@@ -144,9 +144,7 @@ sha512_256_transform (uint64_t H[SHA512_256_HASH_SIZE_WORDS],
+ (w)[((t) - 7) & 15] + sig0 ((w)[((t) - 15) & 15]) )
#ifndef MHD_FAVOR_SMALL_CODE
- /* During first 16 steps, before making any calculations on each step,
- the W element is read from the input data buffer as big-endian value and
- stored in the array of W elements. */
+
/* Note: instead of using K constants as array, all K values are specified
individually for each step, see FIPS PUB 180-4 clause 4.2.3 for
K values. */
@@ -156,38 +154,68 @@ sha512_256_transform (uint64_t H[SHA512_256_HASH_SIZE_WORDS],
SHA2STEP64(h, a, b, c, d, e, f, g, K[1], data[1]);
so current 'vD' will be used as 'vE' on next step,
current 'vH' will be used as 'vA' on next step. */
- SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0x428a2f98d728ae22), \
- W[0] = GET_W_FROM_DATA (data, 0));
- SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x7137449123ef65cd), \
- W[1] = GET_W_FROM_DATA (data, 1));
- SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0xb5c0fbcfec4d3b2f), \
- W[2] = GET_W_FROM_DATA (data, 2));
- SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0xe9b5dba58189dbbc), \
- W[3] = GET_W_FROM_DATA (data, 3));
- SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x3956c25bf348b538), \
- W[4] = GET_W_FROM_DATA (data, 4));
- SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x59f111f1b605d019), \
- W[5] = GET_W_FROM_DATA (data, 5));
- SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x923f82a4af194f9b), \
- W[6] = GET_W_FROM_DATA (data, 6));
- SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xab1c5ed5da6d8118), \
- W[7] = GET_W_FROM_DATA (data, 7));
- SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0xd807aa98a3030242), \
- W[8] = GET_W_FROM_DATA (data, 8));
- SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x12835b0145706fbe), \
- W[9] = GET_W_FROM_DATA (data, 9));
- SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0x243185be4ee4b28c), \
- W[10] = GET_W_FROM_DATA (data, 10));
- SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0x550c7dc3d5ffb4e2), \
- W[11] = GET_W_FROM_DATA (data, 11));
- SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x72be5d74f27b896f), \
- W[12] = GET_W_FROM_DATA (data, 12));
- SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x80deb1fe3b1696b1), \
- W[13] = GET_W_FROM_DATA (data, 13));
- SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x9bdc06a725c71235), \
- W[14] = GET_W_FROM_DATA (data, 14));
- SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xc19bf174cf692694), \
- W[15] = GET_W_FROM_DATA (data, 15));
+#if _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN
+ if ((const void *) W == data)
+ {
+ /* The input data is already in the cyclic data buffer W[] in the correct
+ byte order. */
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0x428a2f98d728ae22), W[0]);
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x7137449123ef65cd), W[1]);
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0xb5c0fbcfec4d3b2f), W[2]);
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0xe9b5dba58189dbbc), W[3]);
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x3956c25bf348b538), W[4]);
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x59f111f1b605d019), W[5]);
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x923f82a4af194f9b), W[6]);
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xab1c5ed5da6d8118), W[7]);
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0xd807aa98a3030242), W[8]);
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x12835b0145706fbe), W[9]);
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0x243185be4ee4b28c), W[10]);
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0x550c7dc3d5ffb4e2), W[11]);
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x72be5d74f27b896f), W[12]);
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x80deb1fe3b1696b1), W[13]);
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x9bdc06a725c71235), W[14]);
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xc19bf174cf692694), W[15]);
+ }
+ else /* Combined with the next 'if' */
+#endif /* _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN */
+ if (1)
+ {
+ /* During first 16 steps, before making any calculations on each step,
+ the W element is read from the input data buffer as big-endian value and
+ stored in the array of W elements. */
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0x428a2f98d728ae22), \
+ W[0] = GET_W_FROM_DATA (data, 0));
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x7137449123ef65cd), \
+ W[1] = GET_W_FROM_DATA (data, 1));
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0xb5c0fbcfec4d3b2f), \
+ W[2] = GET_W_FROM_DATA (data, 2));
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0xe9b5dba58189dbbc), \
+ W[3] = GET_W_FROM_DATA (data, 3));
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x3956c25bf348b538), \
+ W[4] = GET_W_FROM_DATA (data, 4));
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x59f111f1b605d019), \
+ W[5] = GET_W_FROM_DATA (data, 5));
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x923f82a4af194f9b), \
+ W[6] = GET_W_FROM_DATA (data, 6));
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xab1c5ed5da6d8118), \
+ W[7] = GET_W_FROM_DATA (data, 7));
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0xd807aa98a3030242), \
+ W[8] = GET_W_FROM_DATA (data, 8));
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x12835b0145706fbe), \
+ W[9] = GET_W_FROM_DATA (data, 9));
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0x243185be4ee4b28c), \
+ W[10] = GET_W_FROM_DATA (data, 10));
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0x550c7dc3d5ffb4e2), \
+ W[11] = GET_W_FROM_DATA (data, 11));
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x72be5d74f27b896f), \
+ W[12] = GET_W_FROM_DATA (data, 12));
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x80deb1fe3b1696b1), \
+ W[13] = GET_W_FROM_DATA (data, 13));
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x9bdc06a725c71235), \
+ W[14] = GET_W_FROM_DATA (data, 14));
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xc19bf174cf692694), \
+ W[15] = GET_W_FROM_DATA (data, 15));
+ }
/* During last 64 steps, before making any calculations on each step,
current W element is generated from other W elements of the cyclic buffer