taler-rust

GNU Taler code in Rust. Largely core banking integrations.
Log | Files | Refs | Submodules | README | LICENSE

base32.rs (8248B)


      1 /*
      2   This file is part of TALER
      3   Copyright (C) 2024, 2025, 2026 Taler Systems SA
      4 
      5   TALER is free software; you can redistribute it and/or modify it under the
      6   terms of the GNU Affero General Public License as published by the Free Software
      7   Foundation; either version 3, or (at your option) any later version.
      8 
      9   TALER is distributed in the hope that it will be useful, but WITHOUT ANY
     10   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
     11   A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more details.
     12 
     13   You should have received a copy of the GNU Affero General Public License along with
     14   TALER; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>
     15 */
     16 
     17 use std::fmt::Display;
     18 
/**
 * Crockford's base32 alphabet
 *
 * 32 ASCII symbols (the digits followed by the uppercase letters, without
 * I, L, O and U). The position of a symbol in this table is the 5 bit value
 * it stands for.
 */
pub const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
     20 
     21 /** Encoded bytes len of Crockford's base32 */
     22 #[inline]
     23 const fn encoded_len(len: usize) -> usize {
     24     (len * 8).div_ceil(5)
     25 }
     26 
     27 /** Buffer bytes len of Crockford's base32 using a batch of 8 chars */
     28 #[inline]
     29 pub(crate) const fn encoded_buf_len(len: usize) -> usize {
     30     (len / 5 + 1) * 8
     31 }
     32 
     33 /** Encode bytes using Crockford's base32 */
     34 pub fn encode_static<'a, const N: usize>(bytes: &[u8; N], out: &'a mut [u8]) -> &'a str {
     35     // Batch encoded
     36     encode_batch(bytes, out);
     37 
     38     // Truncate incomplete ending chunk
     39     let truncated = &out[..encoded_len(bytes.len())];
     40 
     41     // SAFETY: only contains valid ASCII characters from CROCKFORD_ALPHABET
     42     unsafe { std::str::from_utf8_unchecked(truncated) }
     43 }
     44 
     45 /** Encode bytes using Crockford's base32 */
     46 pub fn encode(bytes: impl AsRef<[u8]>) -> String {
     47     let bytes = bytes.as_ref();
     48     let mut buf = vec![0u8; encoded_buf_len(bytes.len())];
     49     // Batch encoded
     50     encode_batch(bytes, &mut buf);
     51 
     52     // Truncate incomplete ending chunk
     53     buf.truncate(encoded_len(bytes.len()));
     54 
     55     // SAFETY: only contains valid ASCII characters from CROCKFORD_ALPHABET
     56     unsafe { std::string::String::from_utf8_unchecked(buf) }
     57 }
     58 
     59 /** Format bytes using Crockford's base32 */
     60 pub fn fmt(bytes: impl AsRef<[u8]>) -> impl Display {
     61     std::fmt::from_fn(move |f| {
     62         for chunk in bytes.as_ref().chunks(5) {
     63             let mut out_buf = [0u8; 8];
     64             encode_chunk(chunk, &mut out_buf);
     65 
     66             let n = encoded_len(chunk.len());
     67             // SAFETY: encode_chunk populates out_buf using CROCKFORD_ALPHABET,
     68             // which consists of valid ASCII characters.
     69             let s = unsafe { std::str::from_utf8_unchecked(&out_buf[..n]) };
     70             f.write_str(s)?;
     71         }
     72         Ok(())
     73     })
     74 }
     75 
     76 /** Encode a chunk using Crockford's base32 */
     77 #[inline(always)]
     78 pub(crate) fn encode_chunk(chunk: &[u8], encoded: &mut [u8]) {
     79     let mut buf = [0u8; 5];
     80     for (i, &b) in chunk.iter().enumerate() {
     81         buf[i] = b;
     82     }
     83     encoded[0] = CROCKFORD_ALPHABET[((buf[0] & 0xF8) >> 3) as usize];
     84     encoded[1] = CROCKFORD_ALPHABET[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize];
     85     encoded[2] = CROCKFORD_ALPHABET[((buf[1] & 0x3E) >> 1) as usize];
     86     encoded[3] = CROCKFORD_ALPHABET[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize];
     87     encoded[4] = CROCKFORD_ALPHABET[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize];
     88     encoded[5] = CROCKFORD_ALPHABET[((buf[3] & 0x7C) >> 2) as usize];
     89     encoded[6] = CROCKFORD_ALPHABET[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize];
     90     encoded[7] = CROCKFORD_ALPHABET[(buf[4] & 0x1F) as usize];
     91 }
     92 
     93 /** Batch encode bytes using Crockford's base32 */
     94 #[inline]
     95 fn encode_batch(bytes: &[u8], encoded: &mut [u8]) {
     96     // Check buffer len
     97     assert!(encoded.len() >= encoded_buf_len(bytes.len()));
     98 
     99     // Encode chunks of 5B for 8 chars
    100     for (chunk, encoded) in bytes.chunks(5).zip(encoded.chunks_exact_mut(8)) {
    101         encode_chunk(chunk, encoded);
    102     }
    103 }
    104 
/**
 * Crockford's base32 decoding error
 *
 * `N` is the expected number of decoded bytes, it only appears in the
 * message of the `Length` variant.
 */
#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
pub enum Base32Error<const N: usize> {
    /** The input could not be decoded as Crockford's base32 */
    #[error("invalid Crockford's base32 format")]
    Format,
    /** The input decodes to the carried number of bytes instead of `N` */
    #[error("invalid length expected {N} bytes got {0}")]
    Length(usize),
}
    112 
    113 /** Crockford's base32 inverse table, case insentitive and with substitution */
    114 const CROCKFORD_INV: [u8; 256] = {
    115     let mut table = [255; 256];
    116 
    117     // Fill the canonical alphabet
    118     let mut i = 0;
    119     while i < CROCKFORD_ALPHABET.len() {
    120         let b = CROCKFORD_ALPHABET[i];
    121         table[b as usize] = i as u8;
    122         i += 1;
    123     }
    124 
    125     // Add substitution
    126     table[b'O' as usize] = table[b'0' as usize];
    127     table[b'I' as usize] = table[b'1' as usize];
    128     table[b'L' as usize] = table[b'1' as usize];
    129     table[b'U' as usize] = table[b'V' as usize];
    130 
    131     // Make the table case insensitive
    132     let mut i = 0;
    133     while i < CROCKFORD_ALPHABET.len() {
    134         let b = CROCKFORD_ALPHABET[i];
    135         table[b.to_ascii_lowercase() as usize] = table[b as usize];
    136         i += 1;
    137     }
    138 
    139     // Add substitution
    140     table[b'o' as usize] = table[b'0' as usize];
    141     table[b'i' as usize] = table[b'1' as usize];
    142     table[b'l' as usize] = table[b'1' as usize];
    143     table[b'u' as usize] = table[b'v' as usize];
    144 
    145     table
    146 };
    147 
    148 /** Decoded bytes len of Crockford's base32 */
    149 #[inline]
    150 const fn decoded_len(len: usize) -> usize {
    151     len * 5 / 8
    152 }
    153 
    154 /** Buffer bytes len of Crockford's base32 using a batch of 5 bytes */
    155 #[inline]
    156 pub(crate) const fn decoded_buf_len(len: usize) -> usize {
    157     (len / 8 + 1) * 5
    158 }
    159 
    160 /** Decode N bytes from a Crockford's base32 string */
    161 pub fn decode_static<const N: usize>(encoded: &[u8]) -> Result<[u8; N], Base32Error<N>> {
    162     // Check decode length
    163     let output_length = decoded_len(encoded.len());
    164     if output_length != N {
    165         return Err(Base32Error::Length(output_length));
    166     }
    167 
    168     let mut decoded = vec![0u8; decoded_buf_len(encoded.len())]; // TODO use a stack allocated buffer when supported
    169 
    170     if !decode_batch(encoded, &mut decoded) {
    171         return Err(Base32Error::Format);
    172     }
    173     Ok(decoded[..N].try_into().unwrap())
    174 }
    175 
    176 /** Decode bytes from a Crockford's base32 string */
    177 pub fn decode(encoded: impl AsRef<[u8]>) -> Result<Vec<u8>, Base32Error<0>> {
    178     let encoded = encoded.as_ref();
    179     let mut decoded = vec![0u8; decoded_buf_len(encoded.len())];
    180 
    181     if !decode_batch(encoded, &mut decoded) {
    182         return Err(Base32Error::Format);
    183     }
    184     decoded.truncate(decoded_len(encoded.len()));
    185     Ok(decoded)
    186 }
    187 
    188 /** Batch decode bytes using Crockford's base32 */
    189 #[inline]
    190 fn decode_batch(encoded: &[u8], decoded: &mut [u8]) -> bool {
    191     let mut invalid = false;
    192 
    193     // Encode chunks of 8 chars for 5B
    194     for (chunk, decoded) in encoded.chunks(8).zip(decoded.chunks_exact_mut(5)) {
    195         let mut buf = [0; 8];
    196 
    197         // Lookup chunk
    198         for (i, &b) in chunk.iter().enumerate() {
    199             buf[i] = CROCKFORD_INV[b as usize];
    200         }
    201 
    202         // Check chunk validity
    203         invalid |= buf.contains(&255);
    204 
    205         // Decode chunk
    206         decoded[0] = (buf[0] << 3) | (buf[1] >> 2);
    207         decoded[1] = (buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4);
    208         decoded[2] = (buf[3] << 4) | (buf[4] >> 1);
    209         decoded[3] = (buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3);
    210         decoded[4] = (buf[6] << 5) | buf[7];
    211     }
    212 
    213     !invalid
    214 }
    215 
    216 #[cfg(test)]
    217 mod test {
    218     use crate::encoding::base32::{Base32Error, decode, encode, fmt};
    219 
    220     #[test]
    221     fn base32() {
    222         // RFC test vectors
    223         for (decoded, encoded) in [
    224             ("", ""),
    225             ("f", "CR"),
    226             ("fo", "CSQG"),
    227             ("foo", "CSQPY"),
    228             ("foob", "CSQPYRG"),
    229             ("fooba", "CSQPYRK1"),
    230             ("foobar", "CSQPYRK1E8"),
    231         ] {
    232             assert_eq!(encode(decoded.as_bytes()), encoded);
    233             assert_eq!(fmt(decoded).to_string(), encoded);
    234             assert_eq!(decode(encoded.as_bytes()).unwrap(), decoded.as_bytes());
    235         }
    236 
    237         // Crockford allows case-insensitive decoding
    238         assert_eq!(decode(b"oilu").unwrap(), decode(b"OILU").unwrap());
    239 
    240         // Crockford remaps ambiguous characters on decode
    241         assert_eq!(decode(b"OILU").unwrap(), decode(b"011V").unwrap());
    242 
    243         // Invalid characters
    244         assert_eq!(decode(b"C!"), Err(Base32Error::Format));
    245         assert_eq!(decode(b"C\x00R"), Err(Base32Error::Format));
    246     }
    247 }