base32.rs (8248B)
1 /* 2 This file is part of TALER 3 Copyright (C) 2024, 2025, 2026 Taler Systems SA 4 5 TALER is free software; you can redistribute it and/or modify it under the 6 terms of the GNU Affero General Public License as published by the Free Software 7 Foundation; either version 3, or (at your option) any later version. 8 9 TALER is distributed in the hope that it will be useful, but WITHOUT ANY 10 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 11 A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 12 13 You should have received a copy of the GNU Affero General Public License along with 14 TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/> 15 */ 16 17 use std::fmt::Display; 18 19 pub const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ"; 20 21 /** Encoded bytes len of Crockford's base32 */ 22 #[inline] 23 const fn encoded_len(len: usize) -> usize { 24 (len * 8).div_ceil(5) 25 } 26 27 /** Buffer bytes len of Crockford's base32 using a batch of 8 chars */ 28 #[inline] 29 pub(crate) const fn encoded_buf_len(len: usize) -> usize { 30 (len / 5 + 1) * 8 31 } 32 33 /** Encode bytes using Crockford's base32 */ 34 pub fn encode_static<'a, const N: usize>(bytes: &[u8; N], out: &'a mut [u8]) -> &'a str { 35 // Batch encoded 36 encode_batch(bytes, out); 37 38 // Truncate incomplete ending chunk 39 let truncated = &out[..encoded_len(bytes.len())]; 40 41 // SAFETY: only contains valid ASCII characters from CROCKFORD_ALPHABET 42 unsafe { std::str::from_utf8_unchecked(truncated) } 43 } 44 45 /** Encode bytes using Crockford's base32 */ 46 pub fn encode(bytes: impl AsRef<[u8]>) -> String { 47 let bytes = bytes.as_ref(); 48 let mut buf = vec![0u8; encoded_buf_len(bytes.len())]; 49 // Batch encoded 50 encode_batch(bytes, &mut buf); 51 52 // Truncate incomplete ending chunk 53 buf.truncate(encoded_len(bytes.len())); 54 55 // SAFETY: only contains valid ASCII characters from CROCKFORD_ALPHABET 56 unsafe { std::string::String::from_utf8_unchecked(buf) } 57 } 58 59 /** Format bytes using Crockford's base32 */ 60 pub fn fmt(bytes: impl AsRef<[u8]>) -> impl Display { 61 std::fmt::from_fn(move |f| { 62 for chunk in bytes.as_ref().chunks(5) { 63 let mut out_buf = [0u8; 8]; 64 encode_chunk(chunk, &mut out_buf); 65 66 let n = encoded_len(chunk.len()); 67 // SAFETY: encode_chunk populates out_buf using CROCKFORD_ALPHABET, 68 // which consists of valid ASCII characters. 69 let s = unsafe { std::str::from_utf8_unchecked(&out_buf[..n]) }; 70 f.write_str(s)?; 71 } 72 Ok(()) 73 }) 74 } 75 76 /** Encode a chunk using Crockford's base32 */ 77 #[inline(always)] 78 pub(crate) fn encode_chunk(chunk: &[u8], encoded: &mut [u8]) { 79 let mut buf = [0u8; 5]; 80 for (i, &b) in chunk.iter().enumerate() { 81 buf[i] = b; 82 } 83 encoded[0] = CROCKFORD_ALPHABET[((buf[0] & 0xF8) >> 3) as usize]; 84 encoded[1] = CROCKFORD_ALPHABET[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize]; 85 encoded[2] = CROCKFORD_ALPHABET[((buf[1] & 0x3E) >> 1) as usize]; 86 encoded[3] = CROCKFORD_ALPHABET[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize]; 87 encoded[4] = CROCKFORD_ALPHABET[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize]; 88 encoded[5] = CROCKFORD_ALPHABET[((buf[3] & 0x7C) >> 2) as usize]; 89 encoded[6] = CROCKFORD_ALPHABET[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize]; 90 encoded[7] = CROCKFORD_ALPHABET[(buf[4] & 0x1F) as usize]; 91 } 92 93 /** Batch encode bytes using Crockford's base32 */ 94 #[inline] 95 fn encode_batch(bytes: &[u8], encoded: &mut [u8]) { 96 // Check buffer len 97 assert!(encoded.len() >= encoded_buf_len(bytes.len())); 98 99 // Encode chunks of 5B for 8 chars 100 for (chunk, encoded) in bytes.chunks(5).zip(encoded.chunks_exact_mut(8)) { 101 encode_chunk(chunk, encoded); 102 } 103 } 104 105 #[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] 106 pub enum Base32Error<const N: usize> { 107 #[error("invalid Crockford's base32 format")] 108 Format, 109 #[error("invalid length expected {N} bytes got {0}")] 110 Length(usize), 111 } 112 113 /** Crockford's base32 inverse table, case insentitive and with substitution */ 114 const CROCKFORD_INV: [u8; 256] = { 115 let mut table = [255; 256]; 116 117 // Fill the canonical alphabet 118 let mut i = 0; 119 while i < CROCKFORD_ALPHABET.len() { 120 let b = CROCKFORD_ALPHABET[i]; 121 table[b as usize] = i as u8; 122 i += 1; 123 } 124 125 // Add substitution 126 table[b'O' as usize] = table[b'0' as usize]; 127 table[b'I' as usize] = table[b'1' as usize]; 128 table[b'L' as usize] = table[b'1' as usize]; 129 table[b'U' as usize] = table[b'V' as usize]; 130 131 // Make the table case insensitive 132 let mut i = 0; 133 while i < CROCKFORD_ALPHABET.len() { 134 let b = CROCKFORD_ALPHABET[i]; 135 table[b.to_ascii_lowercase() as usize] = table[b as usize]; 136 i += 1; 137 } 138 139 // Add substitution 140 table[b'o' as usize] = table[b'0' as usize]; 141 table[b'i' as usize] = table[b'1' as usize]; 142 table[b'l' as usize] = table[b'1' as usize]; 143 table[b'u' as usize] = table[b'v' as usize]; 144 145 table 146 }; 147 148 /** Decoded bytes len of Crockford's base32 */ 149 #[inline] 150 const fn decoded_len(len: usize) -> usize { 151 len * 5 / 8 152 } 153 154 /** Buffer bytes len of Crockford's base32 using a batch of 5 bytes */ 155 #[inline] 156 pub(crate) const fn decoded_buf_len(len: usize) -> usize { 157 (len / 8 + 1) * 5 158 } 159 160 /** Decode N bytes from a Crockford's base32 string */ 161 pub fn decode_static<const N: usize>(encoded: &[u8]) -> Result<[u8; N], Base32Error<N>> { 162 // Check decode length 163 let output_length = decoded_len(encoded.len()); 164 if output_length != N { 165 return Err(Base32Error::Length(output_length)); 166 } 167 168 let mut decoded = vec![0u8; decoded_buf_len(encoded.len())]; // TODO use a stack allocated buffer when supported 169 170 if !decode_batch(encoded, &mut decoded) { 171 return Err(Base32Error::Format); 172 } 173 Ok(decoded[..N].try_into().unwrap()) 174 } 175 176 /** Decode bytes from a Crockford's base32 string */ 177 pub fn decode(encoded: impl AsRef<[u8]>) -> Result<Vec<u8>, Base32Error<0>> { 178 let encoded = encoded.as_ref(); 179 let mut decoded = vec![0u8; decoded_buf_len(encoded.len())]; 180 181 if !decode_batch(encoded, &mut decoded) { 182 return Err(Base32Error::Format); 183 } 184 decoded.truncate(decoded_len(encoded.len())); 185 Ok(decoded) 186 } 187 188 /** Batch decode bytes using Crockford's base32 */ 189 #[inline] 190 fn decode_batch(encoded: &[u8], decoded: &mut [u8]) -> bool { 191 let mut invalid = false; 192 193 // Encode chunks of 8 chars for 5B 194 for (chunk, decoded) in encoded.chunks(8).zip(decoded.chunks_exact_mut(5)) { 195 let mut buf = [0; 8]; 196 197 // Lookup chunk 198 for (i, &b) in chunk.iter().enumerate() { 199 buf[i] = CROCKFORD_INV[b as usize]; 200 } 201 202 // Check chunk validity 203 invalid |= buf.contains(&255); 204 205 // Decode chunk 206 decoded[0] = (buf[0] << 3) | (buf[1] >> 2); 207 decoded[1] = (buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4); 208 decoded[2] = (buf[3] << 4) | (buf[4] >> 1); 209 decoded[3] = (buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3); 210 decoded[4] = (buf[6] << 5) | buf[7]; 211 } 212 213 !invalid 214 } 215 216 #[cfg(test)] 217 mod test { 218 use crate::encoding::base32::{Base32Error, decode, encode, fmt}; 219 220 #[test] 221 fn base32() { 222 // RFC test vectors 223 for (decoded, encoded) in [ 224 ("", ""), 225 ("f", "CR"), 226 ("fo", "CSQG"), 227 ("foo", "CSQPY"), 228 ("foob", "CSQPYRG"), 229 ("fooba", "CSQPYRK1"), 230 ("foobar", "CSQPYRK1E8"), 231 ] { 232 assert_eq!(encode(decoded.as_bytes()), encoded); 233 assert_eq!(fmt(decoded).to_string(), encoded); 234 assert_eq!(decode(encoded.as_bytes()).unwrap(), decoded.as_bytes()); 235 } 236 237 // Crockford allows case-insensitive decoding 238 assert_eq!(decode(b"oilu").unwrap(), decode(b"OILU").unwrap()); 239 240 // Crockford remaps ambiguous characters on decode 241 assert_eq!(decode(b"OILU").unwrap(), decode(b"011V").unwrap()); 242 243 // Invalid characters 244 assert_eq!(decode(b"C!"), Err(Base32Error::Format)); 245 assert_eq!(decode(b"C\x00R"), Err(Base32Error::Format)); 246 } 247 }