Notes on Programming: base36 in rust

Base36 uses the digits 0-9 and the letters a-z. This is great when computers and humans work together, because an ID is easy to spell out. It is also short: I can use an auto-incrementing integer and still get quite compact IDs. Here is a reusable Rust implementation:
/// The base36 alphabet used for encoding: `BASE36_CHARS[d]` is the ASCII byte
/// for digit value `d` (`0`-`9` for 0..=9, then lowercase `a`-`z` for 10..=35).
const BASE36_CHARS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

/// Reverse lookup table mapping an input byte to its base36 digit value.
///
/// `b'0'..=b'9'` map to 0..=9 and `b'a'..=b'z'` map to 10..=35. Every other
/// byte (uppercase letters included) holds the sentinel 255, meaning
/// "not a base36 digit".
const BASE36_DECODE: [u8; 256] = {
    let mut table = [u8::MAX; 256];
    // `for` loops are not usable in const evaluation, hence the manual `while`.
    let mut digit: u8 = 0;
    while digit < 36 {
        let symbol = if digit < 10 {
            b'0' + digit
        } else {
            b'a' + (digit - 10)
        };
        table[symbol as usize] = digit;
        digit += 1;
    }
    table
};

#[inline(always)]
pub fn u64_to_base36(value: u64) -> String {
    // Specialized implementations for different ranges for maximum performance
    match value {
        0 => "0".to_string(),
        1..=35 => unsafe { String::from_utf8_unchecked(vec![BASE36_CHARS[value as usize]]) },
        36..=1295 => {
            let d1 = (value % 36) as usize;
            let d0 = (value / 36) as usize;
            unsafe { String::from_utf8_unchecked(vec![BASE36_CHARS[d0], BASE36_CHARS[d1]]) }
        }
        1296..=46655 => {
            let mut v = value;
            let d2 = (v % 36) as usize;
            v /= 36;
            let d1 = (v % 36) as usize;
            let d0 = (v / 36) as usize;
            unsafe {
                String::from_utf8_unchecked(vec![
                    BASE36_CHARS[d0],
                    BASE36_CHARS[d1],
                    BASE36_CHARS[d2],
                ])
            }
        }
        _ => {
            // General case for larger numbers
            let mut val = value;
            let mut buffer: [u8; 13] = [0; 13]; // Maximum length of u64 in base36 is 13
            let mut pos = 13;

            // Optimized division loop - compiler can optimize division by constant
            while val > 0 {
                pos -= 1;
                buffer[pos] = BASE36_CHARS[(val % 36) as usize];
                val /= 36;
            }

            // SAFETY: We only write valid ASCII characters
            unsafe { String::from_utf8_unchecked(buffer[pos..].to_vec()) }
        }
    }
}

#[inline(always)]
pub fn base36_to_u64(s: &str) -> Result {
    if s.is_empty() {
        return Err("Empty string".to_string());
    }

    let bytes = s.as_bytes();

    // Fast path for common single digit case
    if bytes.len() == 1 {
        let digit = BASE36_DECODE[bytes[0] as usize];
        if digit == 255 {
            return Err(format!(
                "Invalid character '{}' in base36 string",
                bytes[0] as char
            ));
        }
        return Ok(digit as u64);
    }

    let mut result: u64 = 0;
    let mut i = 0;

    // Process 2 digits at a time when possible for better performance
    while i + 1 < bytes.len() {
        let d0 = BASE36_DECODE[bytes[i] as usize];
        let d1 = BASE36_DECODE[bytes[i + 1] as usize];

        if d0 == 255 || d1 == 255 {
            return Err("Invalid character in base36 string".to_string());
        }

        // result = result * 36^2 + d0 * 36 + d1
        result = match result
            .checked_mul(1296)
            .and_then(|r| r.checked_add((d0 as u64) * 36 + d1 as u64))
        {
            Some(val) => val,
            None => return Err("Overflow while parsing base36 string".to_string()),
        };

        i += 2;
    }

    // Handle remaining single digit
    if i < bytes.len() {
        let digit = BASE36_DECODE[bytes[i] as usize];
        if digit == 255 {
            return Err("Invalid character in base36 string".to_string());
        }

        result = match result
            .checked_mul(36)
            .and_then(|r| r.checked_add(digit as u64))
        {
            Some(val) => val,
            None => return Err("Overflow while parsing base36 string".to_string()),
        };
    }

    Ok(result)
}

/// Encodes a `u128` as a lowercase base36 string without leading zeros.
///
/// Handy for UUIDs, which are 128 bits wide. Zero encodes as `"0"`; no output
/// is longer than 25 characters.
#[inline]
pub fn u128_to_base36(value: u128) -> String {
    // 36^25 exceeds u128::MAX, so 25 digits are always enough.
    const MAX_DIGITS: usize = 25;

    let mut scratch = [0u8; MAX_DIGITS];
    let mut start = MAX_DIGITS;
    let mut rest = value;

    // Fill from the back, least-significant digit first. Running the body at
    // least once makes zero come out as "0" without a separate branch.
    loop {
        start -= 1;
        scratch[start] = BASE36_CHARS[(rest % 36) as usize];
        rest /= 36;
        if rest == 0 {
            break;
        }
    }

    // Every byte written comes from BASE36_CHARS, so each maps to one ASCII char.
    scratch[start..]
        .iter()
        .map(|&digit| char::from(digit))
        .collect()
}

/// Decode a base36 string to u128 (useful for UUIDs which are 128 bits)
#[inline]
pub fn base36_to_u128(s: &str) -> Result {
    if s.is_empty() {
        return Err("Empty string".to_string());
    }

    let bytes = s.as_bytes();
    let mut result: u128 = 0;

    for &byte in bytes {
        let digit = BASE36_DECODE[byte as usize];
        if digit == 255 {
            return Err(format!(
                "Invalid character '{}' in base36 string",
                byte as char
            ));
        }

        result = match result
            .checked_mul(36)
            .and_then(|r| r.checked_add(digit as u128))
        {
            Some(val) => val,
            None => return Err("Overflow while parsing base36 string".to_string()),
        };
    }

    Ok(result)
}

/// Encodes arbitrary binary data as a base36 string.
///
/// The slice is read as one big-endian unsigned integer. Leading zero bytes
/// do not affect the result, and an empty or all-zero slice encodes as `"0"`.
#[inline]
pub fn bytes_to_base36(bytes: &[u8]) -> String {
    // Strip leading zero bytes; if none is non-zero the value is zero.
    let significant = match bytes.iter().position(|&b| b != 0) {
        Some(first_nonzero) => &bytes[first_nonzero..],
        None => return "0".to_string(),
    };

    // Up to 16 significant bytes fit in a u128: pack them and reuse that encoder.
    if significant.len() <= 16 {
        let value = significant
            .iter()
            .fold(0u128, |acc, &b| (acc << 8) | u128::from(b));
        return u128_to_base36(value);
    }

    // Larger inputs: repeated schoolbook division of the base-256 number by 36.
    // Each pass overwrites the number with its quotient in place and yields
    // one base36 digit (the remainder), least-significant digit first.
    let mut number = significant.to_vec();
    let mut head = 0; // index of the most significant non-zero byte
    let mut encoded = Vec::new();

    while head < number.len() {
        let mut remainder: u16 = 0;
        for limb in &mut number[head..] {
            let acc = (remainder << 8) | u16::from(*limb);
            // remainder < 36, so acc <= 35 * 256 + 255 and acc / 36 <= 255.
            *limb = (acc / 36) as u8;
            remainder = acc % 36;
        }
        encoded.push(BASE36_CHARS[usize::from(remainder)]);

        // Skip quotient bytes that became zero at the front.
        while head < number.len() && number[head] == 0 {
            head += 1;
        }
    }

    encoded.reverse();
    // Every byte pushed comes from BASE36_CHARS, so each maps to one ASCII char.
    encoded.into_iter().map(char::from).collect()
}

/// Decode a base36 string to binary data
/// Returns the minimal byte representation (no leading zeros)
#[inline]
pub fn base36_to_bytes(s: &str) -> Result, String> {
    if s.is_empty() {
        return Err("Empty string".to_string());
    }

    // For short strings that fit in u128, use fast path
    if s.len() <= 25 {
        let value = base36_to_u128(s)?;
        if value == 0 {
            return Ok(vec![0]);
        }

        // Convert u128 to bytes, removing leading zeros
        let all_bytes = value.to_be_bytes();
        let start = all_bytes.iter().position(|&b| b != 0).unwrap_or(15);
        return Ok(all_bytes[start..].to_vec());
    }

    // For longer strings, use arbitrary precision arithmetic
    let input_bytes = s.as_bytes();

    // Validate all characters first
    for &byte in input_bytes {
        if BASE36_DECODE[byte as usize] == 255 {
            return Err(format!(
                "Invalid character '{}' in base36 string",
                byte as char
            ));
        }
    }

    // Convert base36 digits to a vector
    let mut digits: Vec = input_bytes
        .iter()
        .map(|&b| BASE36_DECODE[b as usize])
        .collect();

    let mut result = Vec::new();

    while !digits.is_empty() && !(digits.len() == 1 && digits[0] == 0) {
        let mut remainder: u16 = 0;
        let mut new_digits = Vec::with_capacity(digits.len());

        for &digit in &digits {
            let current = remainder * 36 + (digit as u16);
            let quotient = current / 256;
            remainder = current % 256;

            if !new_digits.is_empty() || quotient > 0 {
                new_digits.push(quotient as u8);
            }
        }

        result.push(remainder as u8);
        digits = new_digits;
    }

    if result.is_empty() {
        return Ok(vec![0]);
    }

    result.reverse();
    Ok(result)
}

/// Decode a base36 string to a fixed-size byte array
/// Pads with leading zeros if necessary, returns error if result is too large
#[inline]
pub fn base36_to_bytes_fixed(s: &str) -> Result<[u8; N], String> {
    let bytes = base36_to_bytes(s)?;

    if bytes.len() > N {
        return Err(format!(
            "Value too large: {} bytes, expected at most {}",
            bytes.len(),
            N
        ));
    }

    let mut result = [0u8; N];
    let offset = N - bytes.len();
    result[offset..].copy_from_slice(&bytes);
    Ok(result)
}

/// "Unchecked" counterpart of [`u64_to_base36`].
///
/// Encoding a `u64` has no failure mode, so there is no validation to skip:
/// this forwards straight to [`u64_to_base36`] and returns the same string.
/// NOTE(review): presumably kept only for naming symmetry with
/// `base36_to_u64_unchecked` - confirm before relying on it being faster.
#[inline(always)]
pub fn u64_to_base36_unchecked(value: u64) -> String {
    // Plain delegation; there is no separate fast path for encoding.
    u64_to_base36(value) // Returns a bare `String`; no error case to handle
}

#[inline(always)]
pub fn base36_to_u64_unchecked(s: &str) -> u64 {
    let bytes = s.as_bytes();

    // Unroll for common short cases
    match bytes.len() {
        1 => BASE36_DECODE[bytes[0] as usize] as u64,
        2 => {
            let d0 = BASE36_DECODE[bytes[0] as usize] as u64;
            let d1 = BASE36_DECODE[bytes[1] as usize] as u64;
            d0 * 36 + d1
        }
        3 => {
            let d0 = BASE36_DECODE[bytes[0] as usize] as u64;
            let d1 = BASE36_DECODE[bytes[1] as usize] as u64;
            let d2 = BASE36_DECODE[bytes[2] as usize] as u64;
            (d0 * 36 + d1) * 36 + d2
        }
        _ => {
            let mut result: u64 = 0;
            for &byte in bytes {
                result = result * 36 + (BASE36_DECODE[byte as usize] as u64);
            }
            result
        }
    }
}

/// Unit tests for the base36 encoders and decoders defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_u64_to_base36() {
        assert_eq!(u64_to_base36(0), "0");
        assert_eq!(u64_to_base36(1), "1");
        assert_eq!(u64_to_base36(35), "z");
        assert_eq!(u64_to_base36(36), "10");
        assert_eq!(u64_to_base36(1296), "100");
        assert_eq!(u64_to_base36(46656), "1000");
    }

    #[test]
    fn test_base36_to_u64() {
        assert_eq!(base36_to_u64("0").unwrap(), 0);
        assert_eq!(base36_to_u64("1").unwrap(), 1);
        assert_eq!(base36_to_u64("z").unwrap(), 35);
        assert_eq!(base36_to_u64("10").unwrap(), 36);
        assert_eq!(base36_to_u64("100").unwrap(), 1296);
        assert_eq!(base36_to_u64("1000").unwrap(), 46656);
    }

    #[test]
    fn test_base36_to_u64_errors() {
        assert!(base36_to_u64("").is_err());
        assert!(base36_to_u64("Z").is_err());
        assert!(base36_to_u64("!").is_err());
        assert!(base36_to_u64("abc123xyz!").is_err());
    }

    #[test]
    fn test_roundtrip_small_values() {
        for i in 0..1000 {
            let base36 = u64_to_base36(i);
            let back = base36_to_u64(&base36).unwrap();
            assert_eq!(i, back, "Roundtrip failed for {i}");
        }
    }

    #[test]
    fn test_roundtrip_edge_cases() {
        let test_values = vec![
            0,
            1,
            35,
            36,
            1295,
            1296,
            46655,
            46656,
            u64::MAX / 2,
            u64::MAX - 1,
            u64::MAX,
        ];

        for value in test_values {
            let base36 = u64_to_base36(value);
            let back = base36_to_u64(&base36).unwrap();
            assert_eq!(value, back, "Roundtrip failed for {value}");
        }
    }

    #[test]
    fn test_roundtrip_random_samples() {
        let test_values = vec![
            123456789,
            987654321,
            1_000_000_000,
            10_000_000_000,
            100_000_000_000,
            1_000_000_000_000,
        ];

        for value in test_values {
            let base36 = u64_to_base36(value);
            let back = base36_to_u64(&base36).unwrap();
            assert_eq!(value, back, "Roundtrip failed for {value}");
        }
    }

    #[test]
    fn test_max_u64_base36() {
        let max_base36 = u64_to_base36(u64::MAX);
        assert_eq!(max_base36, "3w5e11264sgsf");
        assert_eq!(base36_to_u64(&max_base36).unwrap(), u64::MAX);
    }

    #[test]
    fn test_overflow_detection() {
        assert!(base36_to_u64("3w5e11264sgsg").is_err());
        assert!(base36_to_u64("zzzzzzzzzzzzzz").is_err());
    }

    #[test]
    fn test_unchecked_versions() {
        // Test unchecked versions match checked versions for valid inputs
        for i in 0..10000 {
            assert_eq!(u64_to_base36(i), u64_to_base36_unchecked(i));
            let s = u64_to_base36(i);
            assert_eq!(base36_to_u64(&s).unwrap(), base36_to_u64_unchecked(&s));
        }
    }

    #[test]
    fn test_u64_to_base36_specific_ranges() {
        // Test value 0
        assert_eq!(u64_to_base36(0), "0");

        // Test single digit range 1..=35
        assert_eq!(u64_to_base36(1), "1");
        assert_eq!(u64_to_base36(35), "z");

        // Test two digit range 36..=1295
        assert_eq!(u64_to_base36(36), "10");
        assert_eq!(u64_to_base36(1295), "zz");
        assert_eq!(u64_to_base36(100), "2s");

        // Test three digit range 1296..=46655
        assert_eq!(u64_to_base36(1296), "100");
        assert_eq!(u64_to_base36(46655), "zzz");
        assert_eq!(u64_to_base36(5000), "3uw");

        // Test larger values (four or more digits)
        assert_eq!(u64_to_base36(46656), "1000");
        assert_eq!(u64_to_base36(1000000), "lfls");
        assert_eq!(u64_to_base36(u64::MAX), "3w5e11264sgsf");
    }

    #[test]
    fn test_base36_to_u64_specific_cases() {
        // Test empty string error
        match base36_to_u64("") {
            Err(msg) => assert_eq!(msg, "Empty string"),
            Ok(_) => panic!("Should have failed"),
        }

        // Test single digit path
        assert_eq!(base36_to_u64("0").unwrap(), 0);
        assert_eq!(base36_to_u64("9").unwrap(), 9);
        assert_eq!(base36_to_u64("a").unwrap(), 10);
        assert_eq!(base36_to_u64("z").unwrap(), 35);

        // Test invalid single character
        match base36_to_u64("@") {
            Err(msg) => assert!(msg.contains("Invalid character")),
            Ok(_) => panic!("Should have failed"),
        }

        // Test two digit processing
        assert_eq!(base36_to_u64("10").unwrap(), 36);
        assert_eq!(base36_to_u64("zz").unwrap(), 1295);

        // Test invalid character in two digit
        match base36_to_u64("a@") {
            Err(msg) => assert_eq!(msg, "Invalid character in base36 string"),
            Ok(_) => panic!("Should have failed"),
        }

        // Test overflow in multiplication
        let overflow_str = "zzzzzzzzzzzzzzzzzzzz"; // Very long string
        match base36_to_u64(overflow_str) {
            Err(msg) => assert_eq!(msg, "Overflow while parsing base36 string"),
            Ok(_) => panic!("Should have failed"),
        }

        // Test single remaining digit
        assert_eq!(base36_to_u64("abc").unwrap(), 13368); // 10*36*36 + 11*36 + 12

        // Test invalid remaining digit
        match base36_to_u64("ab@") {
            Err(msg) => assert_eq!(msg, "Invalid character in base36 string"),
            Ok(_) => panic!("Should have failed"),
        }
    }

    #[test]
    fn test_base36_to_u64_unchecked_specific_cases() {
        // Test 1 character case
        assert_eq!(base36_to_u64_unchecked("0"), 0);
        assert_eq!(base36_to_u64_unchecked("z"), 35);

        // Test 2 character case
        assert_eq!(base36_to_u64_unchecked("10"), 36);
        assert_eq!(base36_to_u64_unchecked("zz"), 1295);

        // Test 3 character case
        assert_eq!(base36_to_u64_unchecked("100"), 1296);
        assert_eq!(base36_to_u64_unchecked("zzz"), 46655);

        // Test general case
        assert_eq!(base36_to_u64_unchecked("1000"), 46656);
        assert_eq!(base36_to_u64_unchecked("abc123"), 623698779);
    }

    #[test]
    fn test_u64_to_base36_unchecked_coverage() {
        // Test that unchecked version agrees with the regular version
        assert_eq!(u64_to_base36_unchecked(12345), u64_to_base36(12345));
        assert_eq!(u64_to_base36_unchecked(0), "0");
        assert_eq!(u64_to_base36_unchecked(u64::MAX), "3w5e11264sgsf");
    }

    #[test]
    fn test_lookup_table_bounds() {
        // Test that our lookup table handles all possible byte values
        for i in 0..=255 {
            let val = BASE36_DECODE[i];
            if i >= b'0' as usize && i <= b'9' as usize {
                assert_eq!(val, (i - b'0' as usize) as u8);
            } else if i >= b'a' as usize && i <= b'z' as usize {
                assert_eq!(val, (i - b'a' as usize + 10) as u8);
            } else {
                assert_eq!(val, 255);
            }
        }
    }

    #[test]
    fn test_base36_chars_array() {
        // Test that BASE36_CHARS contains expected characters
        assert_eq!(BASE36_CHARS[0], b'0');
        assert_eq!(BASE36_CHARS[9], b'9');
        assert_eq!(BASE36_CHARS[10], b'a');
        assert_eq!(BASE36_CHARS[35], b'z');
        assert_eq!(BASE36_CHARS.len(), 36);
    }

    #[test]
    fn test_base36_decode_lookup_table_comprehensive() {
        // Test that BASE36_DECODE lookup table is correctly initialized
        // Test digits 0-9
        for i in 0..10 {
            let char_byte = b'0' + i;
            assert_eq!(BASE36_DECODE[char_byte as usize], i);
        }

        // Test letters a-z
        for i in 0..26 {
            let char_byte = b'a' + i;
            assert_eq!(BASE36_DECODE[char_byte as usize], i + 10);
        }

        // Test that uppercase letters are invalid (should be 255)
        for i in 0..26 {
            let char_byte = b'A' + i;
            assert_eq!(BASE36_DECODE[char_byte as usize], 255);
        }

        // Test that other characters are invalid (should be 255)
        assert_eq!(BASE36_DECODE[b'@' as usize], 255);
        assert_eq!(BASE36_DECODE[b'[' as usize], 255);
        assert_eq!(BASE36_DECODE[b'`' as usize], 255);
        assert_eq!(BASE36_DECODE[b'{' as usize], 255);
        assert_eq!(BASE36_DECODE[255], 255);
    }

    #[test]
    fn test_u64_to_base36_boundary_values() {
        // Test exact boundary values between 1-, 2-, 3- and 4-digit outputs
        assert_eq!(u64_to_base36(0), "0");
        assert_eq!(u64_to_base36(1), "1");
        assert_eq!(u64_to_base36(35), "z");
        assert_eq!(u64_to_base36(36), "10");
        assert_eq!(u64_to_base36(1295), "zz");
        assert_eq!(u64_to_base36(1296), "100");
        assert_eq!(u64_to_base36(46655), "zzz");
        assert_eq!(u64_to_base36(46656), "1000");
    }

    #[test]
    fn test_base36_to_u64_single_digit_edge_cases() {
        // Test all valid single digits
        for i in 0..36 {
            let base36_char = BASE36_CHARS[i] as char;
            let base36_string = base36_char.to_string();
            let result = base36_to_u64(&base36_string).unwrap();
            assert_eq!(result, i as u64);
        }
    }

    #[test]
    fn test_base36_to_u64_two_digit_processing() {
        // Test the two-digit processing path
        assert_eq!(base36_to_u64("10").unwrap(), 36);
        assert_eq!(base36_to_u64("11").unwrap(), 37);
        assert_eq!(base36_to_u64("zz").unwrap(), 1295);
        assert_eq!(base36_to_u64("az").unwrap(), 10 * 36 + 35);
        assert_eq!(base36_to_u64("za").unwrap(), 35 * 36 + 10);
    }

    #[test]
    fn test_base36_to_u64_overflow_boundary() {
        // Test values near the overflow boundary
        let max_valid = u64_to_base36(u64::MAX);
        assert_eq!(base36_to_u64(&max_valid).unwrap(), u64::MAX);

        // Test a string that should cause overflow
        let overflow_str = "zzzzzzzzzzzzzzzz"; // 16 z's - much larger than u64::MAX
        match base36_to_u64(overflow_str) {
            Err(msg) => assert_eq!(msg, "Overflow while parsing base36 string"),
            Ok(_) => panic!("Should have overflowed"),
        }
    }

    #[test]
    fn test_base36_to_u64_invalid_characters() {
        // Single-character inputs name the offending character in the error;
        // multi-character inputs produce the generic message.
        let invalid_cases = vec![
            ("A", "Invalid character 'A' in base36 string"),
            ("Z", "Invalid character 'Z' in base36 string"),
            ("@", "Invalid character '@' in base36 string"),
            ("[", "Invalid character '[' in base36 string"),
            ("`", "Invalid character '`' in base36 string"),
            ("{", "Invalid character '{' in base36 string"),
            ("!", "Invalid character '!' in base36 string"),
            ("a!", "Invalid character in base36 string"),
            ("!a", "Invalid character in base36 string"),
            ("a@b", "Invalid character in base36 string"),
        ];

        for (input, expected_error) in invalid_cases {
            match base36_to_u64(input) {
                Err(msg) => assert_eq!(msg, expected_error, "Wrong error for input: {input}"),
                Ok(_) => panic!("Should have failed for input: {input}"),
            }
        }
    }

    #[test]
    fn test_base36_to_u64_odd_length_strings() {
        // Test strings with odd lengths to ensure the remaining digit handling works
        assert_eq!(base36_to_u64("1").unwrap(), 1);
        assert_eq!(base36_to_u64("123").unwrap(), 36 * 36 + 2 * 36 + 3);
        assert_eq!(
            base36_to_u64("12345").unwrap(),
            36 * 36 * 36 * 36 + 2 * 36 * 36 * 36 + 3 * 36 * 36 + 4 * 36 + 5
        );
    }

    #[test]
    fn test_base36_to_u64_unchecked_specific_lengths() {
        // Test each input length that base36_to_u64_unchecked may treat specially
        // 1 character
        assert_eq!(base36_to_u64_unchecked("5"), 5);
        assert_eq!(base36_to_u64_unchecked("z"), 35);

        // 2 characters
        assert_eq!(base36_to_u64_unchecked("10"), 36);
        assert_eq!(base36_to_u64_unchecked("zz"), 1295);

        // 3 characters
        assert_eq!(base36_to_u64_unchecked("100"), 1296);
        assert_eq!(base36_to_u64_unchecked("zzz"), 46655);

        // 4+ characters
        assert_eq!(base36_to_u64_unchecked("1000"), 46656);
        assert_eq!(
            base36_to_u64_unchecked("abcd"),
            10 * 36 * 36 * 36 + 11 * 36 * 36 + 12 * 36 + 13
        );
    }

    #[test]
    fn test_performance_comparison() {
        // Test that checked and unchecked versions produce the same results
        // (despite the name, nothing is timed here)
        let test_values = vec![0, 1, 35, 36, 1000, 46656, 1000000];

        for value in test_values {
            let base36_str = u64_to_base36(value);
            assert_eq!(u64_to_base36_unchecked(value), base36_str);
            assert_eq!(
                base36_to_u64(&base36_str).unwrap(),
                base36_to_u64_unchecked(&base36_str)
            );
        }
    }

    #[test]
    fn test_edge_case_error_messages() {
        // Test specific error message formatting
        match base36_to_u64("@") {
            Err(msg) => assert!(msg.contains("Invalid character '@' in base36 string")),
            Ok(_) => panic!("Should have failed"),
        }

        // Test empty string error message
        match base36_to_u64("") {
            Err(msg) => assert_eq!(msg, "Empty string"),
            Ok(_) => panic!("Should have failed"),
        }
    }

    #[test]
    fn test_large_string_handling() {
        // Test handling of strings that would definitely overflow
        let very_large_string = "z".repeat(20);
        match base36_to_u64(&very_large_string) {
            Err(msg) => assert_eq!(msg, "Overflow while parsing base36 string"),
            Ok(_) => panic!("Should have overflowed"),
        }
    }

    #[test]
    fn test_buffer_usage_in_general_case() {
        // Test values above 46655, i.e. outputs of four or more digits
        let large_values = vec![100000, 1000000, 10000000, 100000000, 1000000000];

        for value in large_values {
            let base36_str = u64_to_base36(value);
            let decoded = base36_to_u64(&base36_str).unwrap();
            assert_eq!(decoded, value);
            // Verify the string is reasonable length (should be <= 13 for u64::MAX)
            assert!(base36_str.len() <= 13);
        }
    }

    #[test]
    fn test_u128_to_base36() {
        assert_eq!(u128_to_base36(0), "0");
        assert_eq!(u128_to_base36(1), "1");
        assert_eq!(u128_to_base36(35), "z");
        assert_eq!(u128_to_base36(36), "10");
        assert_eq!(u128_to_base36(u64::MAX as u128), "3w5e11264sgsf");
        // Test value larger than u64::MAX
        let large_value: u128 = (u64::MAX as u128) * 2;
        let encoded = u128_to_base36(large_value);
        let decoded = base36_to_u128(&encoded).unwrap();
        assert_eq!(decoded, large_value);
    }

    #[test]
    fn test_base36_to_u128() {
        assert_eq!(base36_to_u128("0").unwrap(), 0);
        assert_eq!(base36_to_u128("1").unwrap(), 1);
        assert_eq!(base36_to_u128("z").unwrap(), 35);
        assert_eq!(base36_to_u128("10").unwrap(), 36);
        assert_eq!(base36_to_u128("3w5e11264sgsf").unwrap(), u64::MAX as u128);
    }

    #[test]
    fn test_u128_roundtrip() {
        let test_values: Vec<u128> = vec![
            0,
            1,
            35,
            36,
            u64::MAX as u128,
            u64::MAX as u128 + 1,
            u128::MAX / 2,
            u128::MAX - 1,
            u128::MAX,
        ];

        for value in test_values {
            let encoded = u128_to_base36(value);
            let decoded = base36_to_u128(&encoded).unwrap();
            assert_eq!(decoded, value, "Roundtrip failed for {value}");
        }
    }

    #[test]
    fn test_bytes_to_base36() {
        // Empty and zero cases
        assert_eq!(bytes_to_base36(&[]), "0");
        assert_eq!(bytes_to_base36(&[0]), "0");
        assert_eq!(bytes_to_base36(&[0, 0, 0]), "0");

        // Single byte values
        assert_eq!(bytes_to_base36(&[1]), "1");
        assert_eq!(bytes_to_base36(&[35]), "z");
        assert_eq!(bytes_to_base36(&[36]), "10");
        assert_eq!(bytes_to_base36(&[255]), "73"); // 255 in base36

        // Multi-byte values
        assert_eq!(bytes_to_base36(&[1, 0]), "74"); // 256 in base36
        assert_eq!(bytes_to_base36(&[0, 1, 0]), "74"); // Leading zeros ignored

        // Known value: 0xDEADBEEF
        assert_eq!(bytes_to_base36(&[0xDE, 0xAD, 0xBE, 0xEF]), "1ps9wxb");
    }

    #[test]
    fn test_base36_to_bytes() {
        assert_eq!(base36_to_bytes("0").unwrap(), vec![0]);
        assert_eq!(base36_to_bytes("1").unwrap(), vec![1]);
        assert_eq!(base36_to_bytes("z").unwrap(), vec![35]);
        assert_eq!(base36_to_bytes("10").unwrap(), vec![36]);
        assert_eq!(base36_to_bytes("73").unwrap(), vec![255]);
        assert_eq!(base36_to_bytes("74").unwrap(), vec![1, 0]); // 256

        // Known value: 0xDEADBEEF
        assert_eq!(
            base36_to_bytes("1ps9wxb").unwrap(),
            vec![0xDE, 0xAD, 0xBE, 0xEF]
        );
    }

    #[test]
    fn test_bytes_roundtrip() {
        let test_cases: Vec<Vec<u8>> = vec![
            vec![0],
            vec![1],
            vec![255],
            vec![1, 0],
            vec![255, 255],
            vec![0xDE, 0xAD, 0xBE, 0xEF],
            vec![1, 2, 3, 4, 5, 6, 7, 8],
            vec![255; 16], // 16 bytes of 0xFF
        ];

        for bytes in test_cases {
            let encoded = bytes_to_base36(&bytes);
            let decoded = base36_to_bytes(&encoded).unwrap();
            // The decoder returns the minimal representation, so drop leading
            // zeros from the original; an all-zero input keeps its last byte,
            // matching the decoder's `[0]` for the value zero.
            let start = bytes.iter().position(|&b| b != 0).unwrap_or(bytes.len() - 1);
            let expected = bytes[start..].to_vec();
            assert_eq!(decoded, expected, "Roundtrip failed for {bytes:?}");
        }
    }

    #[test]
    fn test_base36_to_bytes_fixed() {
        // Test fixed-size decoding with padding
        let result: [u8; 4] = base36_to_bytes_fixed("1").unwrap();
        assert_eq!(result, [0, 0, 0, 1]);

        let result: [u8; 4] = base36_to_bytes_fixed("1ps9wxb").unwrap();
        assert_eq!(result, [0xDE, 0xAD, 0xBE, 0xEF]);

        // Test error when value is too large
        let result: Result<[u8; 2], String> = base36_to_bytes_fixed("1ps9wxb");
        assert!(result.is_err());
    }

    #[test]
    fn test_uuid_v4_encode_decode() {
        // UUID v4 example: 550e8400-e29b-41d4-a716-446655440000
        // As bytes (big-endian): [0x55, 0x0e, 0x84, 0x00, 0xe2, 0x9b, 0x41, 0xd4,
        //                         0xa7, 0x16, 0x44, 0x66, 0x55, 0x44, 0x00, 0x00]
        let uuid_bytes: [u8; 16] = [
            0x55, 0x0e, 0x84, 0x00, 0xe2, 0x9b, 0x41, 0xd4, 0xa7, 0x16, 0x44, 0x66, 0x55, 0x44,
            0x00, 0x00,
        ];

        // Encode to base36
        let encoded = bytes_to_base36(&uuid_bytes);

        // Decode back to bytes
        let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();

        assert_eq!(decoded, uuid_bytes);

        // Also test via u128 path
        let uuid_as_u128 = u128::from_be_bytes(uuid_bytes);
        let encoded_u128 = u128_to_base36(uuid_as_u128);
        let decoded_u128 = base36_to_u128(&encoded_u128).unwrap();

        assert_eq!(decoded_u128, uuid_as_u128);
        assert_eq!(decoded_u128.to_be_bytes(), uuid_bytes);

        // Verify both methods produce the same encoding
        assert_eq!(encoded, encoded_u128);
    }

    #[test]
    fn test_uuid_v4_random_samples() {
        // Test several UUID v4 patterns
        // UUID v4 has version 4 in bits 12-15 of time_hi_and_version (byte 6)
        // and variant bits 10xx in byte 8
        let uuid_samples: Vec<[u8; 16]> = vec![
            // Standard UUID v4 format
            [
                0x6b, 0xa7, 0xb8, 0x10, 0x9d, 0xad, 0x41, 0xd2, 0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4,
                0x30, 0xc8,
            ],
            // All zeros except version/variant bits
            [
                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
                0x00, 0x00,
            ],
            // Near max values
            [
                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x4f, 0xff, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff,
                0xff, 0xff,
            ],
        ];

        for uuid_bytes in uuid_samples {
            // Test bytes_to_base36 / base36_to_bytes_fixed roundtrip
            let encoded = bytes_to_base36(&uuid_bytes);
            let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
            assert_eq!(decoded, uuid_bytes);

            // Test u128_to_base36 / base36_to_u128 roundtrip
            let uuid_u128 = u128::from_be_bytes(uuid_bytes);
            let encoded_u128 = u128_to_base36(uuid_u128);
            let decoded_u128 = base36_to_u128(&encoded_u128).unwrap();
            assert_eq!(decoded_u128.to_be_bytes(), uuid_bytes);

            // Verify encoding is compact (UUID should be ~25 chars max in base36)
            assert!(encoded.len() <= 25);
        }
    }

    #[test]
    fn test_uuid_v4_max_value() {
        // Maximum possible UUID value (all 0xFF)
        let max_uuid: [u8; 16] = [0xff; 16];

        let encoded = bytes_to_base36(&max_uuid);
        let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
        assert_eq!(decoded, max_uuid);

        // Verify via u128
        let max_u128 = u128::MAX;
        assert_eq!(u128::from_be_bytes(max_uuid), max_u128);
        let encoded_u128 = u128_to_base36(max_u128);
        assert_eq!(encoded, encoded_u128);
    }

    #[test]
    fn test_uuid_v4_min_nonzero() {
        // Minimum non-zero UUID (just 1 in the last byte)
        let min_uuid: [u8; 16] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];

        let encoded = bytes_to_base36(&min_uuid);
        assert_eq!(encoded, "1");

        let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
        assert_eq!(decoded, min_uuid);
    }
}
Notes on Programming

Monday, 2 February 2026

base36 in rust

No comments:

Post a Comment

Iterative Mona Lisa E2E development

About Me

Blog Archive