Base36 uses the digits 0-9 and the lowercase letters a-z. This works well when computers and humans have to share identifiers, because the strings are easy to spell out and stay short: a plain auto-incrementing integer yields quite small IDs. Here is a reusable Rust implementation:
/// Digit alphabet: the symbol for digit value `v` is `BASE36_CHARS[v]`.
const BASE36_CHARS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

/// Reverse lookup from an input byte to its base36 digit value.
///
/// Every byte that is not one of `0-9` / `a-z` (uppercase letters included)
/// maps to the sentinel value 255.
const BASE36_DECODE: [u8; 256] = {
    let mut table = [255u8; 256];
    // `for` loops are not available in const initializers, hence `while`.
    let mut value = 0;
    while value < 36 {
        // Invert the alphabet: the symbol at index `value` decodes to `value`.
        table[BASE36_CHARS[value] as usize] = value as u8;
        value += 1;
    }
    table
};
#[inline(always)]
pub fn u64_to_base36(value: u64) -> String {
// Specialized implementations for different ranges for maximum performance
match value {
0 => "0".to_string(),
1..=35 => unsafe { String::from_utf8_unchecked(vec![BASE36_CHARS[value as usize]]) },
36..=1295 => {
let d1 = (value % 36) as usize;
let d0 = (value / 36) as usize;
unsafe { String::from_utf8_unchecked(vec![BASE36_CHARS[d0], BASE36_CHARS[d1]]) }
}
1296..=46655 => {
let mut v = value;
let d2 = (v % 36) as usize;
v /= 36;
let d1 = (v % 36) as usize;
let d0 = (v / 36) as usize;
unsafe {
String::from_utf8_unchecked(vec![
BASE36_CHARS[d0],
BASE36_CHARS[d1],
BASE36_CHARS[d2],
])
}
}
_ => {
// General case for larger numbers
let mut val = value;
let mut buffer: [u8; 13] = [0; 13]; // Maximum length of u64 in base36 is 13
let mut pos = 13;
// Optimized division loop - compiler can optimize division by constant
while val > 0 {
pos -= 1;
buffer[pos] = BASE36_CHARS[(val % 36) as usize];
val /= 36;
}
// SAFETY: We only write valid ASCII characters
unsafe { String::from_utf8_unchecked(buffer[pos..].to_vec()) }
}
}
}
#[inline(always)]
pub fn base36_to_u64(s: &str) -> Result {
if s.is_empty() {
return Err("Empty string".to_string());
}
let bytes = s.as_bytes();
// Fast path for common single digit case
if bytes.len() == 1 {
let digit = BASE36_DECODE[bytes[0] as usize];
if digit == 255 {
return Err(format!(
"Invalid character '{}' in base36 string",
bytes[0] as char
));
}
return Ok(digit as u64);
}
let mut result: u64 = 0;
let mut i = 0;
// Process 2 digits at a time when possible for better performance
while i + 1 < bytes.len() {
let d0 = BASE36_DECODE[bytes[i] as usize];
let d1 = BASE36_DECODE[bytes[i + 1] as usize];
if d0 == 255 || d1 == 255 {
return Err("Invalid character in base36 string".to_string());
}
// result = result * 36^2 + d0 * 36 + d1
result = match result
.checked_mul(1296)
.and_then(|r| r.checked_add((d0 as u64) * 36 + d1 as u64))
{
Some(val) => val,
None => return Err("Overflow while parsing base36 string".to_string()),
};
i += 2;
}
// Handle remaining single digit
if i < bytes.len() {
let digit = BASE36_DECODE[bytes[i] as usize];
if digit == 255 {
return Err("Invalid character in base36 string".to_string());
}
result = match result
.checked_mul(36)
.and_then(|r| r.checked_add(digit as u64))
{
Some(val) => val,
None => return Err("Overflow while parsing base36 string".to_string()),
};
}
Ok(result)
}
/// Encodes a `u128` as a lowercase base36 string (handy for 128-bit UUIDs).
///
/// Zero becomes `"0"`; otherwise the result has no leading zeros and is at
/// most 25 characters long.
#[inline]
pub fn u128_to_base36(value: u128) -> String {
    // 25 base36 digits cover the whole u128 range.
    let mut digits = [b'0'; 25];
    let mut start = digits.len();
    let mut remaining = value;

    // Peel off the least significant digit each round, writing back to front.
    loop {
        start -= 1;
        digits[start] = BASE36_CHARS[(remaining % 36) as usize];
        remaining /= 36;
        if remaining == 0 {
            break;
        }
    }

    // Only ASCII symbols from BASE36_CHARS were written.
    digits[start..].iter().map(|&b| char::from(b)).collect()
}
/// Decode a base36 string to u128 (useful for UUIDs which are 128 bits)
#[inline]
pub fn base36_to_u128(s: &str) -> Result {
if s.is_empty() {
return Err("Empty string".to_string());
}
let bytes = s.as_bytes();
let mut result: u128 = 0;
for &byte in bytes {
let digit = BASE36_DECODE[byte as usize];
if digit == 255 {
return Err(format!(
"Invalid character '{}' in base36 string",
byte as char
));
}
result = match result
.checked_mul(36)
.and_then(|r| r.checked_add(digit as u128))
{
Some(val) => val,
None => return Err("Overflow while parsing base36 string".to_string()),
};
}
Ok(result)
}
/// Encodes arbitrary binary data as a base36 string.
///
/// The slice is read as one big-endian unsigned integer, so leading zero
/// bytes do not affect the output; an empty or all-zero slice yields `"0"`.
#[inline]
pub fn bytes_to_base36(bytes: &[u8]) -> String {
    // Strip leading zero bytes; if nothing remains the value is zero.
    let significant = match bytes.iter().position(|&b| b != 0) {
        Some(first) => &bytes[first..],
        None => return "0".to_string(),
    };

    // Up to 16 significant bytes fit in a u128: take the fast path.
    if significant.len() <= 16 {
        let value = significant
            .iter()
            .fold(0u128, |acc, &b| (acc << 8) | u128::from(b));
        return u128_to_base36(value);
    }

    // Otherwise divide the big-endian number by 36 repeatedly, in place.
    // `head` is the index of the most significant non-zero byte; each round
    // yields one base36 digit (the remainder), least significant first.
    let mut number = significant.to_vec();
    let mut head = 0;
    let mut reversed_digits = Vec::new();
    while head < number.len() {
        let mut carry: u16 = 0;
        for byte in &mut number[head..] {
            // carry < 36, so this stays below 36 * 256 and fits in u16.
            let current = (carry << 8) | u16::from(*byte);
            *byte = (current / 36) as u8;
            carry = current % 36;
        }
        reversed_digits.push(BASE36_CHARS[usize::from(carry)]);
        // Drop quotient bytes that became leading zeros.
        while head < number.len() && number[head] == 0 {
            head += 1;
        }
    }

    // Digits are ASCII symbols from BASE36_CHARS, collected in reverse order.
    reversed_digits.into_iter().rev().map(char::from).collect()
}
/// Decode a base36 string to binary data
/// Returns the minimal byte representation (no leading zeros)
#[inline]
pub fn base36_to_bytes(s: &str) -> Result, String> {
if s.is_empty() {
return Err("Empty string".to_string());
}
// For short strings that fit in u128, use fast path
if s.len() <= 25 {
let value = base36_to_u128(s)?;
if value == 0 {
return Ok(vec![0]);
}
// Convert u128 to bytes, removing leading zeros
let all_bytes = value.to_be_bytes();
let start = all_bytes.iter().position(|&b| b != 0).unwrap_or(15);
return Ok(all_bytes[start..].to_vec());
}
// For longer strings, use arbitrary precision arithmetic
let input_bytes = s.as_bytes();
// Validate all characters first
for &byte in input_bytes {
if BASE36_DECODE[byte as usize] == 255 {
return Err(format!(
"Invalid character '{}' in base36 string",
byte as char
));
}
}
// Convert base36 digits to a vector
let mut digits: Vec = input_bytes
.iter()
.map(|&b| BASE36_DECODE[b as usize])
.collect();
let mut result = Vec::new();
while !digits.is_empty() && !(digits.len() == 1 && digits[0] == 0) {
let mut remainder: u16 = 0;
let mut new_digits = Vec::with_capacity(digits.len());
for &digit in &digits {
let current = remainder * 36 + (digit as u16);
let quotient = current / 256;
remainder = current % 256;
if !new_digits.is_empty() || quotient > 0 {
new_digits.push(quotient as u8);
}
}
result.push(remainder as u8);
digits = new_digits;
}
if result.is_empty() {
return Ok(vec![0]);
}
result.reverse();
Ok(result)
}
/// Decode a base36 string to a fixed-size byte array
/// Pads with leading zeros if necessary, returns error if result is too large
#[inline]
pub fn base36_to_bytes_fixed(s: &str) -> Result<[u8; N], String> {
let bytes = base36_to_bytes(s)?;
if bytes.len() > N {
return Err(format!(
"Value too large: {} bytes, expected at most {}",
bytes.len(),
N
));
}
let mut result = [0u8; N];
let offset = N - bytes.len();
result[offset..].copy_from_slice(&bytes);
Ok(result)
}
/// "Unchecked" counterpart of `u64_to_base36`, kept for naming symmetry with
/// `base36_to_u64_unchecked`.
///
/// This function performs no work of its own: it forwards `value` to
/// `u64_to_base36` and returns that function's result unchanged.
#[inline(always)]
pub fn u64_to_base36_unchecked(value: u64) -> String {
// Plain delegation; the checked encoder takes any u64 and returns a String.
u64_to_base36(value)
}
#[inline(always)]
pub fn base36_to_u64_unchecked(s: &str) -> u64 {
let bytes = s.as_bytes();
// Unroll for common short cases
match bytes.len() {
1 => BASE36_DECODE[bytes[0] as usize] as u64,
2 => {
let d0 = BASE36_DECODE[bytes[0] as usize] as u64;
let d1 = BASE36_DECODE[bytes[1] as usize] as u64;
d0 * 36 + d1
}
3 => {
let d0 = BASE36_DECODE[bytes[0] as usize] as u64;
let d1 = BASE36_DECODE[bytes[1] as usize] as u64;
let d2 = BASE36_DECODE[bytes[2] as usize] as u64;
(d0 * 36 + d1) * 36 + d2
}
_ => {
let mut result: u64 = 0;
for &byte in bytes {
result = result * 36 + (BASE36_DECODE[byte as usize] as u64);
}
result
}
}
}
/// Unit tests for the base36 encoders and decoders defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_u64_to_base36() {
        assert_eq!(u64_to_base36(0), "0");
        assert_eq!(u64_to_base36(1), "1");
        assert_eq!(u64_to_base36(35), "z");
        assert_eq!(u64_to_base36(36), "10");
        assert_eq!(u64_to_base36(1296), "100");
        assert_eq!(u64_to_base36(46656), "1000");
    }

    #[test]
    fn test_base36_to_u64() {
        assert_eq!(base36_to_u64("0").unwrap(), 0);
        assert_eq!(base36_to_u64("1").unwrap(), 1);
        assert_eq!(base36_to_u64("z").unwrap(), 35);
        assert_eq!(base36_to_u64("10").unwrap(), 36);
        assert_eq!(base36_to_u64("100").unwrap(), 1296);
        assert_eq!(base36_to_u64("1000").unwrap(), 46656);
    }

    #[test]
    fn test_base36_to_u64_errors() {
        assert!(base36_to_u64("").is_err());
        assert!(base36_to_u64("Z").is_err());
        assert!(base36_to_u64("!").is_err());
        assert!(base36_to_u64("abc123xyz!").is_err());
    }

    #[test]
    fn test_roundtrip_small_values() {
        for i in 0..1000 {
            let base36 = u64_to_base36(i);
            let back = base36_to_u64(&base36).unwrap();
            assert_eq!(i, back, "Roundtrip failed for {i}");
        }
    }

    #[test]
    fn test_roundtrip_edge_cases() {
        let test_values = vec![
            0,
            1,
            35,
            36,
            1295,
            1296,
            46655,
            46656,
            u64::MAX / 2,
            u64::MAX - 1,
            u64::MAX,
        ];
        for value in test_values {
            let base36 = u64_to_base36(value);
            let back = base36_to_u64(&base36).unwrap();
            assert_eq!(value, back, "Roundtrip failed for {value}");
        }
    }

    #[test]
    fn test_roundtrip_random_samples() {
        let test_values = vec![
            123456789,
            987654321,
            1_000_000_000,
            10_000_000_000,
            100_000_000_000,
            1_000_000_000_000,
        ];
        for value in test_values {
            let base36 = u64_to_base36(value);
            let back = base36_to_u64(&base36).unwrap();
            assert_eq!(value, back, "Roundtrip failed for {value}");
        }
    }

    #[test]
    fn test_max_u64_base36() {
        let max_base36 = u64_to_base36(u64::MAX);
        assert_eq!(max_base36, "3w5e11264sgsf");
        assert_eq!(base36_to_u64(&max_base36).unwrap(), u64::MAX);
    }

    #[test]
    fn test_overflow_detection() {
        // One past u64::MAX, and a value with too many digits.
        assert!(base36_to_u64("3w5e11264sgsg").is_err());
        assert!(base36_to_u64("zzzzzzzzzzzzzz").is_err());
    }

    #[test]
    fn test_unchecked_versions() {
        // Unchecked versions must match the checked ones for valid inputs.
        for i in 0..10000 {
            assert_eq!(u64_to_base36(i), u64_to_base36_unchecked(i));
            let s = u64_to_base36(i);
            assert_eq!(base36_to_u64(&s).unwrap(), base36_to_u64_unchecked(&s));
        }
    }

    #[test]
    fn test_u64_to_base36_specific_ranges() {
        // Zero
        assert_eq!(u64_to_base36(0), "0");
        // One-digit range 1..=35
        assert_eq!(u64_to_base36(1), "1");
        assert_eq!(u64_to_base36(35), "z");
        // Two-digit range 36..=1295
        assert_eq!(u64_to_base36(36), "10");
        assert_eq!(u64_to_base36(1295), "zz");
        assert_eq!(u64_to_base36(100), "2s");
        // Three-digit range 1296..=46655
        assert_eq!(u64_to_base36(1296), "100");
        assert_eq!(u64_to_base36(46655), "zzz");
        assert_eq!(u64_to_base36(5000), "3uw");
        // Four digits and more
        assert_eq!(u64_to_base36(46656), "1000");
        assert_eq!(u64_to_base36(1000000), "lfls");
        assert_eq!(u64_to_base36(u64::MAX), "3w5e11264sgsf");
    }

    #[test]
    fn test_base36_to_u64_specific_cases() {
        // Empty input
        assert_eq!(base36_to_u64("").unwrap_err(), "Empty string");

        // Single-digit path
        assert_eq!(base36_to_u64("0").unwrap(), 0);
        assert_eq!(base36_to_u64("9").unwrap(), 9);
        assert_eq!(base36_to_u64("a").unwrap(), 10);
        assert_eq!(base36_to_u64("z").unwrap(), 35);

        // Invalid single character
        assert!(base36_to_u64("@")
            .unwrap_err()
            .contains("Invalid character"));

        // Two-digit processing
        assert_eq!(base36_to_u64("10").unwrap(), 36);
        assert_eq!(base36_to_u64("zz").unwrap(), 1295);

        // Invalid character inside a digit pair
        assert_eq!(
            base36_to_u64("a@").unwrap_err(),
            "Invalid character in base36 string"
        );

        // Overflow on a very long string
        assert_eq!(
            base36_to_u64("zzzzzzzzzzzzzzzzzzzz").unwrap_err(),
            "Overflow while parsing base36 string"
        );

        // Trailing single digit: 10*36*36 + 11*36 + 12
        assert_eq!(base36_to_u64("abc").unwrap(), 13368);

        // Invalid trailing digit
        assert_eq!(
            base36_to_u64("ab@").unwrap_err(),
            "Invalid character in base36 string"
        );
    }

    #[test]
    fn test_base36_to_u64_unchecked_specific_cases() {
        // 1 character
        assert_eq!(base36_to_u64_unchecked("0"), 0);
        assert_eq!(base36_to_u64_unchecked("z"), 35);
        // 2 characters
        assert_eq!(base36_to_u64_unchecked("10"), 36);
        assert_eq!(base36_to_u64_unchecked("zz"), 1295);
        // 3 characters
        assert_eq!(base36_to_u64_unchecked("100"), 1296);
        assert_eq!(base36_to_u64_unchecked("zzz"), 46655);
        // 4 or more characters
        assert_eq!(base36_to_u64_unchecked("1000"), 46656);
        assert_eq!(base36_to_u64_unchecked("abc123"), 623698779);
    }

    #[test]
    fn test_u64_to_base36_unchecked_coverage() {
        // The unchecked encoder must agree with the regular one.
        assert_eq!(u64_to_base36_unchecked(12345), u64_to_base36(12345));
        assert_eq!(u64_to_base36_unchecked(0), "0");
        assert_eq!(u64_to_base36_unchecked(u64::MAX), "3w5e11264sgsf");
    }

    #[test]
    fn test_lookup_table_bounds() {
        // The lookup table must cover every possible byte value.
        for i in 0..=255 {
            let val = BASE36_DECODE[i];
            if i >= b'0' as usize && i <= b'9' as usize {
                assert_eq!(val, (i - b'0' as usize) as u8);
            } else if i >= b'a' as usize && i <= b'z' as usize {
                assert_eq!(val, (i - b'a' as usize + 10) as u8);
            } else {
                assert_eq!(val, 255);
            }
        }
    }

    #[test]
    fn test_base36_chars_array() {
        assert_eq!(BASE36_CHARS[0], b'0');
        assert_eq!(BASE36_CHARS[9], b'9');
        assert_eq!(BASE36_CHARS[10], b'a');
        assert_eq!(BASE36_CHARS[35], b'z');
        assert_eq!(BASE36_CHARS.len(), 36);
    }

    #[test]
    fn test_base36_decode_lookup_table_comprehensive() {
        // Digits 0-9
        for i in 0..10 {
            let char_byte = b'0' + i;
            assert_eq!(BASE36_DECODE[char_byte as usize], i);
        }
        // Letters a-z
        for i in 0..26 {
            let char_byte = b'a' + i;
            assert_eq!(BASE36_DECODE[char_byte as usize], i + 10);
        }
        // Uppercase letters are invalid
        for i in 0..26 {
            let char_byte = b'A' + i;
            assert_eq!(BASE36_DECODE[char_byte as usize], 255);
        }
        // Characters adjacent to the valid ranges are invalid
        assert_eq!(BASE36_DECODE[b'@' as usize], 255);
        assert_eq!(BASE36_DECODE[b'[' as usize], 255);
        assert_eq!(BASE36_DECODE[b'`' as usize], 255);
        assert_eq!(BASE36_DECODE[b'{' as usize], 255);
        assert_eq!(BASE36_DECODE[255], 255);
    }

    #[test]
    fn test_u64_to_base36_boundary_values() {
        // Exact boundaries between output lengths.
        assert_eq!(u64_to_base36(0), "0");
        assert_eq!(u64_to_base36(1), "1");
        assert_eq!(u64_to_base36(35), "z");
        assert_eq!(u64_to_base36(36), "10");
        assert_eq!(u64_to_base36(1295), "zz");
        assert_eq!(u64_to_base36(1296), "100");
        assert_eq!(u64_to_base36(46655), "zzz");
        assert_eq!(u64_to_base36(46656), "1000");
    }

    #[test]
    fn test_base36_to_u64_single_digit_edge_cases() {
        // Every valid single digit decodes to its index in the alphabet.
        for i in 0..36 {
            let base36_char = BASE36_CHARS[i] as char;
            let base36_string = base36_char.to_string();
            let result = base36_to_u64(&base36_string).unwrap();
            assert_eq!(result, i as u64);
        }
    }

    #[test]
    fn test_base36_to_u64_two_digit_processing() {
        assert_eq!(base36_to_u64("10").unwrap(), 36);
        assert_eq!(base36_to_u64("11").unwrap(), 37);
        assert_eq!(base36_to_u64("zz").unwrap(), 1295);
        assert_eq!(base36_to_u64("az").unwrap(), 10 * 36 + 35);
        assert_eq!(base36_to_u64("za").unwrap(), 35 * 36 + 10);
    }

    #[test]
    fn test_base36_to_u64_overflow_boundary() {
        // The largest representable value still decodes.
        let max_valid = u64_to_base36(u64::MAX);
        assert_eq!(base36_to_u64(&max_valid).unwrap(), u64::MAX);

        // 16 z's is far larger than u64::MAX.
        assert_eq!(
            base36_to_u64("zzzzzzzzzzzzzzzz").unwrap_err(),
            "Overflow while parsing base36 string"
        );
    }

    #[test]
    fn test_base36_to_u64_invalid_characters() {
        // Single-character inputs name the offending character; longer
        // inputs use the generic message.
        let invalid_cases = vec![
            ("A", "Invalid character 'A' in base36 string"),
            ("Z", "Invalid character 'Z' in base36 string"),
            ("@", "Invalid character '@' in base36 string"),
            ("[", "Invalid character '[' in base36 string"),
            ("`", "Invalid character '`' in base36 string"),
            ("{", "Invalid character '{' in base36 string"),
            ("!", "Invalid character '!' in base36 string"),
            ("a!", "Invalid character in base36 string"),
            ("!a", "Invalid character in base36 string"),
            ("a@b", "Invalid character in base36 string"),
        ];
        for (input, expected_error) in invalid_cases {
            match base36_to_u64(input) {
                Err(msg) => assert_eq!(msg, expected_error, "Wrong error for input: {input}"),
                Ok(_) => panic!("Should have failed for input: {input}"),
            }
        }
    }

    #[test]
    fn test_base36_to_u64_odd_length_strings() {
        // Odd lengths exercise the trailing single-digit handling.
        assert_eq!(base36_to_u64("1").unwrap(), 1);
        assert_eq!(base36_to_u64("123").unwrap(), 36 * 36 + 2 * 36 + 3);
        assert_eq!(
            base36_to_u64("12345").unwrap(),
            36 * 36 * 36 * 36 + 2 * 36 * 36 * 36 + 3 * 36 * 36 + 4 * 36 + 5
        );
    }

    #[test]
    fn test_base36_to_u64_unchecked_specific_lengths() {
        // 1 character
        assert_eq!(base36_to_u64_unchecked("5"), 5);
        assert_eq!(base36_to_u64_unchecked("z"), 35);
        // 2 characters
        assert_eq!(base36_to_u64_unchecked("10"), 36);
        assert_eq!(base36_to_u64_unchecked("zz"), 1295);
        // 3 characters
        assert_eq!(base36_to_u64_unchecked("100"), 1296);
        assert_eq!(base36_to_u64_unchecked("zzz"), 46655);
        // 4 or more characters
        assert_eq!(base36_to_u64_unchecked("1000"), 46656);
        assert_eq!(
            base36_to_u64_unchecked("abcd"),
            10 * 36 * 36 * 36 + 11 * 36 * 36 + 12 * 36 + 13
        );
    }

    #[test]
    fn test_performance_comparison() {
        // Checked and unchecked versions must produce the same results.
        let test_values = vec![0, 1, 35, 36, 1000, 46656, 1000000];
        for value in test_values {
            let base36_str = u64_to_base36(value);
            assert_eq!(u64_to_base36_unchecked(value), base36_str);
            assert_eq!(
                base36_to_u64(&base36_str).unwrap(),
                base36_to_u64_unchecked(&base36_str)
            );
        }
    }

    #[test]
    fn test_edge_case_error_messages() {
        // Invalid single character is named in the message.
        assert!(base36_to_u64("@")
            .unwrap_err()
            .contains("Invalid character '@' in base36 string"));
        // Empty input has its own message.
        assert_eq!(base36_to_u64("").unwrap_err(), "Empty string");
    }

    #[test]
    fn test_large_string_handling() {
        // A string that definitely overflows u64.
        let very_large_string = "z".repeat(20);
        assert_eq!(
            base36_to_u64(&very_large_string).unwrap_err(),
            "Overflow while parsing base36 string"
        );
    }

    #[test]
    fn test_buffer_usage_in_general_case() {
        // Values with four or more base36 digits.
        let large_values = vec![100000, 1000000, 10000000, 100000000, 1000000000];
        for value in large_values {
            let base36_str = u64_to_base36(value);
            let decoded = base36_to_u64(&base36_str).unwrap();
            assert_eq!(decoded, value);
            // u64::MAX needs 13 digits, so nothing may be longer.
            assert!(base36_str.len() <= 13);
        }
    }

    #[test]
    fn test_u128_to_base36() {
        assert_eq!(u128_to_base36(0), "0");
        assert_eq!(u128_to_base36(1), "1");
        assert_eq!(u128_to_base36(35), "z");
        assert_eq!(u128_to_base36(36), "10");
        assert_eq!(u128_to_base36(u64::MAX as u128), "3w5e11264sgsf");
        // A value larger than u64::MAX
        let large_value: u128 = (u64::MAX as u128) * 2;
        let encoded = u128_to_base36(large_value);
        let decoded = base36_to_u128(&encoded).unwrap();
        assert_eq!(decoded, large_value);
    }

    #[test]
    fn test_base36_to_u128() {
        assert_eq!(base36_to_u128("0").unwrap(), 0);
        assert_eq!(base36_to_u128("1").unwrap(), 1);
        assert_eq!(base36_to_u128("z").unwrap(), 35);
        assert_eq!(base36_to_u128("10").unwrap(), 36);
        assert_eq!(base36_to_u128("3w5e11264sgsf").unwrap(), u64::MAX as u128);
    }

    #[test]
    fn test_u128_roundtrip() {
        let test_values: Vec<u128> = vec![
            0,
            1,
            35,
            36,
            u64::MAX as u128,
            u64::MAX as u128 + 1,
            u128::MAX / 2,
            u128::MAX - 1,
            u128::MAX,
        ];
        for value in test_values {
            let encoded = u128_to_base36(value);
            let decoded = base36_to_u128(&encoded).unwrap();
            assert_eq!(decoded, value, "Roundtrip failed for {value}");
        }
    }

    #[test]
    fn test_bytes_to_base36() {
        // Empty and zero cases
        assert_eq!(bytes_to_base36(&[]), "0");
        assert_eq!(bytes_to_base36(&[0]), "0");
        assert_eq!(bytes_to_base36(&[0, 0, 0]), "0");
        // Single-byte values
        assert_eq!(bytes_to_base36(&[1]), "1");
        assert_eq!(bytes_to_base36(&[35]), "z");
        assert_eq!(bytes_to_base36(&[36]), "10");
        assert_eq!(bytes_to_base36(&[255]), "73"); // 255 in base36
        // Multi-byte values
        assert_eq!(bytes_to_base36(&[1, 0]), "74"); // 256 in base36
        assert_eq!(bytes_to_base36(&[0, 1, 0]), "74"); // Leading zeros ignored
        // Known value: 0xDEADBEEF
        assert_eq!(bytes_to_base36(&[0xDE, 0xAD, 0xBE, 0xEF]), "1ps9wxb");
    }

    #[test]
    fn test_base36_to_bytes() {
        assert_eq!(base36_to_bytes("0").unwrap(), vec![0]);
        assert_eq!(base36_to_bytes("1").unwrap(), vec![1]);
        assert_eq!(base36_to_bytes("z").unwrap(), vec![35]);
        assert_eq!(base36_to_bytes("10").unwrap(), vec![36]);
        assert_eq!(base36_to_bytes("73").unwrap(), vec![255]);
        assert_eq!(base36_to_bytes("74").unwrap(), vec![1, 0]); // 256
        // Known value: 0xDEADBEEF
        assert_eq!(
            base36_to_bytes("1ps9wxb").unwrap(),
            vec![0xDE, 0xAD, 0xBE, 0xEF]
        );
    }

    #[test]
    fn test_bytes_roundtrip() {
        let test_cases: Vec<Vec<u8>> = vec![
            vec![0],
            vec![1],
            vec![255],
            vec![1, 0],
            vec![255, 255],
            vec![0xDE, 0xAD, 0xBE, 0xEF],
            vec![1, 2, 3, 4, 5, 6, 7, 8],
            vec![255; 16],  // 16 bytes of 0xFF
            vec![0xAB; 20], // more than 16 bytes: arbitrary-precision path
        ];
        for bytes in test_cases {
            let encoded = bytes_to_base36(&bytes);
            let decoded = base36_to_bytes(&encoded).unwrap();
            // The decoder returns the minimal form: no leading zero bytes,
            // and `[0]` for the value zero.
            let expected = match bytes.iter().position(|&b| b != 0) {
                Some(first) => bytes[first..].to_vec(),
                None => vec![0],
            };
            assert_eq!(decoded, expected, "Roundtrip failed for {bytes:?}");
        }
    }

    #[test]
    fn test_base36_to_bytes_fixed() {
        // Fixed-size decoding pads with leading zeros.
        let result: [u8; 4] = base36_to_bytes_fixed("1").unwrap();
        assert_eq!(result, [0, 0, 0, 1]);
        let result: [u8; 4] = base36_to_bytes_fixed("1ps9wxb").unwrap();
        assert_eq!(result, [0xDE, 0xAD, 0xBE, 0xEF]);
        // A value that does not fit is rejected.
        let result: Result<[u8; 2], String> = base36_to_bytes_fixed("1ps9wxb");
        assert!(result.is_err());
    }

    #[test]
    fn test_uuid_v4_encode_decode() {
        // UUID v4 example: 550e8400-e29b-41d4-a716-446655440000 (big-endian bytes)
        let uuid_bytes: [u8; 16] = [
            0x55, 0x0e, 0x84, 0x00, 0xe2, 0x9b, 0x41, 0xd4, 0xa7, 0x16, 0x44, 0x66, 0x55, 0x44,
            0x00, 0x00,
        ];
        // Byte-slice path
        let encoded = bytes_to_base36(&uuid_bytes);
        let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
        assert_eq!(decoded, uuid_bytes);
        // u128 path
        let uuid_as_u128 = u128::from_be_bytes(uuid_bytes);
        let encoded_u128 = u128_to_base36(uuid_as_u128);
        let decoded_u128 = base36_to_u128(&encoded_u128).unwrap();
        assert_eq!(decoded_u128, uuid_as_u128);
        assert_eq!(decoded_u128.to_be_bytes(), uuid_bytes);
        // Both paths must produce the same encoding.
        assert_eq!(encoded, encoded_u128);
    }

    #[test]
    fn test_uuid_v4_random_samples() {
        // UUID v4 carries version 4 in the high nibble of byte 6 and the
        // variant bits 10xx in byte 8.
        let uuid_samples: Vec<[u8; 16]> = vec![
            // Standard UUID v4 format
            [
                0x6b, 0xa7, 0xb8, 0x10, 0x9d, 0xad, 0x41, 0xd2, 0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4,
                0x30, 0xc8,
            ],
            // All zeros except version/variant bits
            [
                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
                0x00, 0x00,
            ],
            // Near max values
            [
                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x4f, 0xff, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff,
                0xff, 0xff,
            ],
        ];
        for uuid_bytes in uuid_samples {
            // bytes_to_base36 / base36_to_bytes_fixed roundtrip
            let encoded = bytes_to_base36(&uuid_bytes);
            let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
            assert_eq!(decoded, uuid_bytes);
            // u128_to_base36 / base36_to_u128 roundtrip
            let uuid_u128 = u128::from_be_bytes(uuid_bytes);
            let encoded_u128 = u128_to_base36(uuid_u128);
            let decoded_u128 = base36_to_u128(&encoded_u128).unwrap();
            assert_eq!(decoded_u128.to_be_bytes(), uuid_bytes);
            // A 128-bit value needs at most 25 base36 characters.
            assert!(encoded.len() <= 25);
        }
    }

    #[test]
    fn test_uuid_v4_max_value() {
        // Maximum possible 128-bit value (all 0xFF)
        let max_uuid: [u8; 16] = [0xff; 16];
        let encoded = bytes_to_base36(&max_uuid);
        let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
        assert_eq!(decoded, max_uuid);
        // Same value via u128
        let max_u128 = u128::MAX;
        assert_eq!(u128::from_be_bytes(max_uuid), max_u128);
        let encoded_u128 = u128_to_base36(max_u128);
        assert_eq!(encoded, encoded_u128);
    }

    #[test]
    fn test_uuid_v4_min_nonzero() {
        // Minimum non-zero value (just 1 in the last byte)
        let min_uuid: [u8; 16] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
        let encoded = bytes_to_base36(&min_uuid);
        assert_eq!(encoded, "1");
        let decoded: [u8; 16] = base36_to_bytes_fixed(&encoded).unwrap();
        assert_eq!(decoded, min_uuid);
    }
}