Decoding Base62 with Golang
Ever heard of base62 encoding? I hadn’t until recently. It’s a compact way to represent long values (in my particular experience, UUIDs) using only the characters 0-9, A-Z, and a-z, which makes them easy to put in a URL. But how do we decode a base62 string in Golang?
I found an example of base62 decoding, but I needed to tweak it a bit to work for me.
The big issue was that the example didn’t handle longer strings. Since it was using uint64 for calculations, a base62-encoded UUID (which has 128 bits of data) would overflow the integer, which silently wraps around and produces the wrong value. Can’t have that! I also pulled a bit of code from Google’s UUID implementation so the conversion wouldn’t need any external dependencies.
Anyway…here’s some code to decode base62-encoded UUIDs in Golang:
package main
import (
"encoding/hex"
"fmt"
"math/big"
"strings"
)
// characterSet is the base62 alphabet. A character's index in this string is
// its digit value: '0'-'9' map to 0-9, 'A'-'Z' to 10-35, 'a'-'z' to 36-61.
const characterSet = "0123456789" +
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz"
/*
* Pulled from https://github.com/google/uuid/blob/master/uuid.go
* Done to remove vendor dependencies and simplify maintenance
*/
// encodeHex writes the dash-separated hexadecimal form of uuid into dst,
// using the 8-4-4-4-12 grouping (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx).
//
// dst must have room for the full text (36 bytes for a 16-byte uuid); bytes
// of dst beyond the encoded text are left untouched. The function panics if
// either slice is too short for the ranges accessed below.
func encodeHex(dst []byte, uuid []byte) {
	// Leading group: 4 bytes -> 8 hex digits at dst[0:8].
	hex.Encode(dst, uuid[:4])

	// The three middle groups are each 2 bytes (4 hex digits) preceded by a dash.
	middle := [...]struct {
		dash     int // index in dst of the '-' that precedes the group
		from, to int // byte range of uuid encoded by the group
	}{
		{dash: 8, from: 4, to: 6},
		{dash: 13, from: 6, to: 8},
		{dash: 18, from: 8, to: 10},
	}
	for _, g := range middle {
		dst[g.dash] = '-'
		hex.Encode(dst[g.dash+1:g.dash+5], uuid[g.from:g.to])
	}

	// Trailing group: everything from byte 10 onward, written from dst[24].
	dst[23] = '-'
	hex.Encode(dst[24:], uuid[10:])
}
// uuidUnmarshal formats 16 raw bytes as canonical UUID text.
//
// For example, the bytes ef 4c 68 27 f4 05 49 1c a3 49 d1 ed e8 75 49 c1
// become "ef4c6827-f405-491c-a349-d1ede87549c1". (Per the original author's
// note, that is the UUID encoded by the base62 string 7HY5DazOSGnPrmyP8juyhN.)
//
// It returns an error if data is not exactly 16 bytes long.
func uuidUnmarshal(data []byte) (string, error) {
	if len(data) != 16 {
		return "", fmt.Errorf("invalid UUID (length %d bytes instead of 16)", len(data))
	}

	// 32 hex digits plus 4 dashes is exactly 36 bytes. Sizing the buffer to
	// match means no trailing NUL bytes are left over that would need to be
	// stripped from the result afterwards.
	var buf [36]byte
	encodeHex(buf[:], data)
	return string(buf[:]), nil
}
/*
* End of pulled code
*/
// fromBase62 decodes a base62-encoded UUID and returns its canonical
// xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx text form.
//
// The input is read as a big-endian base62 number whose digits come from
// characterSet. math/big is used rather than uint64 because a UUID carries
// 128 bits, which would overflow a 64-bit integer.
//
// Basic logic adapted from
// https://medium.com/@anabhishek.jha/base-62-text-encoding-decoding-b43921c7a954
//
// An error is returned when encoded contains a character outside
// characterSet, when it is empty, or when the decoded value needs more than
// 16 bytes.
func fromBase62(encoded string) (string, error) {
	const uuidLen = 16

	base := big.NewInt(62)
	var val big.Int
	for _, char := range encoded {
		pos := strings.IndexRune(characterSet, char)
		if pos == -1 {
			return "", fmt.Errorf("invalid character: %c", char)
		}
		// Horner's rule: val = val*62 + digit. This gives the same result as
		// summing digit * 62^position without a big exponentiation per character.
		val.Mul(&val, base)
		val.Add(&val, big.NewInt(int64(pos)))
	}

	// big.Int.Bytes drops leading zero bytes, so a UUID whose first byte(s)
	// are 0x00 comes back shorter than 16 bytes. Left-pad it to full width.
	// Empty input is deliberately not padded so that it is still rejected by
	// the length check in uuidUnmarshal; oversized values are rejected there too.
	raw := val.Bytes()
	if len(encoded) > 0 && len(raw) < uuidLen {
		padded := make([]byte, uuidLen)
		copy(padded[uuidLen-len(raw):], raw)
		raw = padded
	}

	uuidStr, err := uuidUnmarshal(raw)
	if err != nil {
		return "", fmt.Errorf("Could not convert base62 to UUID: %s :: %w", encoded, err)
	}
	return uuidStr, nil
}