Decoding Base62 with Golang

Ever heard of base62 encoding? I hadn’t until recently. Generally, it’s a clever way to fit more complicated values (in my particular experience, UUIDs) into a URL as a short string of digits and letters. But how do we decode a base62 string in Golang?

I found an example of base62 decoding, but I needed to tweak it a bit to work for me.

The big issue was that the example didn’t handle longer strings. Since it used uint64 for its calculations, a base62-encoded UUID (which has 128 bits of data) would overflow the 64-bit integer and silently wrap around. Can’t have that! I also pulled a bit of code from Google’s UUID implementation so the conversion wouldn’t need any external dependencies.

Anyway…here’s some code to decode base62-encoded UUIDs in Golang:

package main

import (
    "encoding/hex"
    "fmt"
    "math/big"
    "strings"
)

// characterSet is the base62 alphabet: the decimal digits, then the
// uppercase letters, then the lowercase letters (62 characters in total).
const characterSet = "0123456789" +
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
    "abcdefghijklmnopqrstuvwxyz"


/*
 * Pulled from https://github.com/google/uuid/blob/master/uuid.go
 * Done to remove vendor dependencies and simplify maintenance
 */
// encodeHex writes the canonical 8-4-4-4-12 hexadecimal form of uuid into
// dst. dst must have room for at least 36 bytes and uuid is expected to hold
// 16 bytes; shorter slices cause a panic.
func encodeHex(dst []byte, uuid []byte) {
    // First group: 4 bytes -> 8 hex digits at the start of dst.
    hex.Encode(dst, uuid[:4])

    // The three middle groups are each 2 bytes (4 hex digits) and are each
    // preceded by a dash.
    middle := [...]struct{ dash, from, to int }{
        {dash: 8, from: 4, to: 6},
        {dash: 13, from: 6, to: 8},
        {dash: 18, from: 8, to: 10},
    }
    for _, grp := range middle {
        dst[grp.dash] = '-'
        hex.Encode(dst[grp.dash+1:grp.dash+5], uuid[grp.from:grp.to])
    }

    // Last group: everything from byte 10 onwards, after the final dash.
    dst[23] = '-'
    hex.Encode(dst[24:], uuid[10:])
}

// uuidUnmarshal converts the 16 raw bytes of a UUID into its canonical
// textual representation (8-4-4-4-12 hexadecimal digits separated by dashes),
// e.g. ef4c6827-f405-491c-a349-d1ede87549c1.
//
// It returns an error if data is not exactly 16 bytes long.
func uuidUnmarshal(data []byte) (string, error) {
    if len(data) != 16 {
        return "", fmt.Errorf("invalid UUID (length %d bytes instead of 16)", len(data))
    }
    // The textual form is exactly 36 bytes (32 hex digits + 4 dashes). Sizing
    // the buffer to match means no unused zero bytes are left at the end, so
    // the result needs no post-processing to strip NUL characters.
    var buf [36]byte
    encodeHex(buf[:], data)
    return string(buf[:]), nil
}

/*
 * End of pulled code
 */

// fromBase62 decodes a base62-encoded UUID (digits taken from characterSet,
// most significant digit first) and returns it in canonical textual form.
//
// The basic logic comes from
// https://medium.com/@anabhishek.jha/base-62-text-encoding-decoding-b43921c7a954
// but the arithmetic uses math/big instead of uint64, because a UUID holds
// 128 bits and would overflow a 64-bit integer.
//
// An error is returned when encoded is empty, contains a character outside
// characterSet, or represents a value that does not fit in 16 bytes.
func fromBase62(encoded string) (string, error) {
    const uuidLen = 16

    // Reject empty input explicitly; otherwise the zero padding below would
    // silently turn it into the all-zero UUID.
    if encoded == "" {
        return "", fmt.Errorf("Could not convert base62 to UUID: %s :: empty input", encoded)
    }

    // Horner's scheme: val = val*62 + digit for every character, left to
    // right. This avoids computing a separate power of 62 per digit.
    base := big.NewInt(62)
    var val, digit big.Int
    for _, char := range encoded {
        pos := strings.IndexRune(characterSet, char)
        if pos == -1 {
            return "", fmt.Errorf("invalid character: %c", char)
        }
        val.Mul(&val, base)
        val.Add(&val, digit.SetInt64(int64(pos)))
    }

    // big.Int.Bytes returns the minimal big-endian representation, so a UUID
    // whose leading byte(s) are zero comes back shorter than 16 bytes.
    // Left-pad with zeros to restore the fixed width. Values that are too
    // large are left as-is and rejected by uuidUnmarshal.
    raw := val.Bytes()
    if len(raw) < uuidLen {
        padded := make([]byte, uuidLen)
        copy(padded[uuidLen-len(raw):], raw)
        raw = padded
    }

    uuidStr, err := uuidUnmarshal(raw)
    if err != nil {
        return "", fmt.Errorf("Could not convert base62 to UUID: %s :: %w", encoded, err)
    }
    return uuidStr, nil
}