bech32: add new EncodeM and DecodeGeneric functions for bech32

In this commit, we add two new package level functions: `EncodeM`, and
`DecodeGeneric`. The new encode method is intended to allow callers to
specify that they want to use the new bech32m checksum. This should be
used when encoding segwit addresses with version 1 and beyond. The new
`DecodeGeneric` function allows a caller to decode a bech32 and bech32m
string with a single function. A new return value is added which is the
version of the returned bech32 string, which allows callers to perform
additional segwit addr validation (v1+ should use bech32m etc).

We opted to add new functions rather than modifying the existing
functions to not cause a breaking API change, as most uses in the wild
can just use the existing functions, and only taproot related logic/code
needs to worry about the new methods.

A series of tests have been added to ensure that `DecodeGeneric`
extracts the proper bech version, and we've also adopted the bech32m
tests from BIP 350.
This commit is contained in:
Olaoluwa Osuntokun 2021-07-16 17:45:01 -07:00
parent 14f90e5946
commit 23db973afa
No known key found for this signature in database
GPG key ID: 3BBD59E99B280306
2 changed files with 231 additions and 45 deletions

View file

@ -123,8 +123,11 @@ func bech32Polymod(hrp string, values, checksum []byte) int {
// and 126), otherwise the results are undefined.
//
// For more details on the checksum calculation, please refer to BIP 173.
func writeBech32Checksum(hrp string, data []byte, bldr *strings.Builder) {
polymod := bech32Polymod(hrp, data, nil) ^ 1
func writeBech32Checksum(hrp string, data []byte, bldr *strings.Builder,
version Version) {
bech32Const := int(VersionToConsts[version])
polymod := bech32Polymod(hrp, data, nil) ^ bech32Const
for i := 0; i < 6; i++ {
b := byte((polymod >> uint(5*(5-i))) & 31)
@ -137,38 +140,47 @@ func writeBech32Checksum(hrp string, data []byte, bldr *strings.Builder) {
// bech32VerifyChecksum verifies whether the bech32 string specified by the
// provided hrp and payload data (encoded as 5 bits per element byte slice) has
// the correct checksum suffix.
// the correct checksum suffix. The version of bech32 used (bech32 OG, or
// bech32m) is also returned to allow the caller to perform proper address
// validation (segwitv0 should use bech32, v1+ should use bech32m).
//
// Data MUST have more than 6 elements, otherwise this function panics.
//
// For more details on the checksum verification, please refer to BIP 173.
func bech32VerifyChecksum(hrp string, data []byte) bool {
func bech32VerifyChecksum(hrp string, data []byte) (Version, bool) {
checksum := data[len(data)-6:]
values := data[:len(data)-6]
polymod := bech32Polymod(hrp, values, checksum)
return polymod == 1
// Before BIP-350, we'd always check this against a static constant of
// 1 to know if the checksum was computed properly. As we want to
// generically support decoding for bech32m as well as bech32, we'll
// look up the returned value and compare it to the set of defined
// constants.
bech32Version, ok := ConstsToVersion[ChecksumConst(polymod)]
if ok {
return bech32Version, true
}
return VersionUnknown, false
}
// DecodeNoLimit decodes a bech32 encoded string, returning the human-readable
// part and the data part excluding the checksum. This function does NOT
// validate against the BIP-173 maximum length allowed for bech32 strings and
// is meant for use in custom applications (such as lightning network payment
// requests), NOT on-chain addresses.
//
// Note that the returned data is 5-bit (base32) encoded and the human-readable
// part will be lowercase.
func DecodeNoLimit(bech string) (string, []byte, error) {
// DecodeNoLimit is a bech32 checksum version aware arbitrary string length
// decoder. This function will return the version of the decoded checksum
// constant so higher level validation can be performed to ensure the correct
// version of bech32 was used when encoding.
func decodeNoLimit(bech string) (string, []byte, Version, error) {
// The minimum allowed size of a bech32 string is 8 characters, since it
// needs a non-empty HRP, a separator, and a 6 character checksum.
if len(bech) < 8 {
return "", nil, ErrInvalidLength(len(bech))
return "", nil, VersionUnknown, ErrInvalidLength(len(bech))
}
// Only ASCII characters between 33 and 126 are allowed.
var hasLower, hasUpper bool
for i := 0; i < len(bech); i++ {
if bech[i] < 33 || bech[i] > 126 {
return "", nil, ErrInvalidCharacter(bech[i])
return "", nil, VersionUnknown, ErrInvalidCharacter(bech[i])
}
// The characters must be either all lowercase or all uppercase. Testing
@ -176,7 +188,7 @@ func DecodeNoLimit(bech string) (string, []byte, error) {
hasLower = hasLower || (bech[i] >= 97 && bech[i] <= 122)
hasUpper = hasUpper || (bech[i] >= 65 && bech[i] <= 90)
if hasLower && hasUpper {
return "", nil, ErrMixedCase{}
return "", nil, VersionUnknown, ErrMixedCase{}
}
}
@ -191,7 +203,7 @@ func DecodeNoLimit(bech string) (string, []byte, error) {
// last 6 characters of the string (since checksum cannot contain '1').
one := strings.LastIndexByte(bech, '1')
if one < 1 || one+7 > len(bech) {
return "", nil, ErrInvalidSeparatorIndex(one)
return "", nil, VersionUnknown, ErrInvalidSeparatorIndex(one)
}
// The human-readable part is everything before the last '1'.
@ -202,12 +214,13 @@ func DecodeNoLimit(bech string) (string, []byte, error) {
// 'charset'.
decoded, err := toBytes(data)
if err != nil {
return "", nil, err
return "", nil, VersionUnknown, err
}
// Verify if the checksum (stored inside decoded[:]) is valid, given the
// previously decoded hrp.
if !bech32VerifyChecksum(hrp, decoded) {
bech32Version, ok := bech32VerifyChecksum(hrp, decoded)
if !ok {
// Invalid checksum. Calculate what it should have been, so that the
// error contains this information.
@ -215,21 +228,42 @@ func DecodeNoLimit(bech string) (string, []byte, error) {
actual := bech[len(bech)-6:]
payload := decoded[:len(decoded)-6]
// Calculate the expected checksum, given the hrp and payload data.
// Calculate the expected checksum, given the hrp and payload
// data. We'll actually compute _both_ possibly valid checksum
// to further aide in debugging.
var expectedBldr strings.Builder
expectedBldr.Grow(6)
writeBech32Checksum(hrp, payload, &expectedBldr)
expected := expectedBldr.String()
writeBech32Checksum(hrp, payload, &expectedBldr, Version0)
expectedVersion0 := expectedBldr.String()
var b strings.Builder
b.Grow(6)
writeBech32Checksum(hrp, payload, &expectedBldr, VersionM)
expectedVersionM := expectedBldr.String()
err = ErrInvalidChecksum{
Expected: expected,
Actual: actual,
Expected: expectedVersion0,
ExpectedM: expectedVersionM,
Actual: actual,
}
return "", nil, err
return "", nil, VersionUnknown, err
}
// We exclude the last 6 bytes, which is the checksum.
return hrp, decoded[:len(decoded)-6], nil
return hrp, decoded[:len(decoded)-6], bech32Version, nil
}
// DecodeNoLimit decodes a bech32 encoded string, returning the human-readable
// part and the data part excluding the checksum. This function does NOT
// validate against the BIP-173 maximum length allowed for bech32 strings and
// is meant for use in custom applications (such as lightning network payment
// requests), NOT on-chain addresses.
//
// Note that the returned data is 5-bit (base32) encoded and the human-readable
// part will be lowercase.
func DecodeNoLimit(bech string) (string, []byte, error) {
hrp, data, _, err := decodeNoLimit(bech)
return hrp, data, err
}
// Decode decodes a bech32 encoded string, returning the human-readable part and
@ -243,16 +277,31 @@ func Decode(bech string) (string, []byte, error) {
return "", nil, ErrInvalidLength(len(bech))
}
return DecodeNoLimit(bech)
hrp, data, _, err := decodeNoLimit(bech)
return hrp, data, err
}
// Encode encodes a byte slice into a bech32 string with the given
// human-readable part (HRP). The HRP will be converted to lowercase if needed
// since mixed cased encodings are not permitted and lowercase is used for
// checksum purposes. Note that the bytes must each encode 5 bits (base32).
func Encode(hrp string, data []byte) (string, error) {
// The resulting bech32 string is the concatenation of the lowercase hrp,
// the separator 1, data and the 6-byte checksum.
// DecodeGeneric is identical to the existing Decode method, but will also
// return bech32 version that matches the decoded checksum. This method should
// be used when decoding segwit addresses, as it enables additional
// verification to ensure the proper checksum is used.
func DecodeGeneric(bech string) (string, []byte, Version, error) {
// The maximum allowed length for a bech32 string is 90.
if len(bech) > 90 {
return "", nil, VersionUnknown, ErrInvalidLength(len(bech))
}
return decodeNoLimit(bech)
}
// encodeGeneric is the base bech32 encoding function that is aware of the
// existence of the checksum versions. This method is private, as the Encode
// and EncodeM methods are intended to be used instead.
func encodeGeneric(hrp string, data []byte,
version Version) (string, error) {
// The resulting bech32 string is the concatenation of the lowercase
// hrp, the separator 1, data and the 6-byte checksum.
hrp = strings.ToLower(hrp)
var bldr strings.Builder
bldr.Grow(len(hrp) + 1 + len(data) + 6)
@ -268,11 +317,26 @@ func Encode(hrp string, data []byte) (string, error) {
}
// Calculate and write the checksum of the data.
writeBech32Checksum(hrp, data, &bldr)
writeBech32Checksum(hrp, data, &bldr, version)
return bldr.String(), nil
}
// Encode encodes a byte slice into a bech32 string with the given
// human-readable part (HRP). The HRP will be converted to lowercase if needed
// since mixed cased encodings are not permitted and lowercase is used for
// checksum purposes. Note that the bytes must each encode 5 bits (base32).
func Encode(hrp string, data []byte) (string, error) {
return encodeGeneric(hrp, data, Version0)
}
// EncodeM is the exactly same as the Encode method, but it uses the new
// bech32m constant instead of the original one. It should be used whenever one
// attempts to encode a segwit address of v1 and beyond.
func EncodeM(hrp string, data []byte) (string, error) {
return encodeGeneric(hrp, data, VersionM)
}
// ConvertBits converts a byte slice where each byte is encoding fromBits bits,
// to a byte slice where each byte is encoding toBits bits.
func ConvertBits(data []byte, fromBits, toBits uint8, pad bool) ([]byte, error) {

View file

@ -22,17 +22,18 @@ func TestBech32(t *testing.T) {
expectedError error
}{
{"A12UEL5L", nil},
{"a12uel5l", nil},
{"an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs", nil},
{"abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw", nil},
{"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", nil},
{"split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", nil},
{"split1checkupstagehandshakeupstreamerranterredcaperred2y9e2w", ErrInvalidChecksum{"2y9e3w", "2y9e2w"}}, // invalid checksum
{"s lit1checkupstagehandshakeupstreamerranterredcaperredp8hs2p", ErrInvalidCharacter(' ')}, // invalid character (space) in hrp
{"spl\x7Ft1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidCharacter(127)}, // invalid character (DEL) in hrp
{"split1cheo2y9e2w", ErrNonCharsetChar('o')}, // invalid character (o) in data part
{"split1a2y9w", ErrInvalidSeparatorIndex(5)}, // too short data part
{"1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidSeparatorIndex(0)}, // empty hrp
{"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqsqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", ErrInvalidLength(91)}, // too long
{"split1checkupstagehandshakeupstreamerranterredcaperred2y9e2w", ErrInvalidChecksum{"2y9e3w", "2y9e3wlc445v", "2y9e2w"}}, // invalid checksum
{"s lit1checkupstagehandshakeupstreamerranterredcaperredp8hs2p", ErrInvalidCharacter(' ')}, // invalid character (space) in hrp
{"spl\x7Ft1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidCharacter(127)}, // invalid character (DEL) in hrp
{"split1cheo2y9e2w", ErrNonCharsetChar('o')}, // invalid character (o) in data part
{"split1a2y9w", ErrInvalidSeparatorIndex(5)}, // too short data part
{"1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidSeparatorIndex(0)}, // empty hrp
{"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqsqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", ErrInvalidLength(91)}, // too long
// Additional test vectors used in bitcoin core
{" 1nwldj5", ErrInvalidCharacter(' ')},
@ -44,7 +45,7 @@ func TestBech32(t *testing.T) {
{"x1b4n0q5v", ErrNonCharsetChar(98)},
{"li1dgmt3", ErrInvalidSeparatorIndex(2)},
{"de1lg7wt\xff", ErrInvalidCharacter(0xff)},
{"A1G7SGD8", ErrInvalidChecksum{"2uel5l", "g7sgd8"}},
{"A1G7SGD8", ErrInvalidChecksum{"2uel5l", "2uel5llqfn3a", "g7sgd8"}},
{"10a06t8", ErrInvalidLength(7)},
{"1qzzfhee", ErrInvalidSeparatorIndex(0)},
{"a12UEL5L", ErrMixedCase{}},
@ -86,6 +87,127 @@ func TestBech32(t *testing.T) {
}
}
// TestBech32M tests that the following set of strings, based on the test
// vectors in BIP-350 are either valid or invalid using the new bech32m
// checksum algo. Some of these strings are similar to the set of above test
// vectors, but end up with different checksums.
func TestBech32M(t *testing.T) {
tests := []struct {
str string
expectedError error
}{
{"A1LQFN3A", nil},
{"a1lqfn3a", nil},
{"an83characterlonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11sg7hg6", nil},
{"abcdef1l7aum6echk45nj3s0wdvt2fg8x9yrzpqzd3ryx", nil},
{"11llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllludsr8", nil},
{"split1checkupstagehandshakeupstreamerranterredcaperredlc445v", nil},
{"?1v759aa", nil},
// Additional test vectors used in bitcoin core
{"\x201xj0phk", ErrInvalidCharacter('\x20')},
{"\x7f1g6xzxy", ErrInvalidCharacter('\x7f')},
{"\x801vctc34", ErrInvalidCharacter('\x80')},
{"an84characterslonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11d6pts4", ErrInvalidLength(91)},
{"qyrz8wqd2c9m", ErrInvalidSeparatorIndex(-1)},
{"1qyrz8wqd2c9m", ErrInvalidSeparatorIndex(0)},
{"y1b0jsk6g", ErrNonCharsetChar(98)},
{"lt1igcx5c0", ErrNonCharsetChar(105)},
{"in1muywd", ErrInvalidSeparatorIndex(2)},
{"mm1crxm3i", ErrNonCharsetChar(105)},
{"au1s5cgom", ErrNonCharsetChar(111)},
{"M1VUXWEZ", ErrInvalidChecksum{"mzl49c", "mzl49cw70eq6", "vuxwez"}},
{"16plkw9", ErrInvalidLength(7)},
{"1p2gdwpf", ErrInvalidSeparatorIndex(0)},
{" 1nwldj5", ErrInvalidCharacter(' ')},
{"\x7f" + "1axkwrx", ErrInvalidCharacter(0x7f)},
{"\x801eym55h", ErrInvalidCharacter(0x80)},
}
for i, test := range tests {
str := test.str
hrp, decoded, err := Decode(str)
if test.expectedError != err {
t.Errorf("%d: (%v) expected decoding error %v "+
"instead got %v", i, str, test.expectedError,
err)
continue
}
if err != nil {
// End test case here if a decoding error was expected.
continue
}
// Check that it encodes to the same string, using bech32 m.
encoded, err := EncodeM(hrp, decoded)
if err != nil {
t.Errorf("encoding failed: %v", err)
}
if encoded != strings.ToLower(str) {
t.Errorf("expected data to encode to %v, but got %v",
str, encoded)
}
// Flip a bit in the string an make sure it is caught.
pos := strings.LastIndexAny(str, "1")
flipped := str[:pos+1] + string((str[pos+1] ^ 1)) + str[pos+2:]
_, _, err = Decode(flipped)
if err == nil {
t.Error("expected decoding to fail")
}
}
}
// TestBech32DecodeGeneric tests that given a bech32 string, or a bech32m
// string, the proper checksum version is returned so that callers can perform
// segwit addr validation.
func TestBech32DecodeGeneric(t *testing.T) {
tests := []struct {
str string
version Version
}{
{"A1LQFN3A", VersionM},
{"a1lqfn3a", VersionM},
{"an83characterlonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11sg7hg6", VersionM},
{"abcdef1l7aum6echk45nj3s0wdvt2fg8x9yrzpqzd3ryx", VersionM},
{"11llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllludsr8", VersionM},
{"split1checkupstagehandshakeupstreamerranterredcaperredlc445v", VersionM},
{"?1v759aa", VersionM},
{"A12UEL5L", Version0},
{"a12uel5l", Version0},
{"an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs", Version0},
{"abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw", Version0},
{"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", Version0},
{"split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", Version0},
{"BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4", Version0},
{"tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sl5k7", Version0},
{"bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7kt5nd6y", VersionM},
{"BC1SW50QGDZ25J", VersionM},
{"bc1zw508d6qejxtdg4y5r3zarvaryvaxxpcs", VersionM},
{"tb1qqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesrxh6hy", Version0},
{"tb1pqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesf3hn0c", VersionM},
{"bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqzk5jj0", VersionM},
}
for i, test := range tests {
_, _, version, err := DecodeGeneric(test.str)
if err != nil {
t.Errorf("%d: (%v) unexpected error during "+
"decoding: %v", i, test.str, err)
continue
}
if version != test.version {
t.Errorf("(%v): invalid version: expected %v, got %v",
test.str, test.version, version)
}
}
}
// TestMixedCaseEncode ensures mixed case HRPs are converted to lowercase as
// expected when encoding and that decoding the produced encoding when converted
// to all uppercase produces the lowercase HRP and original data.
@ -242,7 +364,7 @@ func TestBech32Base256(t *testing.T) {
}, {
name: "same as previous but with checksum invalidated",
encoded: "split1checkupstagehandshakeupstreamerranterredcaperred2y9e2w",
err: ErrInvalidChecksum{"2y9e3w", "2y9e2w"},
err: ErrInvalidChecksum{"2y9e3w", "2y9e3wlc445v", "2y9e2w"},
}, {
name: "hrp with invalid character (space)",
encoded: "s lit1checkupstagehandshakeupstreamerranterredcaperredp8hs2p",