From 3ecfc35771d4d0a18dfa7ca552f82bcb7d7cce83 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Fri, 16 Jul 2021 17:37:30 -0700 Subject: [PATCH 1/3] bech32: add new set of constants/versions to be used for bech32m --- bech32/version.go | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 bech32/version.go diff --git a/bech32/version.go b/bech32/version.go new file mode 100644 index 0000000..147037d --- /dev/null +++ b/bech32/version.go @@ -0,0 +1,43 @@ +package bech32 + +// ChecksumConst is a type that represents the currently defined bech32 +// checksum constants. +type ChecksumConst int + +const ( + // Version0Const is the original constant used in the checksum + // verification for bech32. + Version0Const ChecksumConst = 1 + + // VersionMConst is the new constant used for bech32m checksum + // verification. + VersionMConst ChecksumConst = 0x2bc830a3 +) + +// Version defines the current set of bech32 versions. +type Version uint8 + +const ( + // Version0 defines the original bech version. + Version0 Version = iota + + // VersionM is the new bech32 version defined in BIP-350, also known as + // bech32m. + VersionM + + // VersionUnknown denotes an unknown bech version. + VersionUnknown +) + +// VersionToConsts maps bech32 versions to the checksum constant to be used +// when encoding, and asserting a particular version when decoding. +var VersionToConsts = map[Version]ChecksumConst{ + Version0: Version0Const, + VersionM: VersionMConst, +} + +// ConstsToVersion maps a bech32 constant to the version it's associated with. 
+var ConstsToVersion = map[ChecksumConst]Version{ + Version0Const: Version0, + VersionMConst: VersionM, +} From 14f90e5946920a5034ed728a88de0553e984240c Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Fri, 16 Jul 2021 17:41:55 -0700 Subject: [PATCH 2/3] bech32: add additional field to ErrInvalidChecksum (bech32m version) In this commit, we add an additional field to the ErrInvalidChecksum, the bech32m version of a checksum. When decoding, we don't know what version they actually _intended_ to use, so we'll opt to include both checksums to aid in debugging and error reporting. --- bech32/error.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/bech32/error.go b/bech32/error.go index c987b6e..e8b1fe8 100644 --- a/bech32/error.go +++ b/bech32/error.go @@ -65,15 +65,17 @@ func (e ErrNonCharsetChar) Error() string { } // ErrInvalidChecksum is returned when the extracted checksum of the string -// is different than what was expected. +// is different than what was expected. Both the original version, as well as +// the new bech32m checksum may be specified. type ErrInvalidChecksum struct { - Expected string - Actual string + Expected string + ExpectedM string + Actual string } func (e ErrInvalidChecksum) Error() string { - return fmt.Sprintf("invalid checksum (expected %v got %v)", - e.Expected, e.Actual) + return fmt.Sprintf("invalid checksum (expected (bech32=%v, "+ + "bech32m=%v), got %v)", e.Expected, e.ExpectedM, e.Actual) } // ErrInvalidDataByte is returned when a byte outside the range required for From 23db973afaec68562e1f063cb663f6d27183d674 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Fri, 16 Jul 2021 17:45:01 -0700 Subject: [PATCH 3/3] bech32: add new EncodeM and DecodeGeneric functions for bech32 In this commit, we add two new package level functions: `EncodeM`, and `DecodeGeneric`. The new encode method is intended to allow callers to specify that they want to use the new bech32m checksum. 
This should be used when encoding segwit addresses with version 1 and beyond. The new `DecodeGeneric` function allows a caller to decode a bech32 and bech32m string with a single function. A new return value is added which is the version of the returned bech32 string, which allows callers to perform additional segwit addr validation (v1+ should use bech32m etc). We opted to add new functions rather than modifying the existing functions to not cause a breaking API change, as most uses in the wild can just use the existing functions, and only taproot related logic/code needs to worry about the new methods. A series of tests have been added to ensure that `DecodeGeneric` extracts the proper bech version, and we've also adopted the bech32m tests from BIP 350. --- bech32/bech32.go | 136 +++++++++++++++++++++++++++++----------- bech32/bech32_test.go | 140 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 231 insertions(+), 45 deletions(-) diff --git a/bech32/bech32.go b/bech32/bech32.go index fc2c063..1bc75a3 100644 --- a/bech32/bech32.go +++ b/bech32/bech32.go @@ -123,8 +123,11 @@ func bech32Polymod(hrp string, values, checksum []byte) int { // and 126), otherwise the results are undefined. // // For more details on the checksum calculation, please refer to BIP 173. -func writeBech32Checksum(hrp string, data []byte, bldr *strings.Builder) { - polymod := bech32Polymod(hrp, data, nil) ^ 1 +func writeBech32Checksum(hrp string, data []byte, bldr *strings.Builder, + version Version) { + + bech32Const := int(VersionToConsts[version]) + polymod := bech32Polymod(hrp, data, nil) ^ bech32Const for i := 0; i < 6; i++ { b := byte((polymod >> uint(5*(5-i))) & 31) @@ -137,38 +140,47 @@ func writeBech32Checksum(hrp string, data []byte, bldr *strings.Builder) { // bech32VerifyChecksum verifies whether the bech32 string specified by the // provided hrp and payload data (encoded as 5 bits per element byte slice) has -// the correct checksum suffix. 
+// the correct checksum suffix. The version of bech32 used (bech32 OG, or +// bech32m) is also returned to allow the caller to perform proper address +// validation (segwitv0 should use bech32, v1+ should use bech32m). // // Data MUST have more than 6 elements, otherwise this function panics. // // For more details on the checksum verification, please refer to BIP 173. -func bech32VerifyChecksum(hrp string, data []byte) bool { +func bech32VerifyChecksum(hrp string, data []byte) (Version, bool) { checksum := data[len(data)-6:] values := data[:len(data)-6] polymod := bech32Polymod(hrp, values, checksum) - return polymod == 1 + + // Before BIP-350, we'd always check this against a static constant of + // 1 to know if the checksum was computed properly. As we want to + // generically support decoding for bech32m as well as bech32, we'll + // look up the returned value and compare it to the set of defined + // constants. + bech32Version, ok := ConstsToVersion[ChecksumConst(polymod)] + if ok { + return bech32Version, true + } + + return VersionUnknown, false } -// DecodeNoLimit decodes a bech32 encoded string, returning the human-readable -// part and the data part excluding the checksum. This function does NOT -// validate against the BIP-173 maximum length allowed for bech32 strings and -// is meant for use in custom applications (such as lightning network payment -// requests), NOT on-chain addresses. -// -// Note that the returned data is 5-bit (base32) encoded and the human-readable -// part will be lowercase. -func DecodeNoLimit(bech string) (string, []byte, error) { +// decodeNoLimit is a bech32 checksum version aware arbitrary string length +// decoder. This function will return the version of the decoded checksum +// constant so higher level validation can be performed to ensure the correct +// version of bech32 was used when encoding. 
+func decodeNoLimit(bech string) (string, []byte, Version, error) { // The minimum allowed size of a bech32 string is 8 characters, since it // needs a non-empty HRP, a separator, and a 6 character checksum. if len(bech) < 8 { - return "", nil, ErrInvalidLength(len(bech)) + return "", nil, VersionUnknown, ErrInvalidLength(len(bech)) } // Only ASCII characters between 33 and 126 are allowed. var hasLower, hasUpper bool for i := 0; i < len(bech); i++ { if bech[i] < 33 || bech[i] > 126 { - return "", nil, ErrInvalidCharacter(bech[i]) + return "", nil, VersionUnknown, ErrInvalidCharacter(bech[i]) } // The characters must be either all lowercase or all uppercase. Testing @@ -176,7 +188,7 @@ func DecodeNoLimit(bech string) (string, []byte, error) { hasLower = hasLower || (bech[i] >= 97 && bech[i] <= 122) hasUpper = hasUpper || (bech[i] >= 65 && bech[i] <= 90) if hasLower && hasUpper { - return "", nil, ErrMixedCase{} + return "", nil, VersionUnknown, ErrMixedCase{} } } @@ -191,7 +203,7 @@ func DecodeNoLimit(bech string) (string, []byte, error) { // last 6 characters of the string (since checksum cannot contain '1'). one := strings.LastIndexByte(bech, '1') if one < 1 || one+7 > len(bech) { - return "", nil, ErrInvalidSeparatorIndex(one) + return "", nil, VersionUnknown, ErrInvalidSeparatorIndex(one) } // The human-readable part is everything before the last '1'. @@ -202,12 +214,13 @@ func DecodeNoLimit(bech string) (string, []byte, error) { // 'charset'. decoded, err := toBytes(data) if err != nil { - return "", nil, err + return "", nil, VersionUnknown, err } // Verify if the checksum (stored inside decoded[:]) is valid, given the // previously decoded hrp. - if !bech32VerifyChecksum(hrp, decoded) { + bech32Version, ok := bech32VerifyChecksum(hrp, decoded) + if !ok { // Invalid checksum. Calculate what it should have been, so that the // error contains this information. 
@@ -215,21 +228,42 @@ func DecodeNoLimit(bech string) (string, []byte, error) { actual := bech[len(bech)-6:] payload := decoded[:len(decoded)-6] - // Calculate the expected checksum, given the hrp and payload data. + // Calculate the expected checksum, given the hrp and payload + // data. We'll actually compute _both_ possibly valid checksums + // to further aid in debugging. var expectedBldr strings.Builder expectedBldr.Grow(6) - writeBech32Checksum(hrp, payload, &expectedBldr) - expected := expectedBldr.String() + writeBech32Checksum(hrp, payload, &expectedBldr, Version0) + expectedVersion0 := expectedBldr.String() + + var b strings.Builder + b.Grow(6) + writeBech32Checksum(hrp, payload, &expectedBldr, VersionM) + expectedVersionM := expectedBldr.String() err = ErrInvalidChecksum{ - Expected: expected, - Actual: actual, + Expected: expectedVersion0, + ExpectedM: expectedVersionM, + Actual: actual, } - return "", nil, err + return "", nil, VersionUnknown, err } // We exclude the last 6 bytes, which is the checksum. - return hrp, decoded[:len(decoded)-6], nil + return hrp, decoded[:len(decoded)-6], bech32Version, nil +} + +// DecodeNoLimit decodes a bech32 encoded string, returning the human-readable +// part and the data part excluding the checksum. This function does NOT +// validate against the BIP-173 maximum length allowed for bech32 strings and +// is meant for use in custom applications (such as lightning network payment +// requests), NOT on-chain addresses. +// +// Note that the returned data is 5-bit (base32) encoded and the human-readable +// part will be lowercase. 
+func DecodeNoLimit(bech string) (string, []byte, error) { + hrp, data, _, err := decodeNoLimit(bech) + return hrp, data, err } // Decode decodes a bech32 encoded string, returning the human-readable part and @@ -243,16 +277,31 @@ func Decode(bech string) (string, []byte, error) { return "", nil, ErrInvalidLength(len(bech)) } - return DecodeNoLimit(bech) + hrp, data, _, err := decodeNoLimit(bech) + return hrp, data, err } -// Encode encodes a byte slice into a bech32 string with the given -// human-readable part (HRP). The HRP will be converted to lowercase if needed -// since mixed cased encodings are not permitted and lowercase is used for -// checksum purposes. Note that the bytes must each encode 5 bits (base32). -func Encode(hrp string, data []byte) (string, error) { - // The resulting bech32 string is the concatenation of the lowercase hrp, - // the separator 1, data and the 6-byte checksum. +// DecodeGeneric is identical to the existing Decode method, but will also +// return the bech32 version that matches the decoded checksum. This method should +// be used when decoding segwit addresses, as it enables additional +// verification to ensure the proper checksum is used. +func DecodeGeneric(bech string) (string, []byte, Version, error) { + // The maximum allowed length for a bech32 string is 90. + if len(bech) > 90 { + return "", nil, VersionUnknown, ErrInvalidLength(len(bech)) + } + + return decodeNoLimit(bech) +} + +// encodeGeneric is the base bech32 encoding function that is aware of the +// existence of the checksum versions. This method is private, as the Encode +// and EncodeM methods are intended to be used instead. +func encodeGeneric(hrp string, data []byte, + version Version) (string, error) { + + // The resulting bech32 string is the concatenation of the lowercase + // hrp, the separator 1, data and the 6-byte checksum. 
hrp = strings.ToLower(hrp) var bldr strings.Builder bldr.Grow(len(hrp) + 1 + len(data) + 6) @@ -268,11 +317,26 @@ func Encode(hrp string, data []byte) (string, error) { } // Calculate and write the checksum of the data. - writeBech32Checksum(hrp, data, &bldr) + writeBech32Checksum(hrp, data, &bldr, version) return bldr.String(), nil } +// Encode encodes a byte slice into a bech32 string with the given +// human-readable part (HRP). The HRP will be converted to lowercase if needed +// since mixed cased encodings are not permitted and lowercase is used for +// checksum purposes. Note that the bytes must each encode 5 bits (base32). +func Encode(hrp string, data []byte) (string, error) { + return encodeGeneric(hrp, data, Version0) +} + +// EncodeM is exactly the same as the Encode method, but it uses the new +// bech32m constant instead of the original one. It should be used whenever one +// attempts to encode a segwit address of v1 and beyond. +func EncodeM(hrp string, data []byte) (string, error) { + return encodeGeneric(hrp, data, VersionM) +} + // ConvertBits converts a byte slice where each byte is encoding fromBits bits, // to a byte slice where each byte is encoding toBits bits. 
func ConvertBits(data []byte, fromBits, toBits uint8, pad bool) ([]byte, error) { diff --git a/bech32/bech32_test.go b/bech32/bech32_test.go index 9da0588..1e04905 100644 --- a/bech32/bech32_test.go +++ b/bech32/bech32_test.go @@ -22,17 +22,18 @@ func TestBech32(t *testing.T) { expectedError error }{ {"A12UEL5L", nil}, + {"a12uel5l", nil}, {"an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs", nil}, {"abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw", nil}, {"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", nil}, {"split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", nil}, - {"split1checkupstagehandshakeupstreamerranterredcaperred2y9e2w", ErrInvalidChecksum{"2y9e3w", "2y9e2w"}}, // invalid checksum - {"s lit1checkupstagehandshakeupstreamerranterredcaperredp8hs2p", ErrInvalidCharacter(' ')}, // invalid character (space) in hrp - {"spl\x7Ft1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidCharacter(127)}, // invalid character (DEL) in hrp - {"split1cheo2y9e2w", ErrNonCharsetChar('o')}, // invalid character (o) in data part - {"split1a2y9w", ErrInvalidSeparatorIndex(5)}, // too short data part - {"1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidSeparatorIndex(0)}, // empty hrp - {"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqsqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", ErrInvalidLength(91)}, // too long + {"split1checkupstagehandshakeupstreamerranterredcaperred2y9e2w", ErrInvalidChecksum{"2y9e3w", "2y9e3wlc445v", "2y9e2w"}}, // invalid checksum + {"s lit1checkupstagehandshakeupstreamerranterredcaperredp8hs2p", ErrInvalidCharacter(' ')}, // invalid character (space) in hrp + {"spl\x7Ft1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidCharacter(127)}, // invalid character (DEL) in hrp + {"split1cheo2y9e2w", ErrNonCharsetChar('o')}, // invalid character (o) in data part + {"split1a2y9w", ErrInvalidSeparatorIndex(5)}, // 
too short data part + {"1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", ErrInvalidSeparatorIndex(0)}, // empty hrp + {"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqsqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", ErrInvalidLength(91)}, // too long // Additional test vectors used in bitcoin core {" 1nwldj5", ErrInvalidCharacter(' ')}, @@ -44,7 +45,7 @@ func TestBech32(t *testing.T) { {"x1b4n0q5v", ErrNonCharsetChar(98)}, {"li1dgmt3", ErrInvalidSeparatorIndex(2)}, {"de1lg7wt\xff", ErrInvalidCharacter(0xff)}, - {"A1G7SGD8", ErrInvalidChecksum{"2uel5l", "g7sgd8"}}, + {"A1G7SGD8", ErrInvalidChecksum{"2uel5l", "2uel5llqfn3a", "g7sgd8"}}, {"10a06t8", ErrInvalidLength(7)}, {"1qzzfhee", ErrInvalidSeparatorIndex(0)}, {"a12UEL5L", ErrMixedCase{}}, @@ -86,6 +87,127 @@ func TestBech32(t *testing.T) { } } +// TestBech32M tests that the following set of strings, based on the test +// vectors in BIP-350 are either valid or invalid using the new bech32m +// checksum algo. Some of these strings are similar to the set of above test +// vectors, but end up with different checksums. 
+func TestBech32M(t *testing.T) { + tests := []struct { + str string + expectedError error + }{ + {"A1LQFN3A", nil}, + {"a1lqfn3a", nil}, + {"an83characterlonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11sg7hg6", nil}, + {"abcdef1l7aum6echk45nj3s0wdvt2fg8x9yrzpqzd3ryx", nil}, + {"11llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllludsr8", nil}, + {"split1checkupstagehandshakeupstreamerranterredcaperredlc445v", nil}, + {"?1v759aa", nil}, + + // Additional test vectors used in bitcoin core + {"\x201xj0phk", ErrInvalidCharacter('\x20')}, + {"\x7f1g6xzxy", ErrInvalidCharacter('\x7f')}, + {"\x801vctc34", ErrInvalidCharacter('\x80')}, + {"an84characterslonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11d6pts4", ErrInvalidLength(91)}, + {"qyrz8wqd2c9m", ErrInvalidSeparatorIndex(-1)}, + {"1qyrz8wqd2c9m", ErrInvalidSeparatorIndex(0)}, + {"y1b0jsk6g", ErrNonCharsetChar(98)}, + {"lt1igcx5c0", ErrNonCharsetChar(105)}, + {"in1muywd", ErrInvalidSeparatorIndex(2)}, + {"mm1crxm3i", ErrNonCharsetChar(105)}, + {"au1s5cgom", ErrNonCharsetChar(111)}, + {"M1VUXWEZ", ErrInvalidChecksum{"mzl49c", "mzl49cw70eq6", "vuxwez"}}, + {"16plkw9", ErrInvalidLength(7)}, + {"1p2gdwpf", ErrInvalidSeparatorIndex(0)}, + + {" 1nwldj5", ErrInvalidCharacter(' ')}, + {"\x7f" + "1axkwrx", ErrInvalidCharacter(0x7f)}, + {"\x801eym55h", ErrInvalidCharacter(0x80)}, + } + + for i, test := range tests { + str := test.str + hrp, decoded, err := Decode(str) + if test.expectedError != err { + t.Errorf("%d: (%v) expected decoding error %v "+ + "instead got %v", i, str, test.expectedError, + err) + continue + } + + if err != nil { + // End test case here if a decoding error was expected. + continue + } + + // Check that it encodes to the same string, using bech32 m. 
+ encoded, err := EncodeM(hrp, decoded) + if err != nil { + t.Errorf("encoding failed: %v", err) + } + + if encoded != strings.ToLower(str) { + t.Errorf("expected data to encode to %v, but got %v", + str, encoded) + } + + // Flip a bit in the string an make sure it is caught. + pos := strings.LastIndexAny(str, "1") + flipped := str[:pos+1] + string((str[pos+1] ^ 1)) + str[pos+2:] + _, _, err = Decode(flipped) + if err == nil { + t.Error("expected decoding to fail") + } + } +} + +// TestBech32DecodeGeneric tests that given a bech32 string, or a bech32m +// string, the proper checksum version is returned so that callers can perform +// segwit addr validation. +func TestBech32DecodeGeneric(t *testing.T) { + tests := []struct { + str string + version Version + }{ + {"A1LQFN3A", VersionM}, + {"a1lqfn3a", VersionM}, + {"an83characterlonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11sg7hg6", VersionM}, + {"abcdef1l7aum6echk45nj3s0wdvt2fg8x9yrzpqzd3ryx", VersionM}, + {"11llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllludsr8", VersionM}, + {"split1checkupstagehandshakeupstreamerranterredcaperredlc445v", VersionM}, + {"?1v759aa", VersionM}, + + {"A12UEL5L", Version0}, + {"a12uel5l", Version0}, + {"an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs", Version0}, + {"abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw", Version0}, + {"11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j", Version0}, + {"split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w", Version0}, + + {"BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4", Version0}, + {"tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sl5k7", Version0}, + {"bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7kt5nd6y", VersionM}, + {"BC1SW50QGDZ25J", VersionM}, + {"bc1zw508d6qejxtdg4y5r3zarvaryvaxxpcs", VersionM}, + 
{"tb1qqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesrxh6hy", Version0}, + {"tb1pqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesf3hn0c", VersionM}, + {"bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqzk5jj0", VersionM}, + } + for i, test := range tests { + _, _, version, err := DecodeGeneric(test.str) + if err != nil { + t.Errorf("%d: (%v) unexpected error during "+ + "decoding: %v", i, test.str, err) + continue + } + + if version != test.version { + t.Errorf("(%v): invalid version: expected %v, got %v", + test.str, test.version, version) + } + } +} + // TestMixedCaseEncode ensures mixed case HRPs are converted to lowercase as // expected when encoding and that decoding the produced encoding when converted // to all uppercase produces the lowercase HRP and original data. @@ -242,7 +364,7 @@ func TestBech32Base256(t *testing.T) { }, { name: "same as previous but with checksum invalidated", encoded: "split1checkupstagehandshakeupstreamerranterredcaperred2y9e2w", - err: ErrInvalidChecksum{"2y9e3w", "2y9e2w"}, + err: ErrInvalidChecksum{"2y9e3w", "2y9e3wlc445v", "2y9e2w"}, }, { name: "hrp with invalid character (space)", encoded: "s lit1checkupstagehandshakeupstreamerranterredcaperredp8hs2p",