From ca18f3e8a20ef1346068fdbb97d5115d6942ca72 Mon Sep 17 00:00:00 2001 From: Brannon King Date: Thu, 19 Aug 2021 16:40:37 -0400 Subject: [PATCH] fix bad rune handling formatting --- .github/workflows/create-release.yml | 4 ++++ claimtrie/node/noderepo/pebble.go | 18 +++++++++++++++--- claimtrie/normalization/case_folder.go | 7 +++++-- claimtrie/normalization/normalizer_icu_test.go | 14 +++++++++++--- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index bf1aa438..aa5aee37 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -14,6 +14,10 @@ jobs: go: [1.16] os: [linux, darwin, windows] ar: [amd64, arm64] + exclude: + - go: 1.16 + os: windows + ar: arm64 runs-on: ubuntu-latest steps: - name: Set up Go diff --git a/claimtrie/node/noderepo/pebble.go b/claimtrie/node/noderepo/pebble.go index b3c06c98..29f5e05f 100644 --- a/claimtrie/node/noderepo/pebble.go +++ b/claimtrie/node/noderepo/pebble.go @@ -103,8 +103,23 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change) start := make([]byte, len(name)+1) // zeros that last byte; need a constant len for stack alloc? copy(start, name) + end := make([]byte, len(name)) // max name length is 255 + copy(end, name) + validEnd := false + for i := len(name) - 1; i >= 0; i-- { + end[i]++ + if end[i] != 0 { + validEnd = true + break + } + } + if !validEnd { + end = nil // uh, we think this means run to the end of the table + } + prefixIterOptions := &pebble.IterOptions{ LowerBound: start, + UpperBound: end, } iter := repo.db.NewIter(prefixIterOptions) @@ -112,9 +127,6 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change) for iter.First(); iter.Valid(); iter.Next() { // NOTE! iter.Key() is ephemeral! - if len(iter.Key()) <= len(name) || !bytes.Equal(name, iter.Key()[:len(name)]) { - break - } changes, err := unmarshalChanges(iter.Key(), iter.Value()) if err != nil { return errors.Wrapf(err, "from unmarshaller at %s", iter.Key()) diff --git a/claimtrie/normalization/case_folder.go b/claimtrie/normalization/case_folder.go index 7931e2c8..de63b186 100644 --- a/claimtrie/normalization/case_folder.go +++ b/claimtrie/normalization/case_folder.go @@ -35,8 +35,10 @@ func init() { func CaseFold(name []byte) []byte { var b bytes.Buffer b.Grow(len(name)) - for _, r := range string(name) { - if r == utf8.RuneError { + for i := 0; i < len(name); { + r, w := utf8.DecodeRune(name[i:]) + if r == utf8.RuneError && w < 2 { + // HACK: their RuneError is actually a valid character if coming from a width of 2 or more return name } replacements := foldMap[r] @@ -47,6 +49,7 @@ func CaseFold(name []byte) []byte { } else { b.WriteRune(r) } + i += w } return b.Bytes() } diff --git a/claimtrie/normalization/normalizer_icu_test.go b/claimtrie/normalization/normalizer_icu_test.go index 70b12bad..31ddb4c2 100644 --- a/claimtrie/normalization/normalizer_icu_test.go +++ b/claimtrie/normalization/normalizer_icu_test.go @@ -3,8 +3,10 @@ package normalization import ( + "encoding/hex" "github.com/stretchr/testify/assert" "testing" + "unicode/utf8" ) func TestNormalizationICU(t *testing.T) { @@ -16,8 +18,14 @@ func BenchmarkNormalizeICU(b *testing.B) { } func TestBlock760150(t *testing.T) { - test := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”" - a := normalizeGo([]byte(test)) - b := normalizeICU([]byte(test)) + test, _ := hex.DecodeString("43efbfbd") + assert.True(t, utf8.Valid(test)) + a := normalizeGo(test) + b := normalizeICU(test) + assert.Equal(t, a, b) + + test2 := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”" + a = normalizeGo([]byte(test2)) + b = normalizeICU([]byte(test2)) assert.Equal(t, a, b) } \ No newline at end of file