fix bad rune handling

formatting
This commit is contained in:
Brannon King 2021-08-19 16:40:37 -04:00
parent 0be18205c8
commit ca18f3e8a2
4 changed files with 35 additions and 8 deletions

View file

@ -14,6 +14,10 @@ jobs:
go: [1.16] go: [1.16]
os: [linux, darwin, windows] os: [linux, darwin, windows]
ar: [amd64, arm64] ar: [amd64, arm64]
exclude:
- go: 1.16
os: windows
ar: arm64
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Set up Go - name: Set up Go

View file

@ -103,8 +103,23 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
start := make([]byte, len(name)+1) // zeros that last byte; need a constant len for stack alloc? start := make([]byte, len(name)+1) // zeros that last byte; need a constant len for stack alloc?
copy(start, name) copy(start, name)
end := make([]byte, len(name)) // max name length is 255
copy(end, name)
validEnd := false
for i := len(name) - 1; i >= 0; i-- {
end[i]++
if end[i] != 0 {
validEnd = true
break
}
}
if !validEnd {
end = nil // no finite upper bound exists (name is empty or every byte is 0xFF); a nil UpperBound leaves the iterator unbounded above
}
prefixIterOptions := &pebble.IterOptions{ prefixIterOptions := &pebble.IterOptions{
LowerBound: start, LowerBound: start,
UpperBound: end,
} }
iter := repo.db.NewIter(prefixIterOptions) iter := repo.db.NewIter(prefixIterOptions)
@ -112,9 +127,6 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
for iter.First(); iter.Valid(); iter.Next() { for iter.First(); iter.Valid(); iter.Next() {
// NOTE! iter.Key() is ephemeral! // NOTE! iter.Key() is ephemeral!
if len(iter.Key()) <= len(name) || !bytes.Equal(name, iter.Key()[:len(name)]) {
break
}
changes, err := unmarshalChanges(iter.Key(), iter.Value()) changes, err := unmarshalChanges(iter.Key(), iter.Value())
if err != nil { if err != nil {
return errors.Wrapf(err, "from unmarshaller at %s", iter.Key()) return errors.Wrapf(err, "from unmarshaller at %s", iter.Key())

View file

@ -35,8 +35,10 @@ func init() {
func CaseFold(name []byte) []byte { func CaseFold(name []byte) []byte {
var b bytes.Buffer var b bytes.Buffer
b.Grow(len(name)) b.Grow(len(name))
for _, r := range string(name) { for i := 0; i < len(name); {
if r == utf8.RuneError { r, w := utf8.DecodeRune(name[i:])
if r == utf8.RuneError && w < 2 {
// HACK: utf8.DecodeRune reports an invalid encoding as (RuneError, 1); a larger width means the input really contains an encoded U+FFFD, which is a valid character
return name return name
} }
replacements := foldMap[r] replacements := foldMap[r]
@ -47,6 +49,7 @@ func CaseFold(name []byte) []byte {
} else { } else {
b.WriteRune(r) b.WriteRune(r)
} }
i += w
} }
return b.Bytes() return b.Bytes()
} }

View file

@ -3,8 +3,10 @@
package normalization package normalization
import ( import (
"encoding/hex"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"testing" "testing"
"unicode/utf8"
) )
func TestNormalizationICU(t *testing.T) { func TestNormalizationICU(t *testing.T) {
@ -16,8 +18,14 @@ func BenchmarkNormalizeICU(b *testing.B) {
} }
func TestBlock760150(t *testing.T) { func TestBlock760150(t *testing.T) {
test := "Ꮖ---N---------N-Ꮹ----on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”" test, _ := hex.DecodeString("43efbfbd")
a := normalizeGo([]byte(test)) assert.True(t, utf8.Valid(test))
b := normalizeICU([]byte(test)) a := normalizeGo(test)
b := normalizeICU(test)
assert.Equal(t, a, b)
test2 := "Ꮖ---N---------N-Ꮹ----on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
a = normalizeGo([]byte(test2))
b = normalizeICU([]byte(test2))
assert.Equal(t, a, b) assert.Equal(t, a, b)
} }