fix bad rune handling
formatting
This commit is contained in:
parent
0be18205c8
commit
ca18f3e8a2
4 changed files with 35 additions and 8 deletions
4
.github/workflows/create-release.yml
vendored
4
.github/workflows/create-release.yml
vendored
|
@ -14,6 +14,10 @@ jobs:
|
|||
go: [1.16]
|
||||
os: [linux, darwin, windows]
|
||||
ar: [amd64, arm64]
|
||||
exclude:
|
||||
- go: 1.16
|
||||
os: windows
|
||||
ar: arm64
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Set up Go
|
||||
|
|
|
@ -103,8 +103,23 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
|
|||
start := make([]byte, len(name)+1) // zeros that last byte; need a constant len for stack alloc?
|
||||
copy(start, name)
|
||||
|
||||
end := make([]byte, len(name)) // max name length is 255
|
||||
copy(end, name)
|
||||
validEnd := false
|
||||
for i := len(name) - 1; i >= 0; i-- {
|
||||
end[i]++
|
||||
if end[i] != 0 {
|
||||
validEnd = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !validEnd {
|
||||
end = nil // uh, we think this means run to the end of the table
|
||||
}
|
||||
|
||||
prefixIterOptions := &pebble.IterOptions{
|
||||
LowerBound: start,
|
||||
UpperBound: end,
|
||||
}
|
||||
|
||||
iter := repo.db.NewIter(prefixIterOptions)
|
||||
|
@ -112,9 +127,6 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
|
|||
|
||||
for iter.First(); iter.Valid(); iter.Next() {
|
||||
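// NOTE: the slice returned by iter.Key() is only valid until the iterator is repositioned (e.g. by Next); copy it if it must be retained.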
|
||||
if len(iter.Key()) <= len(name) || !bytes.Equal(name, iter.Key()[:len(name)]) {
|
||||
break
|
||||
}
|
||||
changes, err := unmarshalChanges(iter.Key(), iter.Value())
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "from unmarshaller at %s", iter.Key())
|
||||
|
|
|
@ -35,8 +35,10 @@ func init() {
|
|||
func CaseFold(name []byte) []byte {
|
||||
var b bytes.Buffer
|
||||
b.Grow(len(name))
|
||||
for _, r := range string(name) {
|
||||
if r == utf8.RuneError {
|
||||
for i := 0; i < len(name); {
|
||||
r, w := utf8.DecodeRune(name[i:])
|
||||
if r == utf8.RuneError && w < 2 {
|
||||
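// HACK: utf8.DecodeRune reports an invalid encoding as (RuneError, 1) and empty input as (RuneError, 0);
// a RuneError decoded with a width of 2 or more is a genuine U+FFFD present in the input, which is a valid character.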
|
||||
return name
|
||||
}
|
||||
replacements := foldMap[r]
|
||||
|
@ -47,6 +49,7 @@ func CaseFold(name []byte) []byte {
|
|||
} else {
|
||||
b.WriteRune(r)
|
||||
}
|
||||
i += w
|
||||
}
|
||||
return b.Bytes()
|
||||
}
|
||||
|
|
|
@ -3,8 +3,10 @@
|
|||
package normalization
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestNormalizationICU(t *testing.T) {
|
||||
|
@ -16,8 +18,14 @@ func BenchmarkNormalizeICU(b *testing.B) {
|
|||
}
|
||||
|
||||
func TestBlock760150(t *testing.T) {
|
||||
test := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
|
||||
a := normalizeGo([]byte(test))
|
||||
b := normalizeICU([]byte(test))
|
||||
test, _ := hex.DecodeString("43efbfbd")
|
||||
assert.True(t, utf8.Valid(test))
|
||||
a := normalizeGo(test)
|
||||
b := normalizeICU(test)
|
||||
assert.Equal(t, a, b)
|
||||
|
||||
test2 := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
|
||||
a = normalizeGo([]byte(test2))
|
||||
b = normalizeICU([]byte(test2))
|
||||
assert.Equal(t, a, b)
|
||||
}
|
Loading…
Add table
Reference in a new issue