fix bad rune handling
formatting
This commit is contained in:
parent
0be18205c8
commit
ca18f3e8a2
4 changed files with 35 additions and 8 deletions
4
.github/workflows/create-release.yml
vendored
4
.github/workflows/create-release.yml
vendored
|
@ -14,6 +14,10 @@ jobs:
|
||||||
go: [1.16]
|
go: [1.16]
|
||||||
os: [linux, darwin, windows]
|
os: [linux, darwin, windows]
|
||||||
ar: [amd64, arm64]
|
ar: [amd64, arm64]
|
||||||
|
exclude:
|
||||||
|
- go: 1.16
|
||||||
|
os: windows
|
||||||
|
ar: arm64
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
|
|
|
@ -103,8 +103,23 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
|
||||||
start := make([]byte, len(name)+1) // the extra trailing byte stays zero; need a constant len for stack alloc?
|
start := make([]byte, len(name)+1) // the extra trailing byte stays zero; need a constant len for stack alloc?
|
||||||
copy(start, name)
|
copy(start, name)
|
||||||
|
|
||||||
|
end := make([]byte, len(name)) // max name length is 255
|
||||||
|
copy(end, name)
|
||||||
|
validEnd := false
|
||||||
|
for i := len(name) - 1; i >= 0; i-- {
|
||||||
|
end[i]++
|
||||||
|
if end[i] != 0 {
|
||||||
|
validEnd = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !validEnd {
|
||||||
|
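end = nil // name is empty or every byte was 0xFF, so no finite successor key exists; presumably a nil UpperBound means "iterate to the end of the table" (TODO: confirm against Pebble docs)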
|
||||||
|
}
|
||||||
|
|
||||||
prefixIterOptions := &pebble.IterOptions{
|
prefixIterOptions := &pebble.IterOptions{
|
||||||
LowerBound: start,
|
LowerBound: start,
|
||||||
|
UpperBound: end,
|
||||||
}
|
}
|
||||||
|
|
||||||
iter := repo.db.NewIter(prefixIterOptions)
|
iter := repo.db.NewIter(prefixIterOptions)
|
||||||
|
@ -112,9 +127,6 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
|
||||||
|
|
||||||
for iter.First(); iter.Valid(); iter.Next() {
|
for iter.First(); iter.Valid(); iter.Next() {
|
||||||
// NOTE! iter.Key() is ephemeral!
|
// NOTE! iter.Key() is ephemeral!
|
||||||
if len(iter.Key()) <= len(name) || !bytes.Equal(name, iter.Key()[:len(name)]) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
changes, err := unmarshalChanges(iter.Key(), iter.Value())
|
changes, err := unmarshalChanges(iter.Key(), iter.Value())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Wrapf(err, "from unmarshaller at %s", iter.Key())
|
return errors.Wrapf(err, "from unmarshaller at %s", iter.Key())
|
||||||
|
|
|
@ -35,8 +35,10 @@ func init() {
|
||||||
func CaseFold(name []byte) []byte {
|
func CaseFold(name []byte) []byte {
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
b.Grow(len(name))
|
b.Grow(len(name))
|
||||||
for _, r := range string(name) {
|
for i := 0; i < len(name); {
|
||||||
if r == utf8.RuneError {
|
r, w := utf8.DecodeRune(name[i:])
|
||||||
|
if r == utf8.RuneError && w < 2 {
|
||||||
|
// HACK: utf8.RuneError (U+FFFD) is itself a valid character when decoded from a real multi-byte encoding; utf8.DecodeRune reports a width of 0 or 1 only for empty or invalid input
|
||||||
return name
|
return name
|
||||||
}
|
}
|
||||||
replacements := foldMap[r]
|
replacements := foldMap[r]
|
||||||
|
@ -47,6 +49,7 @@ func CaseFold(name []byte) []byte {
|
||||||
} else {
|
} else {
|
||||||
b.WriteRune(r)
|
b.WriteRune(r)
|
||||||
}
|
}
|
||||||
|
i += w
|
||||||
}
|
}
|
||||||
return b.Bytes()
|
return b.Bytes()
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,8 +3,10 @@
|
||||||
package normalization
|
package normalization
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/hex"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"testing"
|
"testing"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestNormalizationICU(t *testing.T) {
|
func TestNormalizationICU(t *testing.T) {
|
||||||
|
@ -16,8 +18,14 @@ func BenchmarkNormalizeICU(b *testing.B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBlock760150(t *testing.T) {
|
func TestBlock760150(t *testing.T) {
|
||||||
test := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
|
test, _ := hex.DecodeString("43efbfbd")
|
||||||
a := normalizeGo([]byte(test))
|
assert.True(t, utf8.Valid(test))
|
||||||
b := normalizeICU([]byte(test))
|
a := normalizeGo(test)
|
||||||
|
b := normalizeICU(test)
|
||||||
|
assert.Equal(t, a, b)
|
||||||
|
|
||||||
|
test2 := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
|
||||||
|
a = normalizeGo([]byte(test2))
|
||||||
|
b = normalizeICU([]byte(test2))
|
||||||
assert.Equal(t, a, b)
|
assert.Equal(t, a, b)
|
||||||
}
|
}
|
Loading…
Add table
Reference in a new issue