lbcd/claimtrie/normalization/normalizer_icu_test.go
Roy Lee 753f413c13 [lbry] claimtrie: import current snapshot
Sync to tip

Co-authored-by: Brannon King <countprimes@gmail.com>
2021-12-14 14:00:59 -08:00

65 lines
1.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//go:build use_icu_normalization
// +build use_icu_normalization
package normalization
import (
"encoding/hex"
"testing"
"unicode/utf8"
"github.com/stretchr/testify/assert"
)
// TestNormalizationICU passes normalizeICU to the testNormalization helper
// (defined elsewhere in this package), which performs the actual checks.
func TestNormalizationICU(t *testing.T) {
testNormalization(t, normalizeICU)
}
// BenchmarkNormalizeICU passes normalizeICU to the benchmarkNormalize helper
// (defined elsewhere in this package), which drives the benchmark loop.
func BenchmarkNormalizeICU(b *testing.B) {
benchmarkNormalize(b, normalizeICU)
}
// testStrings lists the sample inputs that TestBlock760150_1020105 feeds to
// both normalizeGo and normalizeICU, asserting that the two produce identical
// output. The entries mix plain ASCII with accented Latin text, Burmese-script
// text and other non-ASCII letters and punctuation.
//
// NOTE(review): these look like real-world names collected as regression
// inputs — confirm provenance. Do not retype the literals by hand: several
// contain characters that are easy to confuse with look-alikes.
var testStrings = []string{
"Les-Masques-Blancs-Die-Dead-place-Sathonay-28-Août",
"Bez-komentu-výbuch-z-vnútra,-radšej-pozri-video...-",
"၂-နစ်အကြာမှာ",
"ငရဲပြည်မှ-6",
"@happyvision",
"ကမ္ဘာပျက်ကိန်း-9",
"ဝိညာဉ်နား၊-3",
"un-amore-nuovo-o-un-ritorno-cosa-mi-dona",
"è-innamorato-di-me-anche-se-non-lo-dice",
"ပြင်ဆင်ပါ-no.1",
"ပြင်ဆင်ပါ-no.4",
"ပြင်ဆင်ပါ-no.2",
"ပြင်ဆင်ပါ-no.3",
"ငရဲပြည်မှ-5",
"ပြင်ဆင်ပါ-no.6",
"ပြင်ဆင်ပါ-no.5",
"ပြင်ဆင်ပါ-no.7",
"ပြင်ဆင်ပါ-no.8",
"အချိန်-2",
"ဝိညာဉ်နား၊-4",
"ပြင်ဆင်ပါ-no.-13",
"ပြင်ဆင်ပါ-no.15",
"ပြင်ဆင်ပါ-9",
"schilddrüsenhormonsubstitution-nach",
"Linxextremismus-JPzuG_UBtEg",
"Ꮖ---N---------N-Ꮹ----on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”",
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
}
// TestBlock760150_1020105 checks that normalizeGo and normalizeICU agree,
// first on the byte sequence 0x43 0xEF 0xBF 0xBD ("C" followed by the UTF-8
// encoding of U+FFFD, the Unicode replacement character) and then on every
// entry of testStrings.
//
// NOTE(review): the name suggests the inputs relate to blocks 760150 and
// 1020105 — confirm against the chain history.
func TestBlock760150_1020105(t *testing.T) {
	// Decode the fixture and stop immediately if the literal is ever
	// mistyped, instead of silently comparing the normalizers on bad input.
	input, err := hex.DecodeString("43efbfbd")
	if err != nil {
		t.Fatalf("decoding hex fixture: %v", err)
	}
	assert.True(t, utf8.Valid(input))

	fromGo := normalizeGo(input)
	fromICU := normalizeICU(input)
	assert.Equal(t, fromGo, fromICU)

	for i, s := range testStrings {
		// Each normalizer receives its own []byte copy of the string.
		wantGo := normalizeGo([]byte(s))
		gotICU := normalizeICU([]byte(s))
		assert.Equal(t, wantGo, gotICU, "%d: %s != %s", i, string(wantGo), string(gotICU))
		// t.Logf("%s -> %s", s, string(gotICU))
	}
}