added string normalization and a unit test for it
This commit is contained in:
parent
2bf2c86c88
commit
451aa193e3
6 changed files with 97 additions and 15 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
|
||||||
|
.idea/
|
55
claim/normalization.go
Normal file
55
claim/normalization.go
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
package claim
|
||||||
|
|
||||||
|
// #cgo CFLAGS: -O2
|
||||||
|
// #cgo LDFLAGS: -licuio -licui18n -licuuc -licudata
|
||||||
|
// #include <stdio.h>
|
||||||
|
// #include <unicode/unorm2.h>
|
||||||
|
// #include <unicode/ustring.h>
|
||||||
|
// #include <unicode/uversion.h>
|
||||||
|
// // icu_version packs the version reported by u_getVersion into one int:
// // the first version component shifted left by 16 bits, plus the second.
// int icu_version() {
//     UVersionInfo version;
//     u_getVersion(version);
//     int major = (int)(version[0]);
//     int minor = version[1];
//     return (major << 16) + minor;
// }
|
||||||
|
// // normalize converts the UTF-8 bytes in name (length bytes) to UTF-16,
// // applies NFD normalization followed by default case folding, and writes
// // the outcome as UTF-8 into result. The conversion back to UTF-8 is told
// // that result holds 1024 bytes, so the caller must provide at least that.
// // Returns the number of bytes written to result, or 0 when any ICU step
// // fails or yields an empty string (result is then not meaningful).
// int normalize(char* name, int length, char* result) {
//     UErrorCode ec = U_ZERO_ERROR;
//     static const UNormalizer2* normalizer = NULL;
//     if (normalizer == NULL) normalizer = unorm2_getNFDInstance(&ec);
//     if (U_FAILURE(ec) || normalizer == NULL) return 0;
//
//     UChar dest[256]; // maximum claim name size is 255; we won't have more UTF16 chars than bytes
//     int32_t dest_len = 0;
//     u_strFromUTF8(dest, 256, &dest_len, name, length, &ec);
//     if (U_FAILURE(ec) || dest_len == 0) return 0;
//
//     UChar normalized[256];
//     dest_len = unorm2_normalize(normalizer, dest, dest_len, normalized, 256, &ec);
//     if (U_FAILURE(ec) || dest_len == 0) return 0;
//
//     dest_len = u_strFoldCase(dest, 256, normalized, dest_len, U_FOLD_CASE_DEFAULT, &ec);
//     if (U_FAILURE(ec) || dest_len == 0) return 0;
//
//     int32_t result_len = 0;
//     u_strToUTF8(result, 1024, &result_len, dest, dest_len, &ec);
//     // On overflow ICU reports the length it would have needed, which can
//     // exceed the 1024-byte buffer; never hand that length back to the caller.
//     if (U_FAILURE(ec)) return 0;
//     return result_len;
// }
|
||||||
|
import "C"
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
func IcuVersion() string {
|
||||||
|
// TODO: we probably need to explode if it's not 63.2 as it affects consensus
|
||||||
|
result := C.icu_version()
|
||||||
|
return fmt.Sprintf("%d.%d", result >> 16, result & 0xffff)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Normalize(value []byte) []byte {
|
||||||
|
name := (*C.char)(unsafe.Pointer(&value[0]))
|
||||||
|
length := C.int(len(value))
|
||||||
|
|
||||||
|
var resultName [1024]byte // inputs are restricted to 255 chars; it shouldn't expand too much past that
|
||||||
|
result := unsafe.Pointer(&resultName[0])
|
||||||
|
|
||||||
|
resultLength := C.normalize(name, length, (*C.char)(result))
|
||||||
|
if resultLength == 0 { return value }
|
||||||
|
if resultLength == -1 { return resultName[0:200] }
|
||||||
|
|
||||||
|
// return resultName[0:resultLength] -- we want to shrink the result (not use a slice on 1024)
|
||||||
|
return C.GoBytes(result, resultLength)
|
||||||
|
}
|
|
@ -89,7 +89,7 @@ func (ct *ClaimTrie) Close() error {
|
||||||
return ct.cleanup()
|
return ct.cleanup()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Height returns the highest height of blocks commited to the ClaimTrie.
|
// Height returns the highest height of blocks committed to the ClaimTrie.
|
||||||
func (ct *ClaimTrie) Height() claim.Height {
|
func (ct *ClaimTrie) Height() claim.Height {
|
||||||
return ct.cm.Head().Meta.Height
|
return ct.cm.Head().Meta.Height
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@ var (
|
||||||
|
|
||||||
var (
|
var (
|
||||||
flagAll = cli.BoolFlag{Name: "all, a", Usage: "Show all nodes", Destination: &all}
|
flagAll = cli.BoolFlag{Name: "all, a", Usage: "Show all nodes", Destination: &all}
|
||||||
flagCheck = cli.BoolTFlag{Name: "chk, c", Usage: "Check Merkle Hash during importing", Destination: &chk}
|
flagCheck = cli.BoolFlag{Name: "chk, c", Usage: "Check Merkle Hash during importing", Destination: &chk}
|
||||||
flagDump = cli.BoolFlag{Name: "dump, d", Usage: "Dump cmds", Destination: &dump}
|
flagDump = cli.BoolFlag{Name: "dump, d", Usage: "Dump cmds", Destination: &dump}
|
||||||
flagVerbose = cli.BoolFlag{Name: "verbose, v", Usage: "Verbose (will be replaced by loglevel)", Destination: &verbose}
|
flagVerbose = cli.BoolFlag{Name: "verbose, v", Usage: "Verbose (will be replaced by loglevel)", Destination: &verbose}
|
||||||
flagAmount = cli.Int64Flag{Name: "amount, a", Usage: "Amount", Destination: (*int64)(&amt)}
|
flagAmount = cli.Int64Flag{Name: "amount, a", Usage: "Amount", Destination: (*int64)(&amt)}
|
||||||
|
@ -62,14 +62,14 @@ func main() {
|
||||||
app.Usage = "A CLI tool for LBRY ClaimTrie"
|
app.Usage = "A CLI tool for LBRY ClaimTrie"
|
||||||
app.Version = "0.0.1"
|
app.Version = "0.0.1"
|
||||||
app.Action = cli.ShowAppHelp
|
app.Action = cli.ShowAppHelp
|
||||||
app.Commands = []cli.Command{
|
app.Commands = []*cli.Command{
|
||||||
{
|
{
|
||||||
Name: "add-claim",
|
Name: "add-claim",
|
||||||
Aliases: []string{"ac"},
|
Aliases: []string{"ac"},
|
||||||
Usage: "Claim a name.",
|
Usage: "Claim a name.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdAddClaim,
|
Action: cmdAddClaim,
|
||||||
Flags: []cli.Flag{flagName, flagOutPoint, flagAmount},
|
Flags: []cli.Flag{&flagName, &flagOutPoint, &flagAmount},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "spend-claim",
|
Name: "spend-claim",
|
||||||
|
@ -77,7 +77,7 @@ func main() {
|
||||||
Usage: "Spend a Claim.",
|
Usage: "Spend a Claim.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdSpendClaim,
|
Action: cmdSpendClaim,
|
||||||
Flags: []cli.Flag{flagName, flagOutPoint},
|
Flags: []cli.Flag{&flagName, &flagOutPoint},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "update-claim",
|
Name: "update-claim",
|
||||||
|
@ -85,7 +85,7 @@ func main() {
|
||||||
Usage: "Update a Claim.",
|
Usage: "Update a Claim.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdUpdateClaim,
|
Action: cmdUpdateClaim,
|
||||||
Flags: []cli.Flag{flagName, flagOutPoint, flagAmount, flagID},
|
Flags: []cli.Flag{&flagName, &flagOutPoint, &flagAmount, &flagID},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "add-support",
|
Name: "add-support",
|
||||||
|
@ -93,7 +93,7 @@ func main() {
|
||||||
Usage: "Support a Claim.",
|
Usage: "Support a Claim.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdAddSupport,
|
Action: cmdAddSupport,
|
||||||
Flags: []cli.Flag{flagName, flagOutPoint, flagAmount, flagID},
|
Flags: []cli.Flag{&flagName, &flagOutPoint, &flagAmount, &flagID},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "spend-support",
|
Name: "spend-support",
|
||||||
|
@ -101,7 +101,7 @@ func main() {
|
||||||
Usage: "Spend a specified Support.",
|
Usage: "Spend a specified Support.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdSpendSupport,
|
Action: cmdSpendSupport,
|
||||||
Flags: []cli.Flag{flagName, flagOutPoint},
|
Flags: []cli.Flag{&flagName, &flagOutPoint},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "show",
|
Name: "show",
|
||||||
|
@ -109,7 +109,7 @@ func main() {
|
||||||
Usage: "Show the status of nodes)",
|
Usage: "Show the status of nodes)",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdShow,
|
Action: cmdShow,
|
||||||
Flags: []cli.Flag{flagAll, flagName, flagHeight, flagDump},
|
Flags: []cli.Flag{&flagAll, &flagName, &flagHeight, &flagDump},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "merkle",
|
Name: "merkle",
|
||||||
|
@ -124,7 +124,7 @@ func main() {
|
||||||
Usage: "Commit the current changes to database.",
|
Usage: "Commit the current changes to database.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdCommit,
|
Action: cmdCommit,
|
||||||
Flags: []cli.Flag{flagHeight},
|
Flags: []cli.Flag{&flagHeight},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "reset",
|
Name: "reset",
|
||||||
|
@ -132,7 +132,7 @@ func main() {
|
||||||
Usage: "Reset the Head commit and a specified commit (by Height).",
|
Usage: "Reset the Head commit and a specified commit (by Height).",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdReset,
|
Action: cmdReset,
|
||||||
Flags: []cli.Flag{flagHeight},
|
Flags: []cli.Flag{&flagHeight},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "log",
|
Name: "log",
|
||||||
|
@ -147,7 +147,7 @@ func main() {
|
||||||
Usage: "Import changes from datbase.",
|
Usage: "Import changes from datbase.",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: cmdImport,
|
Action: cmdImport,
|
||||||
Flags: []cli.Flag{flagHeight, flagCheck, flagVerbose},
|
Flags: []cli.Flag{&flagHeight, &flagCheck, &flagVerbose},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "erase",
|
Name: "erase",
|
||||||
|
@ -161,7 +161,7 @@ func main() {
|
||||||
Aliases: []string{"sh"},
|
Aliases: []string{"sh"},
|
||||||
Usage: "Enter interactive mode",
|
Usage: "Enter interactive mode",
|
||||||
Before: parseArgs,
|
Before: parseArgs,
|
||||||
Action: func(c *cli.Context) { cmdShell(app) },
|
Action: func(c *cli.Context) error { cmdShell(app); return nil },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -105,7 +105,7 @@ func (cm *CommitMgr) Save() error {
|
||||||
|
|
||||||
buf := bytes.NewBuffer(nil)
|
buf := bytes.NewBuffer(nil)
|
||||||
if err := gob.NewEncoder(buf).Encode(exported); err != nil {
|
if err := gob.NewEncoder(buf).Encode(exported); err != nil {
|
||||||
return errors.Wrapf(err, "gob.Encode()", err)
|
return errors.Wrapf(err, "gob.Encode(): %s", err)
|
||||||
}
|
}
|
||||||
if err := cm.db.Put([]byte("CommitMgr"), buf.Bytes(), nil); err != nil {
|
if err := cm.db.Put([]byte("CommitMgr"), buf.Bytes(), nil); err != nil {
|
||||||
return errors.Wrapf(err, "db.Put(CommitMgr)")
|
return errors.Wrapf(err, "db.Put(CommitMgr)")
|
||||||
|
@ -129,7 +129,7 @@ func (cm *CommitMgr) Load() error {
|
||||||
return errors.Wrapf(err, "db.Get(CommitMgr)")
|
return errors.Wrapf(err, "db.Get(CommitMgr)")
|
||||||
}
|
}
|
||||||
if err := gob.NewDecoder(bytes.NewBuffer(data)).Decode(&exported); err != nil {
|
if err := gob.NewDecoder(bytes.NewBuffer(data)).Decode(&exported); err != nil {
|
||||||
return errors.Wrapf(err, "gob.Encode()", err)
|
return errors.Wrapf(err, "gob.Encode(): %s", err)
|
||||||
}
|
}
|
||||||
cm.commits = exported.Commits
|
cm.commits = exported.Commits
|
||||||
cm.head = exported.Head
|
cm.head = exported.Head
|
||||||
|
|
25
tests/claim_test.go
Normal file
25
tests/claim_test.go
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
package tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"../claim"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// assertEqual aborts the current test with t.Fatalf when a and b are not equal
// under Go's == on interface values (same dynamic type and equal value).
// Both operands are printed in Go syntax so that non-string values and strings
// containing invalid UTF-8 remain readable in the failure message.
func assertEqual(t *testing.T, a interface{}, b interface{}) {
	t.Helper() // attribute the failure to the calling line, not this helper
	if a != b {
		t.Fatalf("%#v != %#v", a, b)
	}
}
|
||||||
|
|
||||||
|
func TestNormalization(t *testing.T) {
|
||||||
|
t.Log("ICU Version: " + claim.IcuVersion())
|
||||||
|
assertEqual(t, "test", string(claim.Normalize([]byte("TESt"))))
|
||||||
|
assertEqual(t, "test 23", string(claim.Normalize([]byte("tesT 23"))))
|
||||||
|
assertEqual(t, "\xFF", string(claim.Normalize([]byte("\xFF"))))
|
||||||
|
assertEqual(t, "\xC3\x28", string(claim.Normalize([]byte("\xC3\x28"))))
|
||||||
|
assertEqual(t, "\xCF\x89", string(claim.Normalize([]byte("\xE2\x84\xA6"))))
|
||||||
|
assertEqual(t, "\xD1\x84", string(claim.Normalize([]byte("\xD0\xA4"))))
|
||||||
|
assertEqual(t, "\xD5\xA2", string(claim.Normalize([]byte("\xD4\xB2"))))
|
||||||
|
assertEqual(t, "\xE3\x81\xB5\xE3\x82\x99", string(claim.Normalize([]byte("\xE3\x81\xB6"))))
|
||||||
|
assertEqual(t, "\xE1\x84\x81\xE1\x85\xAA\xE1\x86\xB0", string(claim.Normalize([]byte("\xEA\xBD\x91"))))
|
||||||
|
}
|
Loading…
Reference in a new issue