Further improve language detection
Strip URLs from the description before running language detection to get better results.
This commit is contained in:
parent
198473b62b
commit
7c7ceed333
2 changed files with 31 additions and 1 deletion
|
@@ -415,7 +415,9 @@ func (v *YoutubeVideo) publish(daemon *jsonrpc.Client, params SyncParams) (*Sync
|
|||
FeeCurrency: jsonrpc.Currency(params.Fee.Currency),
|
||||
}
|
||||
}
|
||||
info := whatlanggo.Detect(v.description)
|
||||
urlsRegex := regexp.MustCompile(`(?m) ?(f|ht)(tp)(s?)(://)(.*)[.|/](.*)`)
|
||||
descriptionSample := urlsRegex.ReplaceAllString(v.description, "")
|
||||
info := whatlanggo.Detect(descriptionSample)
|
||||
info2 := whatlanggo.Detect(v.title)
|
||||
if info.IsReliable() && info.Lang.Iso6391() != "" {
|
||||
language := info.Lang.Iso6391()
|
||||
|
|
|
@@ -1,6 +1,7 @@
|
|||
package sources
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/abadojack/whatlanggo"
|
||||
|
@@ -22,4 +23,31 @@ func TestLanguageDetection(t *testing.T) {
|
|||
assert.True(t, info.IsReliable())
|
||||
assert.True(t, info.Lang.Iso6391() != "")
|
||||
assert.Equal(t, "zh", info.Lang.Iso6391())
|
||||
|
||||
description = `成為這個頻道的會員並獲得獎勵:
|
||||
https://www.youtube.com/channel/UCOQFrooz-YGHjYb7s3-MrsQ/join
|
||||
_____________________________________________
|
||||
想聽我既音樂作品可以去下面LINK
|
||||
streetvoice 街聲:
|
||||
https://streetvoice.com/CTLam331/
|
||||
_____________________________________________
|
||||
想學結他、鋼琴
|
||||
有關音樂制作工作
|
||||
都可以搵我~
|
||||
大家快D訂閱喇
|
||||
不定期出片
|
||||
|
||||
|
||||
|
||||
|
||||
Website: http://ctlam331.wixsite.com/ctlamusic
|
||||
FB PAGE:https://www.facebook.com/ctlam331
|
||||
IG:ctlamusic`
|
||||
urlsRegex := regexp.MustCompile(`(?m) ?(f|ht)(tp)(s?)(://)(.*)[.|/](.*)`)
|
||||
descriptionSample := urlsRegex.ReplaceAllString(description, "")
|
||||
info = whatlanggo.Detect(descriptionSample)
|
||||
logrus.Infof("confidence: %.2f", info.Confidence)
|
||||
assert.True(t, info.IsReliable())
|
||||
assert.True(t, info.Lang.Iso6391() != "")
|
||||
assert.Equal(t, "zh", info.Lang.Iso6391())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue