improve language detection
This commit is contained in:
parent
d53d0a1d52
commit
198473b62b
3 changed files with 31 additions and 2 deletions
|
@ -505,7 +505,7 @@ func (s *Sync) updateRemoteDB(claims []jsonrpc.Claim, ownClaims []jsonrpc.Claim)
|
|||
log.Debugf("%s: Published but is not in database (%s - %s)", videoID, chainInfo.ClaimName, chainInfo.ClaimID)
|
||||
}
|
||||
if transferStatusMismatch {
|
||||
log.Debugf("%s: is marked as transferred %t on it's actually %t", videoID, sv.Transferred, transferred)
|
||||
log.Debugf("%s: is marked as transferred %t but it's actually %t", videoID, sv.Transferred, transferred)
|
||||
}
|
||||
|
||||
if !claimInDatabase || metadataDiffers || claimIDDiffers || claimNameDiffers || claimMarkedUnpublished || transferStatusMismatch {
|
||||
|
|
|
@ -415,10 +415,14 @@ func (v *YoutubeVideo) publish(daemon *jsonrpc.Client, params SyncParams) (*Sync
|
|||
FeeCurrency: jsonrpc.Currency(params.Fee.Currency),
|
||||
}
|
||||
}
|
||||
info := whatlanggo.Detect(v.getAbbrevDescription())
|
||||
info := whatlanggo.Detect(v.description)
|
||||
info2 := whatlanggo.Detect(v.title)
|
||||
if info.IsReliable() && info.Lang.Iso6391() != "" {
|
||||
language := info.Lang.Iso6391()
|
||||
languages = []string{language}
|
||||
} else if info2.IsReliable() && info2.Lang.Iso6391() != "" {
|
||||
language := info2.Lang.Iso6391()
|
||||
languages = []string{language}
|
||||
}
|
||||
options := jsonrpc.StreamCreateOptions{
|
||||
ClaimCreateOptions: jsonrpc.ClaimCreateOptions{
|
||||
|
|
25
sources/youtubeVideo_test.go
Normal file
25
sources/youtubeVideo_test.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
package sources
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/abadojack/whatlanggo"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestLanguageDetection(t *testing.T) {
|
||||
description := `Om lättkränkta muslimer, och den bristande logiken i vad som anses vara att vanära profeten. Från Moderata riksdagspolitikern Hanif Balis podcast "God Ton", avsnitt 108, från oktober 2020, efter terrordådet där en fransk lärare fick huvudet avskuret efter att undervisat sin mångkulturella klass om frihet.`
|
||||
info := whatlanggo.Detect(description)
|
||||
logrus.Infof("confidence: %.2f", info.Confidence)
|
||||
assert.True(t, info.IsReliable())
|
||||
assert.True(t, info.Lang.Iso6391() != "")
|
||||
assert.Equal(t, "sv", info.Lang.Iso6391())
|
||||
|
||||
description = `🥳週四直播 | 晚上來開個賽車🔰歡迎各位一起來玩! - PonPonLin蹦蹦林`
|
||||
info = whatlanggo.Detect(description)
|
||||
logrus.Infof("confidence: %.2f", info.Confidence)
|
||||
assert.True(t, info.IsReliable())
|
||||
assert.True(t, info.Lang.Iso6391() != "")
|
||||
assert.Equal(t, "zh", info.Lang.Iso6391())
|
||||
}
|
Loading…
Reference in a new issue