From 198473b62bf3f489877cd87d759793cf6ce83bcc Mon Sep 17 00:00:00 2001 From: Niko Storni Date: Thu, 25 Mar 2021 18:47:34 +0100 Subject: [PATCH] improve language detection --- manager/ytsync.go | 2 +- sources/youtubeVideo.go | 6 +++++- sources/youtubeVideo_test.go | 25 +++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 sources/youtubeVideo_test.go diff --git a/manager/ytsync.go b/manager/ytsync.go index 206ceeb..76833af 100644 --- a/manager/ytsync.go +++ b/manager/ytsync.go @@ -505,7 +505,7 @@ func (s *Sync) updateRemoteDB(claims []jsonrpc.Claim, ownClaims []jsonrpc.Claim) log.Debugf("%s: Published but is not in database (%s - %s)", videoID, chainInfo.ClaimName, chainInfo.ClaimID) } if transferStatusMismatch { - log.Debugf("%s: is marked as transferred %t on it's actually %t", videoID, sv.Transferred, transferred) + log.Debugf("%s: is marked as transferred %t but it's actually %t", videoID, sv.Transferred, transferred) } if !claimInDatabase || metadataDiffers || claimIDDiffers || claimNameDiffers || claimMarkedUnpublished || transferStatusMismatch { diff --git a/sources/youtubeVideo.go b/sources/youtubeVideo.go index 18185e9..8307d3e 100644 --- a/sources/youtubeVideo.go +++ b/sources/youtubeVideo.go @@ -415,10 +415,14 @@ func (v *YoutubeVideo) publish(daemon *jsonrpc.Client, params SyncParams) (*Sync FeeCurrency: jsonrpc.Currency(params.Fee.Currency), } } - info := whatlanggo.Detect(v.getAbbrevDescription()) + info := whatlanggo.Detect(v.description) + info2 := whatlanggo.Detect(v.title) if info.IsReliable() && info.Lang.Iso6391() != "" { language := info.Lang.Iso6391() languages = []string{language} + } else if info2.IsReliable() && info2.Lang.Iso6391() != "" { + language := info2.Lang.Iso6391() + languages = []string{language} } options := jsonrpc.StreamCreateOptions{ ClaimCreateOptions: jsonrpc.ClaimCreateOptions{ diff --git a/sources/youtubeVideo_test.go b/sources/youtubeVideo_test.go new file mode 100644 index 
0000000..40f098f
--- /dev/null
+++ b/sources/youtubeVideo_test.go
@@ -0,0 +1,25 @@
+package sources
+
+import (
+	"testing"
+
+	"github.com/abadojack/whatlanggo"
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestLanguageDetection(t *testing.T) { // runs whatlanggo.Detect on two sample texts and pins the expected ISO 639-1 codes
+	description := `Om lättkränkta muslimer, och den bristande logiken i vad som anses vara att vanära profeten. Från Moderata riksdagspolitikern Hanif Balis podcast "God Ton", avsnitt 108, från oktober 2020, efter terrordådet där en fransk lärare fick huvudet avskuret efter att undervisat sin mångkulturella klass om frihet.`
+	info := whatlanggo.Detect(description) // case 1: multi-sentence Swedish text, expected to map to "sv"
+	logrus.Infof("confidence: %.2f", info.Confidence)
+	assert.True(t, info.IsReliable())
+	assert.True(t, info.Lang.Iso6391() != "")
+	assert.Equal(t, "sv", info.Lang.Iso6391())
+	// case 2: a short line mixing emoji, CJK characters and Latin script, expected to map to "zh"
+	description = `🥳週四直播 | 晚上來開個賽車🔰歡迎各位一起來玩! - PonPonLin蹦蹦林`
+	info = whatlanggo.Detect(description)
+	logrus.Infof("confidence: %.2f", info.Confidence) // the confidence value is only logged, never asserted
+	assert.True(t, info.IsReliable())
+	assert.True(t, info.Lang.Iso6391() != "")
+	assert.Equal(t, "zh", info.Lang.Iso6391())
+}