From 08068a2a47957ce2337ff35cebb3ddd7db7d461b Mon Sep 17 00:00:00 2001 From: Niko Storni <niko@lbry.io> Date: Mon, 2 Dec 2019 17:49:58 +0100 Subject: [PATCH 1/4] add timestamp index --- gendb.sh | 2 +- server/controllers/lighthouse.js | 76 +++++++++++++++++++++++++------- server/utils/chainquery/index.js | 6 ++- 3 files changed, 65 insertions(+), 19 deletions(-) diff --git a/gendb.sh b/gendb.sh index b3c3345..3de69ad 100755 --- a/gendb.sh +++ b/gendb.sh @@ -5,6 +5,6 @@ then exit 1; else echo "Index did not exist, creating..." ; - curl -H 'Content-Type: application/json' -H 'Accept: application/json' -X PUT -d '{ "settings" : { "number_of_shards" : 1 }, "mappings" : { "claim" : { "properties" : { "value" : { "type" : "nested" }, "suggest_name": { "type": "completion" }, "suggest_desc": { "type": "completion" } } } } }' http://localhost:9200/claims; + curl -H 'Content-Type: application/json' -H 'Accept: application/json' -X PUT -d '{ "settings" : { "number_of_shards" : 1 }, "mappings" : { "claim" : { "properties" : { "value" : { "type" : "nested" }, "suggest_name": { "type": "completion" }, "suggest_desc": { "type": "completion" }, "transaction_time": { "type": "date" } } } } }' http://localhost:9200/claims; exit 0; fi diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index b549119..36b3c38 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -80,6 +80,32 @@ function getResults (input) { }, }, }; + const newerBoost = [ // New claims should come up further up + { + 'range': { + 'transaction_time': { + 'boost': 5, + 'gte' : new Date().setDate(Date.now() - 30), + }, + }, + }, + { + 'range': { + 'transaction_time': { + 'boost': 4, + 'gte' : new Date().setDate(Date.now() - 60), + }, + }, + }, + { + 'range': { + 'transaction_time': { + 'boost': 3, + 'gte' : new Date().setDate(Date.now() - 90), + }, + }, + }, + ]; const funcScoreClaimWeight = { // 100 LBC adds 1 point to the score 'function_score': { 'field_value_factor': { @@ -112,7 +138,9 @@ function getResults (input) { let conCatTerm = ''; let phraseTerm = ''; escapedQuery.split(' ').every((term, index) => { - if (index === 4) { return false } + if (index === 4) { + return false; + } phraseTerm = phraseTerm + ' ' + term; conCatTerm = conCatTerm + term; queries.push( @@ -144,10 +172,10 @@ function getResults (input) { }, }, { - 'prefix': { 'name': { 'value': '@' + escapedQuery, 'boost': 10 } }, + 'prefix': {'name': {'value': '@' + escapedQuery, 'boost': 10}}, }, { - 'prefix': { 'name': { 'value': escapedQuery, 'boost': 10 } }, + 'prefix': {'name': {'value': escapedQuery, 'boost': 10}}, }, ); }); @@ -156,7 +184,9 @@ function getResults (input) { const splitATD = () => { let queries = []; escapedQuery.split(' ').every((term, index) => { - if (index === 4) { return false } + if (index === 4) { + return false; + } queries.push({ // Contains search term in Author, Title, Description 'query_string': { 'query' : `*${term}*`, @@ -277,6 +307,7 @@ function getResults (input) { 'bool': { 'should': [ conBoost, + newerBoost, funcScoreClaimWeight, funcScoreChannelWeight, channelIdentifier, @@ -360,7 +391,7 @@ function getAutoCompleteQuery (query) { function getFilters (input) { var filters = []; - var bidStateFilter = {'bool': {'must_not': {'match': { 'bid_state': 'Accepted' }}}}; + var bidStateFilter = {'bool': {'must_not': {'match': {'bid_state': 'Accepted'}}}}; if (input.nsfw === 'true' || input.nsfw === 'false') { const nsfwFilter = {'match': {'value.stream.metadata.nsfw': input.nsfw}}; filters.push(nsfwFilter); @@ -416,7 +447,7 @@ function getFilters (input) { }, bidStateFilter]; return filterQuery; - } else { + } else { return [bidStateFilter]; } } @@ -446,7 +477,12 @@ function getStatus () { rp(`http://localhost:9200/claims/_stats`) .then(function (data) { data = JSON.parse(data); - resolve({status: getStats(), spaceUsed: pretty(data._all.total.store.size_in_bytes, true), claimsInIndex: data._all.total.indexing.index_total, totSearches: data._all.total.search.query_total}); + resolve({ + status : getStats(), + spaceUsed : pretty(data._all.total.store.size_in_bytes, true), + claimsInIndex: data._all.total.indexing.index_total, + totSearches : data._all.total.search.query_total, + }); }) .catch(function (err) { reject(err); @@ -459,11 +495,13 @@ function getWashedQuery (query) { query = query.toLowerCase().replace(/ +/g, ' ').replace('lbry://', ''); let splitBy = ['&', '$', ' ']; let regex = new RegExp(splitBy.join('|'), 'gi'); - let badWords = [ 'from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the' ]; + let badWords = ['from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the']; let words = query.split(regex); let sentence = []; words.forEach(w => { - if (!badWords.includes(w)) { sentence.push(w) } + if (!badWords.includes(w)) { + sentence.push(w); + } }); query = sentence.join(' '); @@ -474,7 +512,7 @@ function getWashedQuery (query) { function getEscapedQuery (query) { // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters // The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ / - let badCharacters = ['+', '-', '=', '&&', '||', '>', '<', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\', '/']; + let badCharacters = ['+', '-', '=', '&&', '||', '>', '<', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\', '/']; let escapedQuery = ''; for (var i = 0; i < query.length; i++) { let char1 = query.charAt(i); @@ -502,12 +540,14 @@ async function update () { class LighthouseControllers { /* eslint-disable no-param-reassign */ + // Start syncing blocks... startSync () { winston.log('info', '[Importer] Started importer, indexing claims.'); claimSync(); // sync(); // Old Sync } + /** * Search API Endpoint. * @param {ctx} Koa Context @@ -537,8 +577,8 @@ class LighthouseControllers { cResults.push(name); } if (pResult._source.value && - pResult._source.value.stream && - pResult._source.value.stream !== undefined) { + pResult._source.value.stream && + pResult._source.value.stream !== undefined) { var title = pResult._source.value.stream.metadata.title; var author = pResult._source.value.stream.metadata.author; if (title.indexOf(ctx.query.s.trim()) > -1 && title.indexOf('http') === -1) { @@ -560,6 +600,7 @@ class LighthouseControllers { ctx.body = clean; }); } + /** * Info about the api here * @param {ctx} Koa Context @@ -583,7 +624,7 @@ class LighthouseControllers { async autoUpdate (ctx) { let travisSignature = Buffer.from(ctx.request.headers.signature, 'base64'); let payload = ctx.request.body.payload; - let travisResponse = await got('https://api.travis-ci.com/config', {timeout: 10000}); + let travisResponse = await got('https://api.travis-ci.com/config', {timeout: 10000}); let travisPublicKey = JSON.parse(travisResponse.body).config.notifications.webhook.public_key; let verifier = crypto.createVerify('sha1'); verifier.update(payload); @@ -597,15 +638,18 @@ class LighthouseControllers { ctx.body = 'OK'; } else { ctx.status = 400; - ctx.body = 'skip auto update: pull request'; logToSlack(ctx.body); + ctx.body = 'skip auto update: pull request'; + logToSlack(ctx.body); } } else { ctx.status = 400; - ctx.body = 'skip auto update: only deploys on master branch'; logToSlack(ctx.body); + ctx.body = 'skip auto update: only deploys on master branch'; + logToSlack(ctx.body); } } else { ctx.status = 500; - ctx.body = 'skip auto update: could not verify webhook'; logToSlack(ctx.body); + ctx.body = 'skip auto update: could not verify webhook'; + logToSlack(ctx.body); } } diff --git a/server/utils/chainquery/index.js b/server/utils/chainquery/index.js index 8e9e485..e89d3ac 100644 --- a/server/utils/chainquery/index.js +++ b/server/utils/chainquery/index.js @@ -19,7 +19,7 @@ import chainqueryConfig from '../../../chainquery-config.json'; let connection = null; const esLogLevel = 'info'; -const MaxClaimsToProcessPerIteration = 100000; +const MaxClaimsToProcessPerIteration = 100000000; const BatchSize = 5000; const loggerStream = winstonStream(winston, esLogLevel); const eclient = new elasticsearch.Client({ @@ -32,7 +32,7 @@ const eclient = new elasticsearch.Client({ }, }); -const queue = new ElasticQueue({elastic: eclient}); +const queue = new ElasticQueue({batchSize: 5000, concurrency: 6, elastic: eclient}); queue.on('drain', function () { console.log('elasticsearch queue is drained'); }); @@ -211,6 +211,7 @@ function getClaimsSince (time, lastID, MaxClaimsInCall) { p.claim_id as channel_id, c.bid_state, c.effective_amount, + c.transaction_time, COALESCE(p.effective_amount,1) as certificate_amount, c.claim_id as claimId, c.value_as_json as value @@ -245,6 +246,7 @@ function getClaimsSince (time, lastID, MaxClaimsInCall) { bid_state : r.bid_state, effective_amount : r.effective_amount, certificate_amount: r.certificate_amount, + transaction_time : new Date(r.transaction_time * 1000), claimId : r.claimId, value : value, }); From 7aa5cc0c4bb9d17278bc0699521bd16e16bab53e Mon Sep 17 00:00:00 2001 From: Niko Storni <niko@lbry.io> Date: Mon, 2 Dec 2019 18:46:12 +0100 Subject: [PATCH 2/4] changes to function --- server/controllers/lighthouse.js | 54 +++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index 36b3c38..f98a92c 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -80,32 +80,42 @@ function getResults (input) { }, }, }; - const newerBoost = [ // New claims should come up further up - { - 'range': { - 'transaction_time': { - 'boost': 5, - 'gte' : new Date().setDate(Date.now() - 30), + const newerBoost = { + 'function_score': { + 'score_mode': 'sum', // All functions outputs get summed + 'boost_mode': 'multiply', // The documents relevance is multiplied with the sum + + 'functions': [ + { + // The relevancy of old posts is multiplied by at least one. + // Remove if you want to exclude old posts + 'weight': 1, }, - }, - }, - { - 'range': { - 'transaction_time': { - 'boost': 4, - 'gte' : new Date().setDate(Date.now() - 60), + { + // Published this month get a big boost + 'weight': 50, + 'gauss' : { + 'transaction_time': { // <- Change to your date field name + 'origin': Date.now(), // Change to current date + 'scale' : '31d', + 'decay' : 0.5, + }, + }, }, - }, - }, - { - 'range': { - 'transaction_time': { - 'boost': 3, - 'gte' : new Date().setDate(Date.now() - 90), + { + // Published this year get a boost + 'weight': 20, + 'gauss' : { + 'transaction_time': { // <- Change to your date field name + 'origin': Date.now(), // Change to current date + 'scale' : '356d', + 'decay' : 0.5, + }, + }, }, - }, + ], }, - ]; + }; const funcScoreClaimWeight = { // 100 LBC adds 1 point to the score 'function_score': { 'field_value_factor': { From 1137a6dbb455a52ac528fcf9ad1d75a19e5f336a Mon Sep 17 00:00:00 2001 From: Niko Storni <niko@lbry.io> Date: Mon, 2 Dec 2019 18:52:56 +0100 Subject: [PATCH 3/4] add weekly boost --- server/controllers/lighthouse.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index f98a92c..6a70229 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -91,6 +91,17 @@ function getResults (input) { // Remove if you want to exclude old posts 'weight': 1, }, + { + // Published this week get a big boost + 'weight': 80, + 'gauss' : { + 'transaction_time': { // <- Change to your date field name + 'origin': Date.now(), // Change to current date + 'scale' : '7d', + 'decay' : 0.5, + }, + }, + }, { // Published this month get a big boost 'weight': 50, From 79aff520d009c7d61f9386507f3dde0fafca3a81 Mon Sep 17 00:00:00 2001 From: Niko Storni <niko@lbry.io> Date: Mon, 2 Dec 2019 19:00:53 +0100 Subject: [PATCH 4/4] hard tuning --- server/controllers/lighthouse.js | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index 6a70229..f97f09d 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -89,16 +89,27 @@ function getResults (input) { { // The relevancy of old posts is multiplied by at least one. // Remove if you want to exclude old posts - 'weight': 1, + 'weight': 0.6, }, { // Published this week get a big boost - 'weight': 80, + 'weight': 85, 'gauss' : { 'transaction_time': { // <- Change to your date field name 'origin': Date.now(), // Change to current date 'scale' : '7d', - 'decay' : 0.5, + 'decay' : 0.6, + }, + }, + }, + { + // Published this month get a big boost + 'weight': 60, + 'gauss' : { + 'transaction_time': { // <- Change to your date field name + 'origin': Date.now(), // Change to current date + 'scale' : '31d', + 'decay' : 0.55, }, }, }, @@ -108,19 +119,19 @@ function getResults (input) { 'gauss' : { 'transaction_time': { // <- Change to your date field name 'origin': Date.now(), // Change to current date - 'scale' : '31d', + 'scale' : '62d', 'decay' : 0.5, }, }, }, { // Published this year get a boost - 'weight': 20, + 'weight': 40, 'gauss' : { 'transaction_time': { // <- Change to your date field name 'origin': Date.now(), // Change to current date 'scale' : '356d', - 'decay' : 0.5, + 'decay' : 0.2, }, }, }, @@ -576,6 +587,7 @@ class LighthouseControllers { async search (ctx) { await getResults(ctx.query).then(function (result) { let results = result.hits.hits; + console.log(results); let cResults = []; for (let pResult of results) { cResults.push(pResult._source);