Compare commits

...

4 commits

Author SHA1 Message Date
Niko Storni
79aff520d0 hard tuning 2019-12-02 19:00:53 +01:00
Niko Storni
1137a6dbb4 add weekly boost 2019-12-02 18:52:56 +01:00
Niko Storni
7aa5cc0c4b changes to function 2019-12-02 18:46:12 +01:00
Niko Storni
08068a2a47 add timestamp index 2019-12-02 18:01:20 +01:00
3 changed files with 98 additions and 19 deletions

View file

@ -5,6 +5,6 @@ then
exit 1;
else
echo "Index did not exist, creating..." ;
curl -H 'Content-Type: application/json' -H 'Accept: application/json' -X PUT -d '{ "settings" : { "number_of_shards" : 1 }, "mappings" : { "claim" : { "properties" : { "value" : { "type" : "nested" }, "suggest_name": { "type": "completion" }, "suggest_desc": { "type": "completion" } } } } }' http://localhost:9200/claims;
curl -H 'Content-Type: application/json' -H 'Accept: application/json' -X PUT -d '{ "settings" : { "number_of_shards" : 1 }, "mappings" : { "claim" : { "properties" : { "value" : { "type" : "nested" }, "suggest_name": { "type": "completion" }, "suggest_desc": { "type": "completion" }, "transaction_time": { "type": "date" } } } } }' http://localhost:9200/claims;
exit 0;
fi

View file

@ -80,6 +80,64 @@ function getResults (input) {
},
},
};
// Recency boost: multiplies a claim's relevance by the sum of several
// time-decay functions evaluated on the `transaction_time` date field, so
// that more recent claims rank higher.
// Per the Elasticsearch decay-function definition, each gauss entry
// contributes its full `weight` for a claim dated at `origin` (now) and
// `weight * decay` for a claim that is `scale` away from `origin`.
const newerBoost = {
  'function_score': {
    'score_mode': 'sum', // The outputs of all functions are summed
    'boost_mode': 'multiply', // The document's relevance is multiplied by that sum
    'functions': [
      {
        // Constant baseline: every claim, however old, contributes 0.6 to
        // the summed multiplier, so old claims are damped rather than zeroed.
        // Remove this entry to let the score of old claims decay towards zero.
        'weight': 0.6,
      },
      {
        // Claims published within the last week get the biggest boost
        'weight': 85,
        'gauss' : {
          'transaction_time': {
            'origin': Date.now(), // Current time in epoch milliseconds
            'scale' : '7d',
            'decay' : 0.6,
          },
        },
      },
      {
        // Claims published within the last month get a big boost
        'weight': 60,
        'gauss' : {
          'transaction_time': {
            'origin': Date.now(), // Current time in epoch milliseconds
            'scale' : '31d',
            'decay' : 0.55,
          },
        },
      },
      {
        // Claims published within the last two months get a medium boost
        'weight': 50,
        'gauss' : {
          'transaction_time': {
            'origin': Date.now(), // Current time in epoch milliseconds
            'scale' : '62d',
            'decay' : 0.5,
          },
        },
      },
      {
        // Claims published within the last year get a small boost
        'weight': 40,
        'gauss' : {
          'transaction_time': {
            'origin': Date.now(), // Current time in epoch milliseconds
            'scale' : '365d', // One year (was mistyped as 356d)
            'decay' : 0.2,
          },
        },
      },
    ],
  },
};
const funcScoreClaimWeight = { // 100 LBC adds 1 point to the score
'function_score': {
'field_value_factor': {
@ -112,7 +170,9 @@ function getResults (input) {
let conCatTerm = '';
let phraseTerm = '';
escapedQuery.split(' ').every((term, index) => {
if (index === 4) { return false }
if (index === 4) {
return false;
}
phraseTerm = phraseTerm + ' ' + term;
conCatTerm = conCatTerm + term;
queries.push(
@ -144,10 +204,10 @@ function getResults (input) {
},
},
{
'prefix': { 'name': { 'value': '@' + escapedQuery, 'boost': 10 } },
'prefix': {'name': {'value': '@' + escapedQuery, 'boost': 10}},
},
{
'prefix': { 'name': { 'value': escapedQuery, 'boost': 10 } },
'prefix': {'name': {'value': escapedQuery, 'boost': 10}},
},
);
});
@ -156,7 +216,9 @@ function getResults (input) {
const splitATD = () => {
let queries = [];
escapedQuery.split(' ').every((term, index) => {
if (index === 4) { return false }
if (index === 4) {
return false;
}
queries.push({ // Contains search term in Author, Title, Description
'query_string': {
'query' : `*${term}*`,
@ -277,6 +339,7 @@ function getResults (input) {
'bool': {
'should': [
conBoost,
newerBoost,
funcScoreClaimWeight,
funcScoreChannelWeight,
channelIdentifier,
@ -360,7 +423,7 @@ function getAutoCompleteQuery (query) {
function getFilters (input) {
var filters = [];
var bidStateFilter = {'bool': {'must_not': {'match': { 'bid_state': 'Accepted' }}}};
var bidStateFilter = {'bool': {'must_not': {'match': {'bid_state': 'Accepted'}}}};
if (input.nsfw === 'true' || input.nsfw === 'false') {
const nsfwFilter = {'match': {'value.stream.metadata.nsfw': input.nsfw}};
filters.push(nsfwFilter);
@ -446,7 +509,12 @@ function getStatus () {
rp(`http://localhost:9200/claims/_stats`)
.then(function (data) {
data = JSON.parse(data);
resolve({status: getStats(), spaceUsed: pretty(data._all.total.store.size_in_bytes, true), claimsInIndex: data._all.total.indexing.index_total, totSearches: data._all.total.search.query_total});
resolve({
status : getStats(),
spaceUsed : pretty(data._all.total.store.size_in_bytes, true),
claimsInIndex: data._all.total.indexing.index_total,
totSearches : data._all.total.search.query_total,
});
})
.catch(function (err) {
reject(err);
@ -459,11 +527,13 @@ function getWashedQuery (query) {
query = query.toLowerCase().replace(/ +/g, ' ').replace('lbry://', '');
let splitBy = ['&', '$', ' '];
let regex = new RegExp(splitBy.join('|'), 'gi');
let badWords = [ 'from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the' ];
let badWords = ['from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the'];
let words = query.split(regex);
let sentence = [];
words.forEach(w => {
if (!badWords.includes(w)) { sentence.push(w) }
if (!badWords.includes(w)) {
sentence.push(w);
}
});
query = sentence.join(' ');
@ -502,12 +572,14 @@ async function update () {
class LighthouseControllers {
/* eslint-disable no-param-reassign */
// Start syncing blocks...
startSync () {
winston.log('info', '[Importer] Started importer, indexing claims.');
claimSync();
// sync(); // Old Sync
}
/**
* Search API Endpoint.
* @param {ctx} Koa Context
@ -515,6 +587,7 @@ class LighthouseControllers {
async search (ctx) {
await getResults(ctx.query).then(function (result) {
let results = result.hits.hits;
console.log(results);
let cResults = [];
for (let pResult of results) {
cResults.push(pResult._source);
@ -560,6 +633,7 @@ class LighthouseControllers {
ctx.body = clean;
});
}
/**
* Info about the api here
* @param {ctx} Koa Context
@ -597,15 +671,18 @@ class LighthouseControllers {
ctx.body = 'OK';
} else {
ctx.status = 400;
ctx.body = 'skip auto update: pull request'; logToSlack(ctx.body);
ctx.body = 'skip auto update: pull request';
logToSlack(ctx.body);
}
} else {
ctx.status = 400;
ctx.body = 'skip auto update: only deploys on master branch'; logToSlack(ctx.body);
ctx.body = 'skip auto update: only deploys on master branch';
logToSlack(ctx.body);
}
} else {
ctx.status = 500;
ctx.body = 'skip auto update: could not verify webhook'; logToSlack(ctx.body);
ctx.body = 'skip auto update: could not verify webhook';
logToSlack(ctx.body);
}
}

View file

@ -19,7 +19,7 @@ import chainqueryConfig from '../../../chainquery-config.json';
let connection = null;
const esLogLevel = 'info';
const MaxClaimsToProcessPerIteration = 100000;
const MaxClaimsToProcessPerIteration = 100000000;
const BatchSize = 5000;
const loggerStream = winstonStream(winston, esLogLevel);
const eclient = new elasticsearch.Client({
@ -32,7 +32,7 @@ const eclient = new elasticsearch.Client({
},
});
const queue = new ElasticQueue({elastic: eclient});
const queue = new ElasticQueue({batchSize: 5000, concurrency: 6, elastic: eclient});
// Report on stdout each time the ElasticQueue instance emits 'drain'
// (presumably once its pending items have been flushed; confirm against
// the queue library).
queue.on('drain', () => {
  console.log('elasticsearch queue is drained');
});
@ -211,6 +211,7 @@ function getClaimsSince (time, lastID, MaxClaimsInCall) {
p.claim_id as channel_id,
c.bid_state,
c.effective_amount,
c.transaction_time,
COALESCE(p.effective_amount,1) as certificate_amount,
c.claim_id as claimId,
c.value_as_json as value
@ -245,6 +246,7 @@ function getClaimsSince (time, lastID, MaxClaimsInCall) {
bid_state : r.bid_state,
effective_amount : r.effective_amount,
certificate_amount: r.certificate_amount,
transaction_time : new Date(r.transaction_time * 1000),
claimId : r.claimId,
value : value,
});