f4115b0edc
index channel claim id for claims
606 lines
17 KiB
JavaScript
606 lines
17 KiB
JavaScript
import 'babel-polyfill';
|
|
import winston from 'winston';
|
|
import winstonStream from 'winston-stream';
|
|
import elasticsearch from 'elasticsearch';
|
|
import rp from 'request-promise';
|
|
import pretty from 'prettysize';
|
|
import {claimSync, getStats} from '../utils/chainquery';
|
|
import crypto from 'crypto';
|
|
import got from 'got';
|
|
import {logToSlack} from '../index';
|
|
|
|
// Stream created by winston-stream at the 'info' level; it is handed to the
// Elasticsearch client below as its log sink.
const loggerStream = winstonStream(winston, 'info');

// Shared Elasticsearch client used by every query in this module.
// The host is taken from ELASTIC_URL when it is set, otherwise a node on
// localhost:9200 is used.
const eclient = new elasticsearch.Client({
  host: process.env.ELASTIC_URL || 'http://localhost:9200',
  log : { level: 'info', type: 'stream', stream: loggerStream },
});
|
|
|
|
/**
 * Builds and runs the main claim search against the `claims` index.
 *
 * Reads the following properties of `input`:
 *   - s          (required) free-text search string
 *   - size, from (optional) paging; default 10 / 0
 *   - channel_id (optional) restricts results to one `channel_claim_id`
 *   - channel    (optional) restricts results to one `channel`
 *   - whatever getFilters() reads (nsfw, contentType, mediaType, claimType)
 *
 * `input` is not modified.
 *
 * @param {Object} input Search parameters (the controller passes `ctx.query`).
 * @returns {Promise} Whatever `eclient.search` returns for the built query.
 * @throws {TypeError} If `input.s` is not a string.
 */
function getResults (input) {
  // The controller passes the raw query object, so size/from may be strings.
  // Parse them before doing arithmetic; otherwise `from + size` concatenates
  // ('100' + '100' === '100100') and the window check below misfires.
  const toCount = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? fallback : Math.max(parsed, 0);
  };
  let size = toCount(input.size, 10);
  let from = toCount(input.from, 0);
  // Beamer - temp fix for https://github.com/lbryio/lighthouse/issues/67
  // Keep from + size inside a 10000-result window.
  if (size > 10000) {
    size = 10000;
    from = 0;
  }
  if (from > 10000) {
    from = 9999;
    size = 1;
  }
  if (from + size > 10000) {
    from = 10000 - size;
  }

  const trimmedQuery = input.s.trim();
  const escapedQuery = getEscapedQuery(trimmedQuery);
  const washedQuery = getEscapedQuery(getWashedQuery(trimmedQuery));
  // Kept numeric so both score functions send the same JSON type.
  const effectiveFactor = 0.00000000001;

  // Only the first MAX_TERMS words get their own per-term clauses.
  // (The previous `.every()` loops returned undefined from the callback and
  // therefore stopped after the first word.)
  const MAX_TERMS = 4;
  const terms = escapedQuery.split(' ').slice(0, MAX_TERMS);

  // `*query*` wildcard search over the given fields.
  const wildcard = (query, fields, boost) => ({
    'query_string': {
      'query' : `*${query}*`,
      'fields': [...fields],
      'boost' : boost,
    },
  });

  // Mandatory restriction of a single field to an (escaped, trimmed) value.
  const fieldFilter = (field, value) => ({
    'bool': {
      'must': {
        'query_string': {
          'fields': [field],
          'query' : getEscapedQuery(value.trim()),
        },
      },
    },
  });

  // Search is split up into different parts, all search parts go under this line.
  const dynamicFilters = [];
  if (input.channel_id !== undefined) { // Only claims of that channel claim id
    dynamicFilters.push(fieldFilter('channel_claim_id', input.channel_id));
  }
  if (input.channel !== undefined) { // Only claims of that channel
    dynamicFilters.push(fieldFilter('channel', input.channel));
  }

  const conBoost = { // Controlling claims should get higher placement in search results.
    'match': {
      'bid_state': {
        'query': 'Controlling',
        'boost': 20,
      },
    },
  };
  // Score contribution from the claim's effective_amount.
  // (Original note: "100 LBC adds 1 point to the score" - not verified here.)
  const funcScoreClaimWeight = {
    'function_score': {
      'field_value_factor': {
        'field'  : 'effective_amount',
        'factor' : effectiveFactor,
        'missing': 1,
      },
    },
  };
  // Same idea for the channel's certificate_amount, at a tenth of the weight.
  const funcScoreChannelWeight = {
    'function_score': {
      'field_value_factor': {
        'field'  : 'certificate_amount',
        'factor' : effectiveFactor / 10,
        'missing': 1,
      },
    },
  };

  // Per-term clauses on `name`.
  const nameClauses = [];
  let phraseTerm = '';
  let conCatTerm = '';
  terms.forEach((term) => {
    // NOTE(review): phraseTerm starts with a space, so the phrase clause is
    // `* term*`. That is the long-standing behaviour and is kept as-is;
    // confirm whether the leading space is intended.
    phraseTerm = phraseTerm + ' ' + term;
    conCatTerm = conCatTerm + term;
    nameClauses.push(
      wildcard(term, ['name'], 1), // Contains an individual search term
      wildcard(phraseTerm, ['name'], 1), // Contains 1..n of the terms as a phrase
      wildcard(conCatTerm, ['name'], 300), // Contains 1..n of the terms together
    );
  });
  // These two do not depend on the individual term, so they are added once
  // rather than once per term.
  nameClauses.push(
    { 'prefix': { 'name': { 'value': '@' + escapedQuery, 'boost': 10 } } },
    { 'prefix': { 'name': { 'value': escapedQuery, 'boost': 10 } } },
  );

  const matPhraseName = { // Match search text as phrase - Name
    'match_phrase': {
      'name': {
        'query': escapedQuery,
        'boost': 10,
      },
    },
  };
  const matTextName = { // Match search text - Name
    'match': {
      'name': {
        'query': escapedQuery,
        'boost': 5,
      },
    },
  };
  const conTermName = wildcard(escapedQuery, ['name'], 3); // Contains search term - Name

  // ATD search (author, title, description), nested under `value`.
  const atdFields = [
    'value.stream.metadata.author',
    'value.stream.metadata.title',
    'value.stream.metadata.description',
  ];
  const atdClauses = terms.map((term) => wildcard(term, atdFields, 1));
  atdClauses.push(wildcard(escapedQuery, atdFields, 1));
  atdFields.forEach((field) => {
    atdClauses.push(
      { 'match': { [field]: { 'query': washedQuery, 'boost': 2 } } },
      { 'match_phrase': { [field]: { 'query': escapedQuery, 'boost': 3 } } },
    );
  });
  const atdSearch = {
    'nested': {
      'path' : 'value',
      'query': {
        'bool': {
          'should': atdClauses,
        },
      },
    },
  };
  // End of search parts

  const esQuery = {
    index  : 'claims',
    _source: ['name', 'claimId'],
    body   : {
      'query': {
        'bool': {
          'should': [
            conBoost,
            funcScoreClaimWeight,
            funcScoreChannelWeight,
          ],
          'must': [
            ...dynamicFilters,
            {
              'bool': {
                'should': [
                  ...nameClauses,
                  matPhraseName,
                  matTextName,
                  conTermName,
                  atdSearch,
                ],
              },
            },
          ],
          'filter': getFilters(input),
        },
      },
      size: size,
      from: from,
      sort: {
        _score: 'desc',
      },
    },
  };
  return eclient.search(esQuery);
}
|
|
|
|
/**
 * Placeholder for choosing which index a query should hit.
 *
 * Idea noted by the original author: insert data under an index that makes
 * queries faster or better, e.g. default queries to the last n months and
 * create a new index every month.
 *
 * @returns {undefined} Nothing yet; callers fall back to their own default.
 */
function getIndex () {
  return undefined;
}
|
|
|
|
/**
 * Placeholder for computing a routing key for a query.
 *
 * The original author flags routing as the most important field for
 * performance: routing queries ahead of time should make type-ahead lookups
 * much faster.
 *
 * @returns {undefined} Nothing yet; no routing key is supplied.
 */
function getRoutingKey () {
  return undefined;
}
|
|
|
|
/**
 * Builds the `bool.should` query used for autocomplete.
 *
 * Two phrase-prefix clauses are produced from the trimmed `query.s`:
 * one nested under `value` over author/title/description, one over `name`.
 *
 * @param {Object} query Request parameters; `query.s` must be a string.
 * @returns {Object} Elasticsearch query object.
 */
function getAutoCompleteQuery (query) {
  // Both clauses share everything except the weighted field list.
  const phrasePrefixOver = (weightedFields) => ({
    multi_match: {
      query         : query.s.trim(),
      type          : 'phrase_prefix',
      slop          : 5,
      max_expansions: 50,
      fields        : weightedFields,
    },
  });

  // Author, Title, Description
  const metadataClause = {
    nested: {
      path : 'value',
      query: phrasePrefixOver([
        'value.stream.metadata.author^3',
        'value.stream.metadata.title^5',
        'value.stream.metadata.description^2',
      ]),
    },
  };

  // Name
  const nameClause = phrasePrefixOver(['name^4']);

  return { bool: { should: [metadataClause, nameClause] } };
}
|
|
|
|
/**
 * Builds the `filter` clauses shared by search and autocomplete.
 *
 * Recognised properties of `input`:
 *   - nsfw        'true' | 'false' (any other value is ignored)
 *   - contentType comma-separated list of exact content types
 *   - mediaType   comma-separated list of audio, video, text, application,
 *                 image or cad; if none of the listed values is recognised
 *                 the filter matches nothing
 *   - claimType   'channel' | 'file' (any other value is ignored)
 *
 * @param {Object} input Request parameters.
 * @returns {Array<Object>} Filter clauses. The last entry always excludes
 *   claims whose bid_state is 'Accepted'; when any of the options above
 *   applies, a nested filter on `value` precedes it.
 */
function getFilters (input) {
  const CONTENT_TYPE_FIELD = 'value.stream.source.contentType.keyword';
  const nestedClauses = [];

  if (input.nsfw === 'true' || input.nsfw === 'false') {
    nestedClauses.push({ 'match': { 'value.stream.metadata.nsfw': input.nsfw } });
  }

  if (input.contentType !== undefined) {
    nestedClauses.push({ 'terms': { [CONTENT_TYPE_FIELD]: input.contentType.split(',') } });
  }

  if (input.mediaType !== undefined) {
    const prefixTypes = ['audio', 'video', 'text', 'application', 'image'];
    const alternatives = [];
    for (const requested of input.mediaType.split(',')) {
      if (prefixTypes.includes(requested)) {
        // e.g. 'video' matches every content type starting with 'video/'
        alternatives.push({ 'prefix': { [CONTENT_TYPE_FIELD]: requested + '/' } });
      } else if (requested === 'cad') {
        alternatives.push({ 'terms': { [CONTENT_TYPE_FIELD]: ['SKP', 'simplify3d_stl'] } });
      }
    }
    nestedClauses.push(
      alternatives.length === 0
        ? { 'match_none': {} }
        : { 'bool': { 'should': alternatives } }
    );
  }

  if (input.claimType === 'channel' || input.claimType === 'file') {
    const storedType = input.claimType === 'channel' ? 'certificateType' : 'streamType';
    nestedClauses.push({ 'match': { 'value.claimType': storedType } });
  }

  const bidStateFilter = { 'bool': { 'must_not': { 'match': { 'bid_state': 'Accepted' } } } };
  if (nestedClauses.length === 0) {
    return [bidStateFilter];
  }
  return [
    { 'nested': { 'path': 'value', 'query': { 'bool': { 'must': nestedClauses } } } },
    bidStateFilter,
  ];
}
|
|
|
|
/**
 * Runs the autocomplete search.
 *
 * The index comes from getIndex() (falling back to 'claims') and the routing
 * key from getRoutingKey(). Paging is sent both inside the body (defaulting to
 * size 10 / from 0) and as top-level request parameters (passed through
 * unchanged).
 *
 * @param {Object} query Request parameters (`s`, optional `size` / `from`,
 *   plus anything getFilters() reads).
 * @returns {Promise} Whatever `eclient.search` returns.
 */
function getAutoComplete (query) {
  const searchBody = {
    size : query.size || 10,
    from : query.from || 0,
    query: {
      bool: {
        must  : getAutoCompleteQuery(query),
        filter: getFilters(query),
      },
    },
  };

  const request = {
    index             : getIndex(query) || 'claims',
    routing           : getRoutingKey(query),
    ignore_unavailable: true, // ignore error when date index does not exist
    body              : searchBody,
    size              : query.size,
    from              : query.from,
  };

  return eclient.search(request);
}
|
|
|
|
/**
 * Collects a status summary for the `claims` index.
 *
 * Stats are requested from the same Elasticsearch node the search client is
 * configured for (ELASTIC_URL, defaulting to http://localhost:9200) rather
 * than always from localhost.
 *
 * @returns {Promise<Object>} Resolves to
 *   `{ status, spaceUsed, claimsInIndex, totSearches }`, where `status` is the
 *   value of `getStats()` and the rest is read from the `_stats` response.
 *   Rejects if the request fails or the response is not the expected JSON.
 */
async function getStatus () {
  // Drop trailing slashes so the joined URL does not contain '//claims'.
  const elasticUrl = (process.env.ELASTIC_URL || 'http://localhost:9200').replace(/\/+$/, '');
  const response = await rp(`${elasticUrl}/claims/_stats`);
  const totals = JSON.parse(response)._all.total;
  return {
    status       : getStats(),
    spaceUsed    : pretty(totals.store.size_in_bytes, true),
    claimsInIndex: totals.indexing.index_total,
    totSearches  : totals.search.query_total,
  };
}
|
|
|
|
/**
 * Produces a simplified version of a search string.
 *
 * Steps: lower-case, collapse runs of spaces, drop the first `lbry://`,
 * split on `&`, `$` and space, remove common filler words, then strip words
 * of one or two characters.
 *
 * @param {string} query Raw search text.
 * @returns {string} The washed text (may be empty).
 */
function getWashedQuery (query) {
  const fillerWords = [ 'from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the' ];

  // compress multiple white spaces to 1
  const normalized = query.toLowerCase().replace(/ +/g, ' ').replace('lbry://', '');

  // A character class keeps `$` literal. The previous pattern `&|$| ` used it
  // as an end-of-input anchor, so `$` never acted as a separator.
  const kept = normalized
    .split(/[&$ ]/)
    .filter((word) => !fillerWords.includes(word));

  // remove all words < 3 in length
  return kept.join(' ').replace(/((\s|^)\b(\w{1,2})\b)/g, '').trim();
}
|
|
|
|
/**
 * Escapes a string for use in an Elasticsearch `query_string` query.
 *
 * https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
 * The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
 *
 * Each reserved character (and each `&&` / `||` pair) is prefixed with a
 * backslash. `<` and `>` are removed instead, because the page above states
 * they cannot be escaped at all. A single `&` or `|` is left untouched.
 *
 * @param {string} query Text to escape.
 * @returns {string} The escaped text.
 */
function getEscapedQuery (query) {
  return query
    .replace(/[<>]/g, '')
    // `&&` and `||` are tried first so a pair gets a single backslash.
    .replace(/&&|\|\||[+\-=!(){}[\]^"~*?:\\\/]/g, '\\$&');
}
|
|
|
|
/**
 * Runs `update.sh` from the home directory through shelljs.
 *
 * shelljs is loaded on demand, at call time, rather than at module load.
 *
 * @returns {Promise<undefined>}
 */
async function update () {
  const updateCommand = 'cd ~ && ./update.sh';
  const shelljs = require('shelljs');
  shelljs.exec(updateCommand);
}
|
|
|
|
/**
 * Koa route handlers for the Lighthouse search API.
 */
class LighthouseControllers {
  /* eslint-disable no-param-reassign */

  /**
   * Starts the claim importer (logs, then calls `claimSync`).
   */
  startSync () {
    winston.log('info', '[Importer] Started importer, indexing claims.');
    claimSync();
  }

  /**
   * Search API Endpoint.
   * Responds with the `_source` of every hit returned by getResults().
   * @param {Object} ctx Koa context
   */
  async search (ctx) {
    const result = await getResults(ctx.query);
    ctx.body = result.hits.hits.map((hit) => hit._source);
  }

  /**
   * Autocomplete API Endpoint.
   * Responds with a de-duplicated list of names, titles and authors that
   * contain the trimmed search text, do not contain 'http', and are longer
   * than three characters.
   * @param {Object} ctx Koa context
   */
  async autoComplete (ctx) {
    const result = await getAutoComplete(ctx.query);
    const needle = ctx.query.s.trim();

    // Hits are not guaranteed to carry a title or an author (or any stream
    // metadata at all), so anything that is not a string is skipped instead
    // of failing the whole request with a TypeError.
    const isSuggestion = (text) =>
      typeof text === 'string' &&
      text.length > 3 &&
      text.indexOf(needle) > -1 &&
      text.indexOf('http') === -1;

    const suggestions = [];
    for (const hit of result.hits.hits) {
      const source = hit._source;
      const metadata = source.value && source.value.stream && source.value.stream.metadata;
      const candidates = metadata
        ? [source.name, metadata.title, metadata.author]
        : [source.name];
      for (const candidate of candidates) {
        if (isSuggestion(candidate) && !suggestions.includes(candidate)) {
          suggestions.push(candidate);
        }
      }
    }
    ctx.body = suggestions;
  }

  /**
   * Info about the api here: redirects to the project page.
   * @param {Object} ctx Koa context
   */
  async info (ctx) {
    ctx.redirect('https://github.com/lbryio/lighthouse');
  }

  /**
   * Status of the api here.
   * @param {Object} ctx Koa context
   */
  async status (ctx) {
    ctx.body = await getStatus();
  }

  /**
   * AutoUpdate updates the application from the master branch.
   *
   * The request is expected to be a Travis CI webhook: the `signature`
   * header is checked against the public key published at
   * https://api.travis-ci.com/config before anything is run. Only
   * non-pull-request notifications for `master` trigger update().
   *
   * Responses: 'OK' on update, 400 when the notification is skipped,
   * 500 when the webhook cannot be verified (including a missing signature
   * or payload).
   * @param {Object} ctx Koa context
   */
  async autoUpdate (ctx) {
    // Sets the response and mirrors the message to Slack.
    const skip = (status, message) => {
      ctx.status = status;
      ctx.body = message;
      logToSlack(ctx.body);
    };

    const signature = ctx.request.headers.signature;
    const payload = ctx.request.body && ctx.request.body.payload;
    // Without both parts there is nothing to verify; answer like any other
    // failed verification instead of throwing on Buffer.from(undefined).
    if (!signature || !payload) {
      skip(500, 'skip auto update: could not verify webhook');
      return;
    }

    const travisSignature = Buffer.from(signature, 'base64');
    const travisResponse = await got('https://api.travis-ci.com/config', {timeout: 10000});
    const travisPublicKey = JSON.parse(travisResponse.body).config.notifications.webhook.public_key;
    const verifier = crypto.createVerify('sha1');
    verifier.update(payload);
    if (!verifier.verify(travisPublicKey, travisSignature)) {
      skip(500, 'skip auto update: could not verify webhook');
      return;
    }

    const notification = JSON.parse(payload);
    if (notification.branch !== 'master') {
      skip(400, 'skip auto update: only deploys on master branch');
      return;
    }
    if (notification.pull_request) {
      skip(400, 'skip auto update: pull request');
      return;
    }

    logToSlack('Auto Updating Lighthouse - ' + notification.message);
    // Awaited so a failing update surfaces as a request error rather than an
    // unhandled rejection.
    await update();
    ctx.body = 'OK';
  }

  /* eslint-enable no-param-reassign */
}
|
|
|
|
export default new LighthouseControllers();
|