Merge pull request #118 from lbryio/test_effective

added blacklist for words.
This commit is contained in:
Mark 2018-09-26 21:39:01 -04:00 committed by GitHub
commit f437ec0725
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -34,6 +34,10 @@ function getResults (input) {
if (input.from + input.size > 10000) { if (input.from + input.size > 10000) {
input.from = 10000 - input.size; input.from = 10000 - input.size;
} }
let trimmedQuery = input.s.trim();
let escapedQuery = getWashedQuery(getEscapedQuery(trimmedQuery));
let washedQuery = getWashedQuery(trimmedQuery);
let effectiveFactor = '0.0000000001';
// Search is split up into different parts, all search parts goes under this line. // Search is split up into different parts, all search parts goes under this line.
let channelSearch; let channelSearch;
if (input.channel !== undefined) { // If we got a channel argument, lets filter out only that channel if (input.channel !== undefined) { // If we got a channel argument, lets filter out only that channel
@ -42,7 +46,7 @@ function getResults (input) {
'must': { 'must': {
'query_string': { 'query_string': {
'fields': ['channel'], 'fields': ['channel'],
'query' : getEscapedQuery(input.channel.trim()), 'query' : getEscapedQuery(getWashedQuery(input.channel.trim())),
}, },
}, },
}, },
@ -60,7 +64,7 @@ function getResults (input) {
'function_score': { 'function_score': {
'script_score': { 'script_score': {
'script': { 'script': {
'source': "0.00000001 * doc['effective_amount'].value", 'source': `${effectiveFactor} * doc['effective_amount'].value`,
}, },
}, },
}, },
@ -68,7 +72,7 @@ function getResults (input) {
const matPhraseName = { // Match search text as phrase - Name const matPhraseName = { // Match search text as phrase - Name
'match_phrase': { 'match_phrase': {
'name': { 'name': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 10, 'boost': 10,
}, },
}, },
@ -76,14 +80,14 @@ function getResults (input) {
const matTextName = { // Match search text - Name const matTextName = { // Match search text - Name
'match': { 'match': {
'name': { 'name': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 5, 'boost': 5,
}, },
}, },
}; };
const conTermName = { // Contains search term - Name const conTermName = { // Contains search term - Name
'query_string': { 'query_string': {
'query' : '*' + getEscapedQuery(input.s.trim()) + '*', 'query' : `*${escapedQuery}*`,
'fields': [ 'fields': [
'name', 'name',
], ],
@ -98,7 +102,7 @@ function getResults (input) {
'should': [ 'should': [
{ // Contains search term in Author, Title, Description { // Contains search term in Author, Title, Description
'query_string': { 'query_string': {
'query' : '*' + getEscapedQuery(input.s.trim()) + '*', 'query' : `*${escapedQuery}*`,
'fields': [ 'fields': [
'value.stream.metadata.author', 'value.stream.metadata.author',
'value.stream.metadata.title', 'value.stream.metadata.title',
@ -110,7 +114,7 @@ function getResults (input) {
{ // Match search term - Author { // Match search term - Author
'match': { 'match': {
'value.stream.metadata.author': { 'value.stream.metadata.author': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 2, 'boost': 2,
}, },
}, },
@ -118,7 +122,7 @@ function getResults (input) {
{ // Match search text as phrase - Author { // Match search text as phrase - Author
'match_phrase': { 'match_phrase': {
'value.stream.metadata.author': { 'value.stream.metadata.author': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 3, 'boost': 3,
}, },
}, },
@ -126,7 +130,7 @@ function getResults (input) {
{ // Match search term - Title { // Match search term - Title
'match': { 'match': {
'value.stream.metadata.title': { 'value.stream.metadata.title': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 2, 'boost': 2,
}, },
}, },
@ -134,7 +138,7 @@ function getResults (input) {
{ // Match search text as phrase - Title { // Match search text as phrase - Title
'match_phrase': { 'match_phrase': {
'value.stream.metadata.title': { 'value.stream.metadata.title': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 3, 'boost': 3,
}, },
}, },
@ -142,7 +146,7 @@ function getResults (input) {
{ // Match search term - Description { // Match search term - Description
'match': { 'match': {
'value.stream.metadata.description': { 'value.stream.metadata.description': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 2, 'boost': 2,
}, },
}, },
@ -150,7 +154,7 @@ function getResults (input) {
{ // Match search text as phrase - Description { // Match search text as phrase - Description
'match_phrase': { 'match_phrase': {
'value.stream.metadata.description': { 'value.stream.metadata.description': {
'query': input.s.trim(), 'query': washedQuery,
'boost': 3, 'boost': 3,
}, },
}, },
@ -283,6 +287,21 @@ function getStatus () {
}); });
} }
/**
 * Normalises a search string before it is placed into a query clause.
 *
 * The input is lower-cased and split on any run of whitespace; blacklisted
 * stop words and tokens made up of only one or two word characters are
 * dropped, and the surviving tokens are joined with single spaces.
 *
 * Filtering is done per whitespace-delimited token, so the result never has
 * leading, trailing or doubled spaces, and a short fragment inside a longer
 * token (e.g. the "b" in "a-b") is left alone.
 *
 * @param {string} query - Raw (or already escaped) search text.
 * @returns {string} The washed query; an empty string when nothing survives.
 */
function getWashedQuery (query) {
  const badWords = new Set([ 'from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the' ]);
  // A token counts as "too short" only when it is entirely 1-2 word
  // characters; tokens such as "c#" carry punctuation and are kept.
  const shortWord = /^\w{1,2}$/;
  return query
    .toLowerCase()
    .split(/\s+/) // any whitespace run (spaces, tabs, newlines) separates tokens
    .filter(word => word !== '' && !badWords.has(word) && !shortWord.test(word))
    .join(' ');
}
function getEscapedQuery (query) { function getEscapedQuery (query) {
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
// The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ / // The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /