From 5b2b27442398e2f7e8412f354187cdb8cce0c805 Mon Sep 17 00:00:00 2001 From: Mark Beamer Jr Date: Wed, 26 Sep 2018 18:41:28 -0400 Subject: [PATCH] added blacklist for words. don't search words less than 3 characters --- server/controllers/lighthouse.js | 43 +++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index 186e6ab..e18ef34 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -34,6 +34,10 @@ function getResults (input) { if (input.from + input.size > 10000) { input.from = 10000 - input.size; } + let trimmedQuery = input.s.trim(); + let escapedQuery = getWashedQuery(getEscapedQuery(trimmedQuery)); + let washedQuery = getWashedQuery(trimmedQuery); + let effectiveFactor = '0.0000000001'; // Search is split up into different parts, all search parts goes under this line. let channelSearch; if (input.channel !== undefined) { // If we got a channel argument, lets filter out only that channel @@ -42,7 +46,7 @@ function getResults (input) { 'must': { 'query_string': { 'fields': ['channel'], - 'query' : getEscapedQuery(input.channel.trim()), + 'query' : getEscapedQuery(getWashedQuery(input.channel.trim())), }, }, }, @@ -60,7 +64,7 @@ function getResults (input) { 'function_score': { 'script_score': { 'script': { - 'source': "0.00000001 * doc['effective_amount'].value", + 'source': `${effectiveFactor} * doc['effective_amount'].value`, }, }, }, @@ -68,7 +72,7 @@ function getResults (input) { const matPhraseName = { // Match search text as phrase - Name 'match_phrase': { 'name': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 10, }, }, @@ -76,14 +80,14 @@ function getResults (input) { const matTextName = { // Match search text - Name 'match': { 'name': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 5, }, }, }; const conTermName = { // Contains search term - Name 'query_string': { - 'query' : '*' + getEscapedQuery(input.s.trim()) + '*', + 'query' : `*${escapedQuery}*`, 'fields': [ 'name', ], @@ -98,7 +102,7 @@ function getResults (input) { 'should': [ { // Contains search term in Author, Title, Description 'query_string': { - 'query' : '*' + getEscapedQuery(input.s.trim()) + '*', + 'query' : `*${escapedQuery}*`, 'fields': [ 'value.stream.metadata.author', 'value.stream.metadata.title', @@ -110,7 +114,7 @@ function getResults (input) { { // Match search term - Author 'match': { 'value.stream.metadata.author': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 2, }, }, @@ -118,7 +122,7 @@ function getResults (input) { { // Match search text as phrase - Author 'match_phrase': { 'value.stream.metadata.author': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 3, }, }, @@ -126,7 +130,7 @@ function getResults (input) { { // Match search term - Title 'match': { 'value.stream.metadata.title': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 2, }, }, @@ -134,7 +138,7 @@ function getResults (input) { { // Match search text as phrase - Title 'match_phrase': { 'value.stream.metadata.title': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 3, }, }, @@ -142,7 +146,7 @@ function getResults (input) { { // Match search term - Description 'match': { 'value.stream.metadata.description': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 2, }, }, @@ -150,7 +154,7 @@ function getResults (input) { { // Match search text as phrase - Description 'match_phrase': { 'value.stream.metadata.description': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 3, }, }, @@ -283,6 +287,21 @@ function getStatus () { }); } +function getWashedQuery (query) { + // compress multiple white spaces to 1 + query = query.toLowerCase().replace(/ +/g, ' '); + let badWords = [ 'from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the' ]; + let words = query.split(' '); + let sentence = []; + words.forEach(w => { + if (!badWords.includes(w)) { sentence.push(w) } + }); + query = sentence.join(' '); + + // remove all words < 3 in length + return query.replace(/(\b(\w{1,2})\b(\s|$))/g, ''); +} + function getEscapedQuery (query) { // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters // The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /