From ad533e3989cc48e0b0b19f4123a9b9ac00c4556d Mon Sep 17 00:00:00 2001 From: Mark Beamer Jr <markbeamerjr@gmail.com> Date: Sat, 15 Sep 2018 21:39:51 -0400 Subject: [PATCH 1/7] trimmed whitespace for search terms. --- server/controllers/lighthouse.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index e33fca7..09ff375 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -42,7 +42,7 @@ function getResults (input) { 'must': { 'query_string': { 'fields': ['channel'], - 'query' : getEscapedQuery(input.channel), + 'query' : getEscapedQuery(input.channel.trim()), }, }, }, @@ -83,7 +83,7 @@ function getResults (input) { }; const conTermName = { // Contains search term - Name 'query_string': { - 'query' : '*' + getEscapedQuery(input.s) + '*', + 'query' : '*' + getEscapedQuery(input.s.trim()) + '*', 'fields': [ 'name', ], @@ -98,7 +98,7 @@ function getResults (input) { 'should': [ { // Contains search term in Author, Title, Description 'query_string': { - 'query' : '*' + getEscapedQuery(input.s) + '*', + 'query' : '*' + getEscapedQuery(input.s.trim()) + '*', 'fields': [ 'value.stream.metadata.author', 'value.stream.metadata.title', -- 2.47.2 From 6960a0084e8a8e907e3e2f64d63b7063fea2641d Mon Sep 17 00:00:00 2001 From: Mark Beamer Jr <markbeamerjr@gmail.com> Date: Sat, 15 Sep 2018 22:12:43 -0400 Subject: [PATCH 2/7] I was missing some additional special characters that needed to be escaped. 
--- server/controllers/lighthouse.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index e33fca7..24906c8 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -263,7 +263,9 @@ function getStatus () { } function getEscapedQuery (query) { - let badCharacters = ['+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\']; + // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters + // The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ / + let badCharacters = ['+', '-', '=', '&&', '||', '>', '<', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\', '/']; let escapedQuery = ''; for (var i = 0; i < query.length; i++) { let char1 = query.charAt(i); -- 2.47.2 From 56545cfed22347bcbdc3cf795986a6385bebb799 Mon Sep 17 00:00:00 2001 From: Mark Beamer Jr <markbeamerjr@gmail.com> Date: Sun, 16 Sep 2018 00:53:24 -0400 Subject: [PATCH 3/7] Fixed the auto-complete query to leverage the nested structure and to return only relevant claims and the text that triggered the auto-complete. 
--- server/controllers/lighthouse.js | 60 ++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index e33fca7..6169a05 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -208,20 +208,41 @@ function getRoutingKey () { function getAutoCompleteQuery (query) { return { - multi_match: { - query : query.s.trim(), - type : 'phrase_prefix', - slop : 5, - max_expansions: 50, - fields : [ - 'name', - 'value.stream.metadata.author', - 'value.stream.metadata.title', - 'value.stream.metadata.description', + bool: { + should: [ + { // Author, Title, Description + nested: { + path : 'value', + query: { + multi_match: { + query : query.s.trim(), + type : 'phrase_prefix', + slop : 5, + max_expansions: 50, + fields : [ + 'value.stream.metadata.author^3', + 'value.stream.metadata.title^5', + 'value.stream.metadata.description^2', + ], + }, + }, + }, + }, + { // Name + multi_match: { + query : query.s.trim(), + type : 'phrase_prefix', + slop : 5, + max_expansions: 50, + fields : [ + 'name^4', + ], + }, + }, ], }, }; -} +}; function getFilter (query) { // this is the best place for putting things like filtering on the type of content @@ -235,7 +256,7 @@ function getAutoComplete (query) { routing : getRoutingKey(query), ignore_unavailable: true, // ignore error when date index does not exist body : { - size : query.size || 5, + size : query.size || 10, from : query.from || 0, query: { bool: { @@ -316,10 +337,19 @@ class LighthouseControllers { let results = result.hits.hits; let cResults = []; for (let pResult of results) { - cResults.push(pResult._source.name); + var name = pResult._source.name; + if (name.indexOf(ctx.query.s.trim()) > -1 && name.indexOf('http') === -1) { + cResults.push(name); + } if (pResult._source.value && pResult._source.value.stream !== undefined) { - cResults.push(pResult._source.value.stream.metadata.title); - 
cResults.push(pResult._source.value.stream.metadata.author); + var title = pResult._source.value.stream.metadata.title; + var author = pResult._source.value.stream.metadata.author; + if (title.indexOf(ctx.query.s.trim()) > -1 && title.indexOf('http') === -1) { + cResults.push(title); + } + if (author.indexOf(ctx.query.s.trim()) > -1 && author.indexOf('http') === -1) { + cResults.push(author); + } } } -- 2.47.2 From 5b2b27442398e2f7e8412f354187cdb8cce0c805 Mon Sep 17 00:00:00 2001 From: Mark Beamer Jr <markbeamerjr@gmail.com> Date: Wed, 26 Sep 2018 18:41:28 -0400 Subject: [PATCH 4/7] added blacklist for words. don't search words less than 3 characters --- server/controllers/lighthouse.js | 43 +++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index 186e6ab..e18ef34 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -34,6 +34,10 @@ function getResults (input) { if (input.from + input.size > 10000) { input.from = 10000 - input.size; } + let trimmedQuery = input.s.trim(); + let escapedQuery = getWashedQuery(getEscapedQuery(trimmedQuery)); + let washedQuery = getWashedQuery(trimmedQuery); + let effectiveFactor = '0.0000000001'; // Search is split up into different parts, all search parts goes under this line. 
let channelSearch; if (input.channel !== undefined) { // If we got a channel argument, lets filter out only that channel @@ -42,7 +46,7 @@ function getResults (input) { 'must': { 'query_string': { 'fields': ['channel'], - 'query' : getEscapedQuery(input.channel.trim()), + 'query' : getEscapedQuery(getWashedQuery(input.channel.trim())), }, }, }, @@ -60,7 +64,7 @@ function getResults (input) { 'function_score': { 'script_score': { 'script': { - 'source': "0.00000001 * doc['effective_amount'].value", + 'source': `${effectiveFactor} * doc['effective_amount'].value`, }, }, }, @@ -68,7 +72,7 @@ function getResults (input) { const matPhraseName = { // Match search text as phrase - Name 'match_phrase': { 'name': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 10, }, }, @@ -76,14 +80,14 @@ function getResults (input) { const matTextName = { // Match search text - Name 'match': { 'name': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 5, }, }, }; const conTermName = { // Contains search term - Name 'query_string': { - 'query' : '*' + getEscapedQuery(input.s.trim()) + '*', + 'query' : `*${escapedQuery}*`, 'fields': [ 'name', ], @@ -98,7 +102,7 @@ function getResults (input) { 'should': [ { // Contains search term in Author, Title, Description 'query_string': { - 'query' : '*' + getEscapedQuery(input.s.trim()) + '*', + 'query' : `*${escapedQuery}*`, 'fields': [ 'value.stream.metadata.author', 'value.stream.metadata.title', @@ -110,7 +114,7 @@ function getResults (input) { { // Match search term - Author 'match': { 'value.stream.metadata.author': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 2, }, }, @@ -118,7 +122,7 @@ function getResults (input) { { // Match search text as phrase - Author 'match_phrase': { 'value.stream.metadata.author': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 3, }, }, @@ -126,7 +130,7 @@ function getResults (input) { { // Match search term - Title 'match': { 'value.stream.metadata.title': { - 
'query': input.s.trim(), + 'query': washedQuery, 'boost': 2, }, }, @@ -134,7 +138,7 @@ function getResults (input) { { // Match search text as phrase - Title 'match_phrase': { 'value.stream.metadata.title': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 3, }, }, @@ -142,7 +146,7 @@ function getResults (input) { { // Match search term - Description 'match': { 'value.stream.metadata.description': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 2, }, }, @@ -150,7 +154,7 @@ function getResults (input) { { // Match search text as phrase - Description 'match_phrase': { 'value.stream.metadata.description': { - 'query': input.s.trim(), + 'query': washedQuery, 'boost': 3, }, }, @@ -283,6 +287,21 @@ function getStatus () { }); } +function getWashedQuery (query) { + // compress multiple white spaces to 1 + query = query.toLowerCase().replace(/ +/g, ' '); + let badWords = [ 'from', 'with', 'not', 'can', 'all', 'are', 'for', 'but', 'and', 'the' ]; + let words = query.split(' '); + let sentence = []; + words.forEach(w => { + if (!badWords.includes(w)) { sentence.push(w) } + }); + query = sentence.join(' '); + + // remove all words < 3 in length + return query.replace(/(\b(\w{1,2})\b(\s|$))/g, ''); +} + function getEscapedQuery (query) { // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters // The reserved characters are: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? 
: \ / -- 2.47.2 From 44ea0093ef219f3675058b6aa524678532a9073d Mon Sep 17 00:00:00 2001 From: marcdeb1 <marcdebrouchev@laposte.net> Date: Wed, 17 Oct 2018 11:42:25 +0200 Subject: [PATCH 5/7] Added NSFW filter to query --- server/controllers/lighthouse.js | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index e33fca7..c2ebc77 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -184,6 +184,7 @@ function getResults (input) { }, }, ], + 'filter': getFilters(input) }, }, size: input.size, @@ -223,10 +224,30 @@ function getAutoCompleteQuery (query) { }; } -function getFilter (query) { - // this is the best place for putting things like filtering on the type of content - // Perhaps we can add search param that will filter on how people have categorized / tagged their content - +function getFilters (input) { + // this is the best place for putting things like filtering on the type of content + // Perhaps we can add search param that will filter on how people have categorized / tagged their content + var filters = []; + if(input.nsfw === "true" || input.nsfw === "false") { + const nsfwFilter = {"match": {"value.stream.metadata.nsfw": input.nsfw}} + filters.push(nsfwFilter); + } + if(filters.length > 0) { + const filterQuery = { + "nested": { + "path": "value", + "query": { + "bool": { + "must": filters + } + } + } + }; + return filterQuery; + } + else { + return []; + } } function getAutoComplete (query) { -- 2.47.2 From b4052fc921e38c7a6f23f1ab95c14e49573f1f8a Mon Sep 17 00:00:00 2001 From: marcdeb1 <marcdebrouchev@laposte.net> Date: Thu, 18 Oct 2018 08:36:04 +0200 Subject: [PATCH 6/7] Syntax and indentation --- server/controllers/lighthouse.js | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index 
c2ebc77..fcf947c 100644 --- a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -184,7 +184,7 @@ function getResults (input) { }, }, ], - 'filter': getFilters(input) + 'filter': getFilters(input), }, }, size: input.size, @@ -225,29 +225,29 @@ function getAutoCompleteQuery (query) { } function getFilters (input) { - // this is the best place for putting things like filtering on the type of content - // Perhaps we can add search param that will filter on how people have categorized / tagged their content - var filters = []; - if(input.nsfw === "true" || input.nsfw === "false") { - const nsfwFilter = {"match": {"value.stream.metadata.nsfw": input.nsfw}} - filters.push(nsfwFilter); - } - if(filters.length > 0) { - const filterQuery = { - "nested": { - "path": "value", - "query": { - "bool": { - "must": filters - } - } - } - }; - return filterQuery; - } - else { - return []; - } + // this is the best place for putting things like filtering on the type of content + // Perhaps we can add search param that will filter on how people have categorized / tagged their content + var filters = []; + if (input.nsfw === 'true' || input.nsfw === 'false') { + const nsfwFilter = {'match': {'value.stream.metadata.nsfw': input.nsfw}}; + filters.push(nsfwFilter); + } + if (filters.length > 0) { + const filterQuery = { + 'nested': { + 'path': 'value', + 'query': { + 'bool': { + 'must': filters, + } + } + } + }; + return filterQuery; + } + else { + return []; + } } function getAutoComplete (query) { -- 2.47.2 From a409396f69e4265008531d0beeed4d361ad15820 Mon Sep 17 00:00:00 2001 From: Mark Beamer Jr <markbeamerjr@gmail.com> Date: Sun, 21 Oct 2018 00:06:39 -0400 Subject: [PATCH 7/7] fixed some lint issues and function name --- server/controllers/lighthouse.js | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/server/controllers/lighthouse.js b/server/controllers/lighthouse.js index fcf947c..2e9c818 100644 --- 
a/server/controllers/lighthouse.js +++ b/server/controllers/lighthouse.js @@ -235,17 +235,16 @@ function getFilters (input) { if (filters.length > 0) { const filterQuery = { 'nested': { - 'path': 'value', + 'path' : 'value', 'query': { 'bool': { 'must': filters, - } - } - } + }, + }, + }, }; return filterQuery; - } - else { + } else { return []; } } @@ -261,7 +260,7 @@ function getAutoComplete (query) { query: { bool: { must : getAutoCompleteQuery(query), - filter: getFilter(query), + filter: getFilters(query), }, }, }, -- 2.47.2