Use the Porter stemming analyzer with per-field weights for full-text search

This commit is contained in:
Victor Shyba 2021-01-19 18:38:31 -03:00
parent dc10f8ce72
commit aa37faab0a
2 changed files with 8 additions and 7 deletions

View file

@ -29,7 +29,7 @@ class SearchIndex:
self.index, self.index,
{"settings": {"settings":
{"analysis": {"analysis":
{"analyzer": {"porter": {"tokenizer": "whitespace", "filter": ["lowercase", "porter_stem" ]}}} {"analyzer": {"default": {"tokenizer": "whitespace", "filter": ["lowercase", "porter_stem" ]}}}
} }
} }
) )
@ -274,10 +274,10 @@ def expand_query(**kwargs):
query['should'].append({"term": {"signature_valid": bool(kwargs["signature_valid"])}}) query['should'].append({"term": {"signature_valid": bool(kwargs["signature_valid"])}})
if 'text' in kwargs: if 'text' in kwargs:
return {"query": return {"query":
{"query_string": {"simple_query_string":
{"query": kwargs["text"], "fields": [ {"query": kwargs["text"], "fields": [
"claim_name", "channel_name", "title", "description", "author", "tags" "claim_name^4", "channel_name^8", "title^1", "description^.5", "author^1", "tags^.5"
], "analyzer": "porter"}}} ]}}}
query = { query = {
'query': {'bool': query}, 'query': {'bool': query},
"sort": [], "sort": [],

View file

@ -433,10 +433,11 @@ class ClaimSearchCommand(ClaimTestCase):
await self.assertFindsClaims([claim2], text='autobiography') await self.assertFindsClaims([claim2], text='autobiography')
await self.assertFindsClaims([claim3], text='history') await self.assertFindsClaims([claim3], text='history')
await self.assertFindsClaims([claim4], text='conspiracy') await self.assertFindsClaims([claim4], text='conspiracy')
await self.assertFindsClaims([], text='conspiracy AND history') await self.assertFindsClaims([], text='conspiracy+history')
await self.assertFindsClaims([claim4, claim3], text='conspiracy OR history') await self.assertFindsClaims([claim4, claim3], text='conspiracy|history')
await self.assertFindsClaims([claim1, claim4, claim2, claim3], text='documentary') await self.assertFindsClaims([claim1, claim4, claim2, claim3], text='documentary')
await self.assertFindsClaims([claim4, claim1, claim2, claim3], text='satoshi') # TODO: check why the order of claim1 and claim2 changed; it used to be ...claim1, claim2...
await self.assertFindsClaims([claim4, claim2, claim1, claim3], text='satoshi')
claim2 = await self.stream_update( claim2 = await self.stream_update(
self.get_claim_id(claim2), clear_tags=True, tags=['cloud'], self.get_claim_id(claim2), clear_tags=True, tags=['cloud'],