From 879869cb317ec8dcb4c847a5861af5ea9780a8e7 Mon Sep 17 00:00:00 2001 From: zeppi Date: Thu, 24 Mar 2022 19:33:32 -0400 Subject: [PATCH] fix shoulds in es --- scribe/elasticsearch/search.py | 47 ++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/scribe/elasticsearch/search.py b/scribe/elasticsearch/search.py index 6ba31ef..140a56e 100644 --- a/scribe/elasticsearch/search.py +++ b/scribe/elasticsearch/search.py @@ -396,18 +396,43 @@ def expand_query(**kwargs): if 'signature_valid' in kwargs: query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}}) elif 'signature_valid' in kwargs: - query.setdefault('should', []) - query["minimum_should_match"] = 1 - query['should'].append({"bool": {"must_not": {"exists": {"field": "signature"}}}}) - query['should'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}}) + query['must'].append( + {"bool": + {"should": [ + {"bool": {"must_not": {"exists": {"field": "signature"}}}}, + {"bool" : {"must" : {"term": {"is_signature_valid": bool(kwargs["signature_valid"])}}}} + ]} + } + ) if 'has_source' in kwargs: - query.setdefault('should', []) - query["minimum_should_match"] = 1 - is_stream_or_repost = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}} - query['should'].append( - {"bool": {"must": [{"match": {"has_source": kwargs['has_source']}}, is_stream_or_repost]}}) - query['should'].append({"bool": {"must_not": [is_stream_or_repost]}}) - query['should'].append({"bool": {"must": [{"term": {"reposted_claim_type": CLAIM_TYPES['channel']}}]}}) + is_stream_or_repost_terms = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}} + query['must'].append( + {"bool": + {"should": [ + {"bool": # when is_stream_or_repost AND has_source + {"must": [ + {"match": {"has_source": kwargs['has_source']}}, + is_stream_or_repost_terms, + ] + }, + }, + {"bool": # when not is_stream_or_repost + {"must_not": is_stream_or_repost_terms} + }, + {"bool": # when reposted_claim_type wouldn't have source + {"must_not": + [ + {"term": {"reposted_claim_type": CLAIM_TYPES['stream']}} + ], + "must": + [ + {"term": {"claim_type": CLAIM_TYPES['repost']}} + ] + } + } + ]} + } + ) if kwargs.get('text'): query['must'].append( {"simple_query_string":