Skip to content

Commit

Permalink
fix(search): Filter out "removed" entities from autocomplete and anal…
Browse files Browse the repository at this point in the history
…ytics (#2781)
  • Loading branch information
Dexter Lee authored Jun 30, 2021
1 parent 69d9cf4 commit 6fee59e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 23 deletions.
20 changes: 10 additions & 10 deletions datahub-frontend/app/react/analytics/AnalyticsService.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package react.analytics;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.linkedin.metadata.dao.exception.ESQueryException;
import graphql.BarSegment;
import graphql.DateInterval;
Expand Down Expand Up @@ -64,13 +65,12 @@ public AnalyticsService(final RestHighLevelClient elasticClient) {
public List<NamedLine> getTimeseriesChart(String indexName, DateRange dateRange, DateInterval granularity,
Optional<String> dimension, // Length 1 for now
Map<String, List<String>> filters, Optional<String> uniqueOn) {

_logger.debug(
String.format("Invoked getTimeseriesChart with indexName: %s, dateRange: %s, granularity: %s, dimension: %s,",
indexName, dateRange, granularity, dimension)
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, Optional.of(dateRange));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), Optional.of(dateRange));

AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM)
.field("timestamp")
Expand Down Expand Up @@ -123,8 +123,7 @@ public List<NamedBar> getBarChart(String indexName, Optional<DateRange> dateRang
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

assert (dimensions.size() == 1 || dimensions.size() == 2);

AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);

AggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0)).missing(NA);
if (dimensions.size() == 2) {
Expand Down Expand Up @@ -170,13 +169,12 @@ private List<BarSegment> extractBarSegmentsFromAggregations(Aggregations aggrega

public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRange, String groupBy,
Map<String, List<String>> filters, Optional<String> uniqueOn, int maxRows) {

_logger.debug(
String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s",
indexName, dateRange, groupBy)
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);

TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(groupBy).size(maxRows);
if (uniqueOn.isPresent()) {
Expand All @@ -201,8 +199,8 @@ public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRan
}

public int getHighlights(String indexName, Optional<DateRange> dateRange, Map<String, List<String>> filters,
Optional<String> uniqueOn) {
AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn) {
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);
uniqueOn.ifPresent(s -> filteredAgg.subAggregation(getUniqueQuery(s)));

SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg);
Expand Down Expand Up @@ -239,9 +237,11 @@ private Filter executeAndExtract(SearchRequest searchRequest) {
}
}

private AggregationBuilder getFilteredAggregation(Map<String, List<String>> filters, Optional<DateRange> dateRange) {
private AggregationBuilder getFilteredAggregation(Map<String, List<String>> mustFilters,
Map<String, List<String>> mustNotFilters, Optional<DateRange> dateRange) {
BoolQueryBuilder filteredQuery = QueryBuilders.boolQuery();
filters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values)));
mustFilters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values)));
mustNotFilters.forEach((key, values) -> filteredQuery.mustNot(QueryBuilders.termsQuery(key, values)));
dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range)));
return AggregationBuilders.filter(FILTERED, filteredQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ private List<Highlight> getHighlights() {

int weeklyActiveUsers =
_analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRange),
ImmutableMap.of(), Optional.of("browserId"));
ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId"));

int weeklyActiveUsersLastWeek =
_analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRangeLastWeek),
ImmutableMap.of(), Optional.of("browserId"));
ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId"));

String bodyText = "";
if (weeklyActiveUsersLastWeek > 0) {
Expand All @@ -79,10 +79,11 @@ private List<Highlight> getHighlights() {
}

private Highlight getEntityMetadataStats(String title, String index) {
int numEntities = _analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of(), Optional.empty());
int numEntities = _analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty());
int numEntitiesWithOwners =
_analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of("hasOwners", ImmutableList.of("true")),
Optional.empty());
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty());
String bodyText = "";
if (numEntities > 0) {
double percentChange = 100.0 * numEntitiesWithOwners / numEntities;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
Expand Down Expand Up @@ -65,13 +66,17 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable String fi
}

private QueryBuilder getQuery(@Nonnull String query, @Nullable String field) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
// Search for exact matches with higher boost and ngram matches
List<String> fieldNames = getAutocompleteFields(field).stream()
.flatMap(fieldName -> Stream.of(fieldName, fieldName + ".ngram"))
.collect(Collectors.toList());
MultiMatchQueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(query, fieldNames.toArray(new String[0]));
queryBuilder.analyzer(ANALYZER);
return queryBuilder;
MultiMatchQueryBuilder autocompleteQueryBuilder =
QueryBuilders.multiMatchQuery(query, fieldNames.toArray(new String[0]));
autocompleteQueryBuilder.analyzer(ANALYZER);
finalQuery.should(autocompleteQueryBuilder);
finalQuery.mustNot(QueryBuilders.matchQuery("removed", true));
return finalQuery;
}

// Get HighlightBuilder to highlight the matched field
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import java.util.List;
import java.util.Map;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
Expand All @@ -22,11 +24,17 @@ public void testDefaultAutocompleteRequest() {
SearchRequest autocompleteRequest = handler.getSearchRequest("input", null, null, 10);
SearchSourceBuilder sourceBuilder = autocompleteRequest.source();
assertEquals(sourceBuilder.size(), 10);
MultiMatchQueryBuilder queryBuilder = (MultiMatchQueryBuilder) sourceBuilder.query();
Map<String, Float> queryFields = queryBuilder.fields();
BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query();
assertEquals(query.should().size(), 1);
MultiMatchQueryBuilder matchQuery = (MultiMatchQueryBuilder) query.should().get(0);
Map<String, Float> queryFields = matchQuery.fields();
assertTrue(queryFields.containsKey("keyPart1"));
assertTrue(queryFields.containsKey("keyPart1.ngram"));
assertEquals(queryBuilder.analyzer(), "word_delimited");
assertEquals(matchQuery.analyzer(), "word_delimited");
assertEquals(query.mustNot().size(), 1);
MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0);
assertEquals(removedFilter.fieldName(), "removed");
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<HighlightBuilder.Field> highlightedFields = highlightBuilder.fields();
assertEquals(highlightedFields.size(), 2);
Expand All @@ -40,11 +48,16 @@ public void testAutocompleteRequestWithField() {
SearchRequest autocompleteRequest = handler.getSearchRequest("input", "field", null, 10);
SearchSourceBuilder sourceBuilder = autocompleteRequest.source();
assertEquals(sourceBuilder.size(), 10);
MultiMatchQueryBuilder queryBuilder = (MultiMatchQueryBuilder) sourceBuilder.query();
Map<String, Float> queryFields = queryBuilder.fields();
BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query();
assertEquals(query.should().size(), 1);
MultiMatchQueryBuilder matchQuery = (MultiMatchQueryBuilder) query.should().get(0);
Map<String, Float> queryFields = matchQuery.fields();
assertTrue(queryFields.containsKey("field"));
assertTrue(queryFields.containsKey("field.ngram"));
assertEquals(queryBuilder.analyzer(), "word_delimited");
assertEquals(matchQuery.analyzer(), "word_delimited");
MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0);
assertEquals(removedFilter.fieldName(), "removed");
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<HighlightBuilder.Field> highlightedFields = highlightBuilder.fields();
assertEquals(highlightedFields.size(), 2);
Expand Down

0 comments on commit 6fee59e

Please sign in to comment.