Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(filters) Fix issues with structured properties filters #11946

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ public static String toElasticsearchFieldName(
/**
* Return an elasticsearch type from structured property type
*
* @param fieldName filter or facet field name
* @param fieldName filter or facet field name - must match actual FQN of structured prop
* @param aspectRetriever aspect retriever
* @return elasticsearch type
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ private void addCriteriaFiltersToAggregationMetadata(
}
}

private void addCriterionFiltersToAggregationMetadata(
public void addCriterionFiltersToAggregationMetadata(
@Nonnull final Criterion criterion,
@Nonnull final List<AggregationMetadata> aggregationMetadata,
@Nullable AspectRetriever aspectRetriever) {
Expand Down Expand Up @@ -422,17 +422,32 @@ private void addCriterionFiltersToAggregationMetadata(
value ->
addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata));
}
} else if (aggregationMetadataMap.containsKey(criterion.getField())) {
/*
* If we already have aggregations for the facet field (original field name), simply inject any missing values counts into the set.
* If there are no results for a particular facet value, it will NOT be in the original aggregation set returned by
* Elasticsearch.
*/
AggregationMetadata originalAggMetadata = aggregationMetadataMap.get(criterion.getField());
criterion
.getValues()
.forEach(
value -> addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata));
} else {
/*
* If we do not have ANY aggregation for the facet field, then inject a new aggregation metadata object for the
* facet field.
* If there are no results for a particular facet, it will NOT be in the original aggregation set returned by
* Elasticsearch.
*/
// Simply replace suffix from original field when there are no aggregations for it. Prevents
// bug where ES mappings for field are different from how we map the field back to UI
// (ie. Structured Properties with dots in them)
String facetField = ESUtils.replaceSuffix(criterion.getField());
aggregationMetadata.add(
buildAggregationMetadata(
finalFacetField,
getFacetToDisplayNames().getOrDefault(finalFacetField, finalFacetField),
facetField,
getFacetToDisplayNames().getOrDefault(facetField, facetField),
new LongMap(
criterion.getValues().stream().collect(Collectors.toMap(i -> i, i -> 0L))),
new FilterValueArray(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -448,9 +448,20 @@ public static String toParentField(
urnDefinition.getFirst(), urnDefinition.getSecond()))
.orElse(filterField);

return replaceSuffix(fieldName);
}

/**
* Strip subfields from filter field
*
* @param fieldName name of the field
* @return normalized field name without subfields
*/
@Nonnull
public static String replaceSuffix(@Nonnull final String fieldName) {
for (String subfield : SUBFIELDS) {
String SUFFIX = "." + subfield;
if (filterField.endsWith(SUFFIX)) {
if (fieldName.endsWith(SUFFIX)) {
return fieldName.replace(SUFFIX, "");
}
}
Expand Down Expand Up @@ -710,7 +721,8 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues(
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever);
Set<String> fieldTypes =
getFieldTypes(searchableFieldTypes, fieldName, criterion, aspectRetriever);
if (fieldTypes.size() > 1) {
log.warn(
"Multiple field types for field name {}, determining best fit for set: {}",
Expand Down Expand Up @@ -753,12 +765,16 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues(
private static Set<String> getFieldTypes(
Map<String, Set<SearchableAnnotation.FieldType>> searchableFields,
String fieldName,
@Nonnull final Criterion criterion,
@Nullable AspectRetriever aspectRetriever) {

final Set<String> finalFieldTypes;
if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
// use criterion field here for structured props since fieldName has dots replaced with
// underscores
finalFieldTypes =
StructuredPropertyUtils.toElasticsearchFieldType(fieldName, aspectRetriever);
StructuredPropertyUtils.toElasticsearchFieldType(
replaceSuffix(criterion.getField()), aspectRetriever);
} else {
Set<SearchableAnnotation.FieldType> fieldTypes =
searchableFields.getOrDefault(fieldName.split("\\.")[0], Collections.emptySet());
Expand All @@ -782,7 +798,8 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion(
Condition condition,
boolean isTimeseries,
AspectRetriever aspectRetriever) {
Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever);
Set<String> fieldTypes =
getFieldTypes(searchableFieldTypes, fieldName, criterion, aspectRetriever);

// Determine criterion value, range query only accepts single value so take first value in
// values if multiple
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME;
import static com.linkedin.metadata.utils.SearchUtil.*;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anySet;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
Expand All @@ -12,23 +13,36 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.DataMap;
import com.linkedin.data.template.LongMap;
import com.linkedin.data.template.SetMode;
import com.linkedin.data.template.StringArray;
import com.linkedin.entity.Aspect;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.annotation.SearchableAnnotation;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.Criterion;
import com.linkedin.metadata.search.AggregationMetadata;
import com.linkedin.metadata.search.FilterValue;
import com.linkedin.metadata.search.FilterValueArray;
import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder;
import com.linkedin.r2.RemoteInvocationException;
import com.linkedin.structured.StructuredPropertyDefinition;
import io.datahubproject.test.metadata.context.TestOperationContexts;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.mockito.Mockito;
import org.opensearch.search.aggregations.AggregationBuilder;
import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.testng.Assert;
Expand Down Expand Up @@ -598,4 +612,94 @@ public void testMissingAggregation() {
.equals(
MISSING_SPECIAL_TYPE + AGGREGATION_SPECIAL_TYPE_DELIMITER + "test")));
}

@Test
public void testAddFiltersToMetadataWithStructuredPropsNoResults() {
final Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:test_me.one");

SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);

AggregationQueryBuilder builder =
new AggregationQueryBuilder(
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));

Criterion criterion =
new Criterion()
.setField("structuredProperties.test_me.one")
.setValues(new StringArray("test123"))
.setCondition(Condition.EQUAL);

AspectRetriever mockAspectRetriever = getMockAspectRetriever(propertyUrn);

final List<AggregationMetadata> aggregationMetadataList = new ArrayList<>();
builder.addCriterionFiltersToAggregationMetadata(
criterion, aggregationMetadataList, mockAspectRetriever);

// ensure we add the correct structured prop aggregation here
Assert.assertEquals(aggregationMetadataList.size(), 1);
// Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
Assert.assertEquals(
aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one");
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1);
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().get("test123"), 0);
}

@Test
public void testAddFiltersToMetadataWithStructuredPropsWithAggregations() {
final Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:test_me.one");

final AggregationMetadata aggregationMetadata = new AggregationMetadata();
aggregationMetadata.setName("structuredProperties.test_me.one");
FilterValue filterValue =
new FilterValue().setValue("test123").setFiltered(false).setFacetCount(1);
aggregationMetadata.setFilterValues(new FilterValueArray(filterValue));
LongMap aggregations = new LongMap();
aggregations.put("test123", 1L);
aggregationMetadata.setAggregations(aggregations);

SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);

AggregationQueryBuilder builder =
new AggregationQueryBuilder(
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));

Criterion criterion =
new Criterion()
.setField("structuredProperties.test_me.one")
.setValues(new StringArray("test123"))
.setCondition(Condition.EQUAL);

AspectRetriever mockAspectRetriever = getMockAspectRetriever(propertyUrn);

final List<AggregationMetadata> aggregationMetadataList = new ArrayList<>();
aggregationMetadataList.add(aggregationMetadata);
builder.addCriterionFiltersToAggregationMetadata(
criterion, aggregationMetadataList, mockAspectRetriever);

Assert.assertEquals(aggregationMetadataList.size(), 1);
Assert.assertEquals(
aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one");
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1);
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().get("test123"), 1);
}

private AspectRetriever getMockAspectRetriever(Urn propertyUrn) {
AspectRetriever mockAspectRetriever = Mockito.mock(AspectRetriever.class);
Map<Urn, Map<String, Aspect>> mockResult = new HashMap<>();
Map<String, Aspect> aspectMap = new HashMap<>();
DataMap definition = new DataMap();
definition.put("qualifiedName", "test_me.one");
definition.put("valueType", "urn:li:dataType:datahub.string");
Aspect definitionAspect = new Aspect(definition);
aspectMap.put(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, definitionAspect);
mockResult.put(propertyUrn, aspectMap);
Set<Urn> urns = new HashSet<>();
urns.add(propertyUrn);
Mockito.when(mockAspectRetriever.getLatestAspectObjects(eq(urns), any()))
.thenReturn(mockResult);

return mockAspectRetriever;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public static void setup() throws RemoteInvocationException, URISyntaxException
Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten");
Urn underscoresAndDotsUrn =
Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess");
Urn dateWithDotsUrn = Urn.createFromString("urn:li:structuredProperty:date_here.with_dot");

// legacy
aspectRetriever = mock(AspectRetriever.class);
Expand All @@ -64,6 +65,18 @@ public static void setup() throws RemoteInvocationException, URISyntaxException
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinition.data()))));

StructuredPropertyDefinition dateWithDotsDefinition = new StructuredPropertyDefinition();
dateWithDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL);
dateWithDotsDefinition.setValueType(Urn.createFromString(DATA_TYPE_URN_PREFIX + "date"));
dateWithDotsDefinition.setQualifiedName("date_here.with_dot");
when(aspectRetriever.getLatestAspectObjects(eq(Set.of(dateWithDotsUrn)), anySet()))
.thenReturn(
Map.of(
dateWithDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(dateWithDotsDefinition.data()))));

StructuredPropertyDefinition structPropUnderscoresAndDotsDefinition =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL);
Expand Down Expand Up @@ -895,6 +908,36 @@ public void testGetQueryBuilderFromNamespacedStructPropEqualsValueV1() {
Assert.assertEquals(result.toString(), expected);
}

@Test
public void testGetQueryBuilderFromDatesWithDots() {

final Criterion singleValueCriterion =
buildCriterion(
"structuredProperties.date_here.with_dot", Condition.GREATER_THAN, "1731974400000");

OperationContext opContext = mock(OperationContext.class);
when(opContext.getAspectRetriever()).thenReturn(aspectRetriever);
QueryBuilder result =
ESUtils.getQueryBuilderFromCriterion(
singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY);
// structuredProperties.date_here_with_dot should not have .keyword at the end since this field
// type is type long for dates
String expected =
"{\n"
+ " \"range\" : {\n"
+ " \"structuredProperties.date_here_with_dot\" : {\n"
+ " \"from\" : 1731974400000,\n"
+ " \"to\" : null,\n"
+ " \"include_lower\" : false,\n"
+ " \"include_upper\" : true,\n"
+ " \"boost\" : 1.0,\n"
+ " \"_name\" : \"structuredProperties.date_here.with_dot\"\n"
+ " }\n"
+ " }\n"
+ "}";
Assert.assertEquals(result.toString(), expected);
}

@Test
public void testGetQueryBuilderFromStructPropExists() {
final Criterion singleValueCriterion = buildExistsCriterion("structuredProperties.ab.fgh.ten");
Expand Down
Loading