Skip to content

Commit

Permalink
Update Semantic Query To Handle Zero Size Responses (#116277) (#116977)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mikep86 authored Nov 18, 2024
1 parent a8e616f commit 0a18b0e
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 4 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/116277.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 116277
summary: Update Semantic Query To Handle Zero Size Responses
area: Vector Search
type: bug
issues:
- 116083
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ public Set<NodeFeature> getTestFeatures() {
return Set.of(
SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX,
SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX
SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX,
SemanticTextFieldMapper.SEMANTIC_TEXT_ZERO_SIZE_FIX
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import java.util.Set;
import java.util.function.Function;

import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD;
Expand All @@ -90,6 +91,7 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
public static final NodeFeature SEMANTIC_TEXT_ZERO_SIZE_FIX = new NodeFeature("semantic_text.zero_size_fix");

public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
Expand Down Expand Up @@ -510,7 +512,7 @@ public boolean fieldHasValue(FieldInfos fieldInfos) {
return fieldInfos.fieldInfo(getEmbeddingsFieldName(name())) != null;
}

public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost, String queryName) {
public QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer requestSize, float boost, String queryName) {
String nestedFieldPath = getChunksFieldName(name());
String inferenceResultsFieldName = getEmbeddingsFieldName(name());
QueryBuilder childQueryBuilder;
Expand Down Expand Up @@ -554,7 +556,13 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost
);
}

yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, null, null, null);
Integer k = requestSize;
if (k != null) {
// Ensure that k is at least the default size so that aggregations work when size is set to 0 in the request
k = Math.max(k, DEFAULT_SIZE);
}

yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, k, null, null);
}
default -> throw new IllegalStateException(
"Field ["
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ private QueryBuilder doRewriteBuildSemanticQuery(SearchExecutionContext searchEx
);
}

return semanticTextFieldType.semanticQuery(inferenceResults, boost(), queryName());
return semanticTextFieldType.semanticQuery(inferenceResults, searchExecutionContext.requestSize(), boost(), queryName());
} else {
throw new IllegalArgumentException(
"Field [" + fieldName + "] of type [" + fieldType.typeName() + "] does not support " + NAME + " queries"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -878,3 +878,117 @@ setup:

- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }

---
# Regression test for the zero-size fix (semantic_text.zero_size_fix):
# a semantic query with `size: 0` must still match documents so that
# sibling aggregations receive hits, instead of returning an empty result set.
"Query using a sparse embedding model with size set to zero":
  - requires:
      cluster_features: "semantic_text.zero_size_fix"
      reason: zero size fix added in 8.16.1 & 8.15.5

  # Index with one semantic_text field (sparse inference) plus a keyword
  # field (agg_id) used as the terms-aggregation target below.
  - do:
      indices.create:
        index: test-sparse-index-with-agg-id
        body:
          mappings:
            properties:
              inference_field:
                type: semantic_text
                inference_id: sparse-inference-id
              non_inference_field:
                type: text
              agg_id:
                type: keyword

  # doc_1 carries inference content and should be the only semantic match.
  - do:
      index:
        index: test-sparse-index-with-agg-id
        id: doc_1
        body:
          inference_field: "inference test"
          agg_id: "doc_1"

  # doc_2 has no inference content; refresh here makes both docs searchable.
  - do:
      index:
        index: test-sparse-index-with-agg-id
        id: doc_2
        body:
          non_inference_field: "non-inference test"
          agg_id: "doc_2"
        refresh: true

  # size: 0 suppresses hit bodies but the aggregation must still see doc_1.
  - do:
      search:
        index: test-sparse-index-with-agg-id
        body:
          size: 0
          query:
            semantic:
              field: "inference_field"
              query: "inference test"
          aggs:
            agg_ids:
              terms:
                field: agg_id

  # One total match, zero returned hits, and exactly one agg bucket (doc_1).
  - match: { hits.total.value: 1 }
  - length: { hits.hits: 0 }
  - length: { aggregations.agg_ids.buckets: 1 }
  - match: { aggregations.agg_ids.buckets.0.key: "doc_1" }
  - match: { aggregations.agg_ids.buckets.0.doc_count: 1 }

---
# Same zero-size regression scenario as the sparse variant, but exercising
# the dense (kNN) code path: with `size: 0` the knn query's k must be
# clamped up so aggregations still receive the matching document.
"Query using a dense embedding model with size set to zero":
  - requires:
      cluster_features: "semantic_text.zero_size_fix"
      reason: zero size fix added in 8.16.1 & 8.15.5

  # Index with a semantic_text field backed by a dense inference endpoint,
  # plus a keyword field (agg_id) for the terms aggregation below.
  - do:
      indices.create:
        index: test-dense-index-with-agg-id
        body:
          mappings:
            properties:
              inference_field:
                type: semantic_text
                inference_id: dense-inference-id
              non_inference_field:
                type: text
              agg_id:
                type: keyword

  # doc_1 carries inference content and should be the only semantic match.
  - do:
      index:
        index: test-dense-index-with-agg-id
        id: doc_1
        body:
          inference_field: "inference test"
          agg_id: "doc_1"

  # doc_2 has no inference content; refresh here makes both docs searchable.
  - do:
      index:
        index: test-dense-index-with-agg-id
        id: doc_2
        body:
          non_inference_field: "non-inference test"
          agg_id: "doc_2"
        refresh: true

  # size: 0 suppresses hit bodies but the aggregation must still see doc_1.
  - do:
      search:
        index: test-dense-index-with-agg-id
        body:
          size: 0
          query:
            semantic:
              field: "inference_field"
              query: "inference test"
          aggs:
            agg_ids:
              terms:
                field: agg_id

  # One total match, zero returned hits, and exactly one agg bucket (doc_1).
  - match: { hits.total.value: 1 }
  - length: { hits.hits: 0 }
  - length: { aggregations.agg_ids.buckets: 1 }
  - match: { aggregations.agg_ids.buckets.0.key: "doc_1" }
  - match: { aggregations.agg_ids.buckets.0.doc_count: 1 }

0 comments on commit 0a18b0e

Please sign in to comment.