Skip to content

Commit

Permalink
Make sure shared source always represents the top-level root document. (
Browse files Browse the repository at this point in the history
#66725)

We started passing down the root document's _source when processing
nested hits, to avoid reloading and reparsing the root source for each hit.
Unfortunately the approach did not work when there are multiple layers of
`inner_hits`. In this case, the second-layer inner hit received its immediate
parent's source instead of the root source. This parent source is filtered to
just contain the parts corresponding to the nested document, but the source
parsing logic is designed to always operate on the top-level root source. This
caused failures when loading the second-layer inner hits.

This PR makes sure to always pass the root document's _source when processing
inner hits, even if there are multiple layers.
  • Loading branch information
jtibshirani committed Jan 5, 2021
1 parent 0fad7f6 commit ff67baa
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,69 @@ setup:
- match: { hits.hits.0.fields._seq_no: [1] }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._version: 2 }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [1] }

---
"Inner hits with disabled _source":
- do:
indices.create:
index: disabled_source
body:
mappings:
_source:
enabled: false
properties:
nested_field:
type: nested
properties:
sub_nested_field:
type: nested

- do:
index:
index: disabled_source
id: 1
body:
nested_field:
field: value
sub_nested_field:
field: value

- do:
indices.refresh: {}

- do:
search:
index: disabled_source
rest_total_hits_as_int: true
body:
query:
nested:
path: "nested_field.sub_nested_field"
query: { match_all: {}}
inner_hits: {}
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._source

- do:
search:
index: disabled_source
rest_total_hits_as_int: true
body:
query:
nested:
path: "nested_field"
inner_hits: {}
query:
nested:
path: "nested_field.sub_nested_field"
query: { match_all: {}}
inner_hits: {}

- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0._source
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._source
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,14 @@ public void testNestedMultipleLayers() throws Exception {
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox")
.startArray("comments")
.startObject()
.field("message", "fox eat quick")
.startArray("remarks").startObject().field("message", "good").endObject().endArray()
.endObject()
.startObject()
.field("message", "fox eat quick")
.startArray("remarks").startObject().field("message", "good").endObject().endArray()
.endObject()
.startObject()
.field("message", "hippo is hungry")
.startArray("remarks").startObject().field("message", "neutral").endObject().endArray()
.endObject()
.endArray()
.endObject()));
requests.add(client().prepareIndex("articles", "article", "2").setSource(jsonBuilder().startObject()
Expand All @@ -296,6 +300,7 @@ public void testNestedMultipleLayers() throws Exception {
.endObject()));
indexRandom(true, requests);

// Check we can load the first doubly-nested document.
SearchResponse response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
Expand All @@ -322,6 +327,33 @@ public void testNestedMultipleLayers() throws Exception {
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));

// Check we can load the second doubly-nested document.
response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "neutral"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")),
ScoreMode.Avg).innerHit(new InnerHitBuilder())
).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertSearchHit(response, 1, hasId("1"));
assertThat(response.getHits().getAt(0).getInnerHits().size(), equalTo(1));
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
assertThat(innerHits.getTotalHits().value, equalTo(1L));
assertThat(innerHits.getHits().length, equalTo(1));
assertThat(innerHits.getAt(0).getId(), equalTo("1"));
assertThat(innerHits.getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(1));
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertThat(innerHits.getTotalHits().value, equalTo(1L));
assertThat(innerHits.getHits().length, equalTo(1));
assertThat(innerHits.getAt(0).getId(), equalTo("1"));
assertThat(innerHits.getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(1));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));

// Directly refer to the second level:
response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "bad"), ScoreMode.Avg)
Expand Down Expand Up @@ -364,6 +396,34 @@ public void testNestedMultipleLayers() throws Exception {
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));

// Check that inner hits contain _source even when it's disabled on the parent request.
response = client().prepareSearch("articles")
.setFetchSource(false)
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "good"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")), ScoreMode.Avg)
.innerHit(new InnerHitBuilder())
).get();
assertNoFailures(response);
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertNotNull(innerHits.getAt(0).getSourceAsMap());
assertFalse(innerHits.getAt(0).getSourceAsMap().isEmpty());

response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "good"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")), ScoreMode.Avg)
.innerHit(new InnerHitBuilder().setFetchSourceContext(new FetchSourceContext(false)))
).get();
assertNoFailures(response);
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertNotNull(innerHits.getAt(0).getSourceAsMap());
assertFalse(innerHits.getAt(0).getSourceAsMap().isEmpty());
}

// Issue #9723
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.FieldAndFormat;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext.InnerHitSubContext;
import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.search.rescore.RescoreContext;

import java.util.Collections;
Expand Down Expand Up @@ -204,4 +206,23 @@ public ScriptFieldsContext scriptFields() {
public SearchExtBuilder getSearchExt(String name) {
return searchContext.getSearchExt(name);
}

/**
* For a hit document that's being processed, return the source lookup representing the
* root document. This method is used to pass down the root source when processing this
* document's nested inner hits.
*
* @param hitContext The context of the hit that's being processed.
*/
public SourceLookup getRootSourceLookup(FetchSubPhase.HitContext hitContext) {
// Usually the root source simply belongs to the hit we're processing. But if
// there are multiple layers of inner hits and we're in a nested context, then
// the root source is found on the inner hits context.
if (searchContext instanceof InnerHitSubContext && hitContext.hit().getNestedIdentity() != null) {
InnerHitSubContext innerHitsContext = (InnerHitSubContext) searchContext;
return innerHitsContext.getRootLookup();
} else {
return hitContext.sourceLookup();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,16 @@ public void setNextReader(LeafReaderContext readerContext) {

@Override
public void process(HitContext hitContext) throws IOException {
hitExecute(innerHits, hitContext);
SearchHit hit = hitContext.hit();
SourceLookup rootLookup = searchContext.getRootSourceLookup(hitContext);
hitExecute(innerHits, hit, rootLookup);
}
};
}

private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHits, HitContext hitContext) throws IOException {

SearchHit hit = hitContext.hit();
SourceLookup sourceLookup = hitContext.sourceLookup();

private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHits,
SearchHit hit,
SourceLookup rootLookup) throws IOException {
for (Map.Entry<String, InnerHitsContext.InnerHitSubContext> entry : innerHits.entrySet()) {
InnerHitsContext.InnerHitSubContext innerHitsContext = entry.getValue();
TopDocsAndMaxScore topDoc = innerHitsContext.topDocs(hit);
Expand All @@ -84,7 +84,7 @@ private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHi
}
innerHitsContext.docIdsToLoad(docIdsToLoad, 0, docIdsToLoad.length);
innerHitsContext.setRootId(new Uid(hit.getType(), hit.getId()));
innerHitsContext.setRootLookup(sourceLookup);
innerHitsContext.setRootLookup(rootLookup);

fetchPhase.execute(innerHitsContext);
FetchSearchResult fetchResult = innerHitsContext.fetchResult();
Expand Down

0 comments on commit ff67baa

Please sign in to comment.