Merge branch 'main' into custom-roles-get-roles-api
n1v0lg committed Feb 20, 2024
2 parents 6282623 + b8dc5c3 commit a40a922
Showing 34 changed files with 411 additions and 79 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/105373.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 105373
summary: "Fix parsing of flattened fields within subobjects: false"
area: Mapping
type: bug
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/105633.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 105633
summary: "[Connector API] Bugfix: support list type in filtering advenced snippet\
\ value"
area: Application
type: bug
issues: []
9 changes: 9 additions & 0 deletions docs/reference/how-to/size-your-shards.asciidoc
Original file line number Diff line number Diff line change
@@ -221,6 +221,15 @@ GET _cat/shards?v=true
----
// TEST[setup:my_index]

[discrete]
[[shard-count-per-node-recommendation]]
==== Add enough nodes to stay within the cluster shard limits

The <<cluster-shard-limit,cluster shard limits>> prevent creation of more than
1000 non-frozen shards per node, and 3000 frozen shards per dedicated frozen
node. Make sure you have enough nodes of each type in your cluster to handle
the number of shards you need.
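
To make the sizing arithmetic above concrete, here is a small editor's sketch in Java (an illustration only, not part of the Elasticsearch docs or code) that computes the minimum number of nodes needed to stay within a per-node shard limit such as the defaults mentioned here.

final class ShardSizingSketch {
    // Editor's illustration only: ceiling division of total shards by the per-node limit.
    static int minNodesForShards(int totalShards, int shardsPerNodeLimit) {
        // e.g. 3500 non-frozen shards at the default limit of 1000 per node -> 4 nodes
        return (totalShards + shardsPerNodeLimit - 1) / shardsPerNodeLimit;
    }
}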

[discrete]
[[field-count-recommendation]]
==== Allow enough heap for field mappers and overheads
64 changes: 38 additions & 26 deletions docs/reference/modules/cluster/misc.asciidoc
Original file line number Diff line number Diff line change
@@ -24,35 +24,46 @@ API can make the cluster read-write again.

[discrete]
[[cluster-shard-limit]]
==== Cluster shard limit
==== Cluster shard limits

There is a soft limit on the number of shards in a cluster, based on the number
of nodes in the cluster. This is intended to prevent operations which may
unintentionally destabilize the cluster.
There is a limit on the number of shards in a cluster, based on the number of
nodes in the cluster. This is intended to prevent a runaway process from
creating too many shards which can harm performance and in extreme cases may
destabilize your cluster.

IMPORTANT: This limit is intended as a safety net, not a sizing recommendation. The
exact number of shards your cluster can safely support depends on your hardware
configuration and workload, but should remain well below this limit in almost
all cases, as the default limit is set quite high.
[IMPORTANT]
====
If an operation, such as creating a new index, restoring a snapshot of an index,
or opening a closed index would lead to the number of shards in the cluster
going over this limit, the operation will fail with an error indicating the
shard limit.
These limits are intended as a safety net to protect against runaway shard
creation and are not a sizing recommendation. The exact number of shards your
cluster can safely support depends on your hardware configuration and workload,
and may be smaller than the default limits.
If the cluster is already over the limit, due to changes in node membership or
setting changes, all operations that create or open indices will fail until
either the limit is increased as described below, or some indices are
<<indices-open-close,closed>> or <<indices-delete-index,deleted>> to bring the
number of shards below the limit.
We do not recommend increasing these limits beyond the defaults. Clusters with
more shards may appear to run well in normal operation, but may take a very
long time to recover from temporary disruptions such as a network partition or
an unexpected node restart, and may encounter problems when performing
maintenance activities such as a rolling restart or upgrade.
The cluster shard limit defaults to 1,000 shards per non-frozen data node for
====

If an operation, such as creating a new index, restoring a snapshot of an
index, or opening a closed index would lead to the number of shards in the
cluster going over this limit, the operation will fail with an error indicating
the shard limit. To resolve this, either scale out your cluster by adding
nodes, or <<indices-delete-index,delete some indices>> to bring the number of
shards below the limit.

If a cluster is already over the limit, perhaps due to changes in node
membership or setting changes, all operations that create or open indices will
fail.

The cluster shard limit defaults to 1000 shards per non-frozen data node for
normal (non-frozen) indices and 3000 shards per frozen data node for frozen
indices.
Both primary and replica shards of all open indices count toward the limit,
including unassigned shards.
For example, an open index with 5 primary shards and 2 replicas counts as 15 shards.
Closed indices do not contribute to the shard count.
indices. Both primary and replica shards of all open indices count toward the
limit, including unassigned shards. For example, an open index with 5 primary
shards and 2 replicas counts as 15 shards. Closed indices do not contribute to
the shard count.
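
The counting rule above can be written down directly; the following is an editor's sketch in Java (not Elasticsearch code) of how open-index shards are tallied against the limit.

final class ShardCountSketch {
    // Each open index counts primaries * (1 + replicas) shards, assigned or not; closed indices count 0.
    static int openIndexShardCount(int primaries, int replicas) {
        return primaries * (1 + replicas);   // 5 primaries with 2 replicas -> 15 shards
    }
}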

You can dynamically adjust the cluster shard limit with the following setting:

@@ -99,12 +110,13 @@ For example, a cluster with a `cluster.max_shards_per_node.frozen` setting of
`100` and three frozen data nodes has a frozen shard limit of 300. If the
cluster already contains 296 shards, {es} rejects any request that adds five or
more frozen shards to the cluster.
--
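
The frozen-shard example above boils down to a simple comparison; this editor's sketch in Java (an illustration, not the actual Elasticsearch implementation) shows the check it describes.

final class FrozenShardLimitSketch {
    // Limit = max_shards_per_node.frozen * number of frozen data nodes; reject requests that would exceed it.
    static boolean wouldExceedFrozenLimit(int maxShardsPerFrozenNode, int frozenDataNodes,
                                          int currentFrozenShards, int addedFrozenShards) {
        int frozenLimit = maxShardsPerFrozenNode * frozenDataNodes;    // 100 * 3 = 300
        return currentFrozenShards + addedFrozenShards > frozenLimit;  // 296 + 5 > 300 -> rejected
    }
}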

NOTE: These setting do not limit shards for individual nodes. To limit the
number of shards for each node, use the
NOTE: These limits only apply to actions which create shards and do not limit
the number of shards assigned to each node. To limit the number of shards
assigned to each node, use the
<<cluster-total-shards-per-node,`cluster.routing.allocation.total_shards_per_node`>>
setting.
--

[discrete]
[[user-defined-data]]
12 changes: 6 additions & 6 deletions docs/reference/search/search-your-data/knn-search.asciidoc
Original file line number Diff line number Diff line change
@@ -568,12 +568,12 @@ NOTE: `similarity` is the true <<dense-vector-similarity, similarity>> before it

For each configured <<dense-vector-similarity, similarity>>, here is the corresponding inverted `_score` function. If you want to filter from a `_score` perspective, you can apply this minor transformation to correctly reject irrelevant results; a short sketch follows the list below.
--
- `l2_norm`: `sqrt((1 / _score) - 1)`
- `cosine`: `(2 * _score) - 1`
- `dot_product`: `(2 * _score) - 1`
- `max_inner_product`:
- `_score < 1`: `1 - (1 / _score)`
- `_score >= 1`: `_score - 1`
* `l2_norm`: `sqrt((1 / _score) - 1)`
* `cosine`: `(2 * _score) - 1`
* `dot_product`: `(2 * _score) - 1`
* `max_inner_product`:
** `_score < 1`: `1 - (1 / _score)`
** `_score >= 1`: `_score - 1`
--
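
The sketch referenced above encodes those inverted `_score` functions in one place; it is an editor's illustration in Java, not part of the Elasticsearch docs or code.

final class KnnScoreInversionSketch {
    // Convert a kNN _score back to the underlying similarity so a similarity-based threshold can be applied.
    static double invertScore(String similarity, double score) {
        return switch (similarity) {
            case "l2_norm" -> Math.sqrt((1.0 / score) - 1.0);
            case "cosine", "dot_product" -> (2.0 * score) - 1.0;
            case "max_inner_product" -> score < 1.0 ? 1.0 - (1.0 / score) : score - 1.0;
            default -> throw new IllegalArgumentException("unknown similarity: " + similarity);
        };
    }
}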

Here is an example in which we search for the `k` nearest neighbors of the given `query_vector`. However, with
Original file line number Diff line number Diff line change
@@ -216,7 +216,7 @@ public void writeVInt(int i) throws IOException {
writeBytes(buffer, 0, index);
}

private static int putVInt(byte[] buffer, int i, int off) {
public static int putVInt(byte[] buffer, int i, int off) {
if (Integer.numberOfLeadingZeros(i) >= 25) {
buffer[off] = (byte) i;
return 1;
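
The only change in this hunk is the visibility of `putVInt`, so callers outside `StreamOutput` can length-prefix a raw byte array. The sketch below is an editor's illustration of such a caller, assuming only the signature shown above (`putVInt` writes the VInt at the given offset and returns the number of bytes written); it is not code from this PR.

import org.elasticsearch.common.io.stream.StreamOutput;

final class VIntPrefixSketch {
    // Prefix a payload with its length as a VInt, then copy the payload after the prefix.
    static byte[] lengthPrefixed(byte[] payload) {
        byte[] scratch = new byte[5 + payload.length];                     // a VInt of an int needs at most 5 bytes
        int prefixLen = StreamOutput.putVInt(scratch, payload.length, 0);  // returns bytes written
        System.arraycopy(payload, 0, scratch, prefixLen, payload.length);
        byte[] result = new byte[prefixLen + payload.length];
        System.arraycopy(scratch, 0, result, 0, result.length);
        return result;
    }
}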
Original file line number Diff line number Diff line change
@@ -455,11 +455,12 @@ private static void parseObject(final DocumentParserContext context, String curr

private static void doParseObject(DocumentParserContext context, String currentFieldName, Mapper objectMapper) throws IOException {
context.path().add(currentFieldName);
boolean withinLeafObject = context.path().isWithinLeafObject();
if (objectMapper instanceof ObjectMapper objMapper && objMapper.subobjects() == false) {
context.path().setWithinLeafObject(true);
}
parseObjectOrField(context, objectMapper);
context.path().setWithinLeafObject(false);
context.path().setWithinLeafObject(withinLeafObject);
context.path().remove();
}

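
The fix in this hunk records the caller's `withinLeafObject` state and restores it afterwards instead of unconditionally resetting it to `false`, so an enclosing `subobjects: false` scope (for example one containing a flattened field) is not lost when parsing returns from a nested object. A standalone editor's sketch of that save-and-restore idiom, using hypothetical names, follows.

final class LeafScopeSketch {
    private boolean withinLeafObject;

    // Hypothetical illustration: remember the caller's flag, set it for a leaf scope, restore on exit.
    void parseObject(boolean objectIsLeaf, Runnable parseChildren) {
        boolean previous = withinLeafObject;   // capture the enclosing scope's state
        if (objectIsLeaf) {
            withinLeafObject = true;
        }
        parseChildren.run();                   // may recurse back into parseObject
        withinLeafObject = previous;           // restore rather than reset to false
    }
}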
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.core.Nullable;
@@ -208,6 +209,12 @@ public BytesReference buildLegacyTsid() throws IOException {
}
}

private static final int MAX_HASH_LEN_BYTES = 2;

static {
assert MAX_HASH_LEN_BYTES == StreamOutput.putVInt(new byte[2], tsidHashLen(MAX_DIMENSIONS), 0);
}

/**
* Here we build the hash of the tsid using a similarity function so that we have a result
* with the following pattern:
@@ -219,11 +226,13 @@ public BytesReference buildLegacyTsid() throws IOException {
* The idea is to be able to place 'similar' time series close to each other. Two time series
* are considered 'similar' if they share the same dimensions (names and values).
*/
public BytesReference buildTsidHash() throws IOException {
public BytesReference buildTsidHash() {
// NOTE: hash all dimension field names
int numberOfDimensions = Math.min(MAX_DIMENSIONS, dimensions.size());
int tsidHashIndex = 0;
byte[] tsidHash = new byte[16 + 16 + 4 * numberOfDimensions];
int len = tsidHashLen(numberOfDimensions);
// either one or two bytes are occupied by the vint since we're bounded by #MAX_DIMENSIONS
byte[] tsidHash = new byte[MAX_HASH_LEN_BYTES + len];
int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0);

tsidHasher.reset();
for (final Dimension dimension : dimensions) {
@@ -258,11 +267,11 @@ }
}
tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex);

assert tsidHashIndex == tsidHash.length;
try (BytesStreamOutput out = new BytesStreamOutput(tsidHash.length)) {
out.writeBytesRef(new BytesRef(tsidHash, 0, tsidHash.length));
return out.bytes();
}
return new BytesArray(tsidHash, 0, tsidHashIndex);
}

private static int tsidHashLen(int numberOfDimensions) {
return 16 + 16 + 4 * numberOfDimensions;
}

private int writeHash128(final MurmurHash3.Hash128 hash128, byte[] buffer, int tsidHashIndex) {
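
The buffer arithmetic in `buildTsidHash` can be summarized as follows; this is an editor's sketch (a hypothetical helper, not code from this PR) of the sizes involved.

final class TsidHashSizeSketch {
    // Payload: two 16-byte hashes plus 4 bytes per hashed dimension, as in tsidHashLen() above.
    // Its VInt length prefix takes 1 byte for payloads up to 127 bytes and 2 bytes otherwise,
    // which is why MAX_HASH_LEN_BYTES is 2 and the static assert checks the MAX_DIMENSIONS case.
    static int prefixedTsidHashSize(int numberOfDimensions) {
        int len = 16 + 16 + 4 * numberOfDimensions;
        int vintBytes = len <= 0x7F ? 1 : 2;
        return vintBytes + len;
    }
}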
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.inference;

import org.elasticsearch.core.Nullable;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
 * Model settings that are relevant to semantic_text inference fields. This class is used to serialize the values of
 * common ServiceSettings methods when building inference for semantic_text fields.
*
* @param taskType task type
* @param inferenceId inference id
* @param dimensions number of dimensions. May be null if not applicable
* @param similarity similarity used by the service. May be null if not applicable
*/
public record SemanticTextModelSettings(
TaskType taskType,
String inferenceId,
@Nullable Integer dimensions,
@Nullable SimilarityMeasure similarity
) {

public static final String NAME = "model_settings";
private static final ParseField TASK_TYPE_FIELD = new ParseField("task_type");
private static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id");
private static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
private static final ParseField SIMILARITY_FIELD = new ParseField("similarity");

public SemanticTextModelSettings(TaskType taskType, String inferenceId, Integer dimensions, SimilarityMeasure similarity) {
Objects.requireNonNull(taskType, "task type must not be null");
Objects.requireNonNull(inferenceId, "inferenceId must not be null");
this.taskType = taskType;
this.inferenceId = inferenceId;
this.dimensions = dimensions;
this.similarity = similarity;
}

public SemanticTextModelSettings(Model model) {
this(
model.getTaskType(),
model.getInferenceEntityId(),
model.getServiceSettings().dimensions(),
model.getServiceSettings().similarity()
);
}

public static SemanticTextModelSettings parse(XContentParser parser) throws IOException {
return PARSER.apply(parser, null);
}

private static final ConstructingObjectParser<SemanticTextModelSettings, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
TaskType taskType = TaskType.fromString((String) args[0]);
String inferenceId = (String) args[1];
Integer dimensions = (Integer) args[2];
SimilarityMeasure similarity = args[3] == null ? null : SimilarityMeasure.fromString((String) args[3]);
return new SemanticTextModelSettings(taskType, inferenceId, dimensions, similarity);
});
static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), TASK_TYPE_FIELD);
PARSER.declareString(ConstructingObjectParser.constructorArg(), INFERENCE_ID_FIELD);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), DIMENSIONS_FIELD);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), SIMILARITY_FIELD);
}

public Map<String, Object> asMap() {
Map<String, Object> attrsMap = new HashMap<>();
attrsMap.put(TASK_TYPE_FIELD.getPreferredName(), taskType.toString());
attrsMap.put(INFERENCE_ID_FIELD.getPreferredName(), inferenceId);
if (dimensions != null) {
attrsMap.put(DIMENSIONS_FIELD.getPreferredName(), dimensions);
}
if (similarity != null) {
attrsMap.put(SIMILARITY_FIELD.getPreferredName(), similarity);
}
return Map.of(NAME, attrsMap);
}
}
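
As a usage illustration for the new record, the sketch below builds the settings directly and renders the map stored under `model_settings`. The inference id and the concrete `TaskType`/`SimilarityMeasure` constants are assumptions for illustration, not values taken from this PR.

import org.elasticsearch.inference.SemanticTextModelSettings;
import org.elasticsearch.inference.SimilarityMeasure;
import org.elasticsearch.inference.TaskType;

import java.util.Map;

public class SemanticTextModelSettingsExample {
    public static void main(String[] args) {
        SemanticTextModelSettings settings = new SemanticTextModelSettings(
            TaskType.TEXT_EMBEDDING,      // assumed task type constant
            "my-embedding-endpoint",      // hypothetical inference endpoint id
            384,                          // dimensions reported by the service
            SimilarityMeasure.COSINE      // assumed similarity constant
        );
        // A single-entry map keyed by "model_settings", containing only the non-null fields.
        Map<String, Object> asMap = settings.asMap();
        System.out.println(asMap);
    }
}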
Original file line number Diff line number Diff line change
@@ -17,4 +17,23 @@ public interface ServiceSettings extends ToXContentObject, VersionedNamedWriteab
* Returns a {@link ToXContentObject} that only writes the exposed fields. Any hidden fields are not written.
*/
ToXContentObject getFilteredXContentObject();

/**
* Similarity used in the service. Will be null if not applicable.
*
* @return similarity
*/
default SimilarityMeasure similarity() {
return null;
}

/**
* Number of dimensions the service works with. Will be null if not applicable.
*
* @return number of dimensions
*/
default Integer dimensions() {
return null;
}

}
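
Because both new methods default to `null`, callers can treat `null` as "not applicable" without checking the concrete settings type; a brief editor's sketch of such a caller (a hypothetical helper, not code from this PR) follows.

import org.elasticsearch.inference.ServiceSettings;
import org.elasticsearch.inference.SimilarityMeasure;

final class ServiceSettingsSketch {
    // Hypothetical consumer: read the optional dimensions/similarity exposed by the new defaults.
    static String describe(ServiceSettings serviceSettings) {
        Integer dims = serviceSettings.dimensions();
        SimilarityMeasure sim = serviceSettings.similarity();
        return (dims == null ? "no fixed dimensions" : dims + " dimensions")
            + ", " + (sim == null ? "similarity not applicable" : sim.toString());
    }
}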
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.xpack.inference.common;
package org.elasticsearch.inference;

import java.util.Locale;

Original file line number Diff line number Diff line change
@@ -165,7 +165,7 @@ public static InternalAggregations from(List<InternalAggregation> aggregations)
}

public static InternalAggregations readFrom(StreamInput in) throws IOException {
return from(in.readCollectionAsList(stream -> stream.readNamedWriteable(InternalAggregation.class)));
return from(in.readNamedWriteableCollectionAsList(InternalAggregation.class));
}

@Override
Original file line number Diff line number Diff line change
@@ -2273,6 +2273,39 @@ public void testSubobjectsFalseParentDynamicFalse() throws Exception {
assertNull(doc.dynamicMappingsUpdate());
}

public void testSubobjectsFalseFlattened() throws Exception {
DocumentMapper mapper = createDocumentMapper(mapping(b -> {
b.startObject("attributes");
{
b.field("dynamic", false);
b.field("subobjects", false);
b.startObject("properties");
{
b.startObject("simple.attribute");
b.field("type", "keyword");
b.endObject();
b.startObject("complex.attribute");
b.field("type", "flattened");
b.endObject();
}
b.endObject();
}
b.endObject();
}));
ParsedDocument doc = mapper.parse(source("""
{
"attributes": {
"complex.attribute": {
"foo" : "bar"
},
"simple.attribute": "foo"
}
}
"""));
assertNotNull(doc.rootDoc().getField("attributes.complex.attribute"));
assertNotNull(doc.rootDoc().getField("attributes.simple.attribute"));
}

public void testWriteToFieldAlias() throws Exception {
DocumentMapper mapper = createDocumentMapper(mapping(b -> {
b.startObject("alias-field");