Skip to content

Commit

Permalink
Merge branch 'main' into breaking/unsupported_params_metadata_fields
Browse files Browse the repository at this point in the history
  • Loading branch information
javanna committed Nov 18, 2024
2 parents 83cd2fd + 29659fd commit 12dc24d
Show file tree
Hide file tree
Showing 115 changed files with 2,162 additions and 1,941 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/115585.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115459
summary: Adds access to flags no_sub_matches and no_overlapping_matches to hyphenation-decompounder-tokenfilter
area: Search
type: enhancement
issues:
- 97849
6 changes: 6 additions & 0 deletions docs/changelog/116277.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 116277
summary: Update Semantic Query To Handle Zero Size Responses
area: Vector Search
type: bug
issues:
- 116083
5 changes: 0 additions & 5 deletions docs/changelog/116339.yaml

This file was deleted.

5 changes: 5 additions & 0 deletions docs/changelog/116915.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116915
summary: Improve message about insecure S3 settings
area: Snapshot/Restore
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116918.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116918
summary: Split searchable snapshot into multiple repo operations
area: Snapshot/Restore
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116942.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116942
summary: Fix handling of bulk requests with semantic text fields and delete ops
area: Relevance
type: bug
issues: []
11 changes: 11 additions & 0 deletions docs/changelog/116943.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pr: 116943
summary: Remove support for deprecated `force_source` highlighting parameter
area: Highlighting
type: breaking
issues: []
breaking:
  title: Remove support for deprecated `force_source` highlighting parameter
  area: REST API
  details: The deprecated highlighting `force_source` parameter is no longer supported.
  impact: Users should remove usages of the `force_source` parameter from their search requests.
  notable: false
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ output. Defaults to `5`.
(Optional, Boolean)
If `true`, only include the longest matching subword. Defaults to `false`.

`no_sub_matches`::
(Optional, Boolean)
If `true`, do not match sub tokens in tokens that are in the word list.
Defaults to `false`.

`no_overlapping_matches`::
(Optional, Boolean)
If `true`, do not allow overlapping tokens.
Defaults to `false`.

Typically, users will only want to enable one of the three flags: `no_overlapping_matches` is the most restrictive, and `no_sub_matches` is more restrictive than `only_longest_match`. When a more restrictive option is enabled, the state of the less restrictive options has no effect.

[[analysis-hyp-decomp-tokenfilter-customize]]
==== Customize and add to an analyzer

Expand Down
2 changes: 1 addition & 1 deletion docs/reference/how-to/indexing-speed.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ different nodes so there's redundancy for any node failures. You can also use
insurance.

[discrete]
==== Local vs.remote storage
==== Local vs. remote storage

include::./remote-storage.asciidoc[]

Expand Down
2 changes: 0 additions & 2 deletions docs/reference/search/search-your-data/highlighting.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,6 @@ fragmenter:: Specifies how text should be broken up in highlight
snippets: `simple` or `span`. Only valid for the `plain` highlighter.
Defaults to `span`.

force_source:: deprecated; this parameter has no effect

`simple`::: Breaks up text into same-sized fragments.
`span`::: Breaks up text into same-sized fragments, but tries to avoid
breaking up text between highlighted terms. This is helpful when you're
Expand Down
5 changes: 0 additions & 5 deletions docs/reference/security/authorization/built-in-roles.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ roles have a fixed set of privileges and cannot be updated.
Grants access necessary for the APM system user to send system-level data
(such as monitoring) to {es}.

[[built-in-roles-apm-user]] `apm_user` ::
Grants the privileges required for APM users (such as `read` and
`view_index_metadata` privileges on the `apm-*` and `.ml-anomalies*` indices).
deprecated:[7.13.0,"See {kibana-ref}/apm-app-users.html[APM app users and privileges\] for alternatives."].

[[built-in-roles-beats-admin]] `beats_admin` ::
Grants access to the `.management-beats` index, which contains configuration
information for the Beats.
Expand Down
2 changes: 1 addition & 1 deletion gradle/build.versions.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[versions]
asm = "9.6"
asm = "9.7.1"
jackson = "2.15.0"
junit5 = "5.8.1"
spock = "2.1-groovy-3.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
*/
public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundWordTokenFilterFactory {

private final boolean noSubMatches;
private final boolean noOverlappingMatches;
private final HyphenationTree hyphenationTree;

HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Expand All @@ -46,6 +48,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
} catch (Exception e) {
throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
}

noSubMatches = settings.getAsBoolean("no_sub_matches", false);
noOverlappingMatches = settings.getAsBoolean("no_overlapping_matches", false);
}

@Override
Expand All @@ -57,7 +62,9 @@ public TokenStream create(TokenStream tokenStream) {
minWordSize,
minSubwordSize,
maxSubwordSize,
onlyLongestMatch
onlyLongestMatch,
noSubMatches,
noOverlappingMatches
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
import org.hamcrest.MatcherAssert;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand All @@ -42,6 +45,7 @@
import static org.hamcrest.Matchers.instanceOf;

public class CompoundAnalysisTests extends ESTestCase {

public void testDefaultsCompoundAnalysis() throws Exception {
Settings settings = getJsonSettings();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
Expand All @@ -63,6 +67,44 @@ public void testDictionaryDecompounder() throws Exception {
assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

/**
 * Analyzes a two-word sample with the {@code hyphenationDecompoundingAnalyzerOnlyLongestMatch} analyzer,
 * once per settings source (JSON and YAML), and checks that every listed token is among the output.
 */
public void testHyphenationDecompoundingAnalyzerOnlyLongestMatch() throws Exception {
    final String analyzerName = "hyphenationDecompoundingAnalyzerOnlyLongestMatch";
    final String input = "kaffeemaschine fussballpumpe";

    // Both settings sources are loaded up front, then exercised in turn.
    for (Settings candidate : Arrays.asList(getJsonSettings(), getYamlSettings())) {
        List<String> tokens = analyze(candidate, analyzerName, input);
        MatcherAssert.assertThat(
            tokens,
            hasItems("kaffeemaschine", "kaffee", "fee", "maschine", "fussballpumpe", "fussball", "ballpumpe", "pumpe")
        );
    }

    // The loaded settings are expected to trigger this deprecation warning.
    assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

/**
 * For example, given a word list of: ["kaffee", "fee", "maschine"],
 * no_sub_matches should prevent the token "fee" as a token in "kaffeemaschine".
 * <p>
 * The sample text is analyzed once per settings source (JSON and YAML).
 * <p>
 * NOTE(review): the assertion uses {@code hasItems}, which only verifies that the listed tokens are
 * present; the absence of "fee" is not checked here — confirm whether that is intentional.
 */
public void testHyphenationDecompoundingAnalyzerNoSubMatches() throws Exception {
    final String analyzerName = "hyphenationDecompoundingAnalyzerNoSubMatches";
    final String input = "kaffeemaschine fussballpumpe";

    // Both settings sources are loaded up front, then exercised in turn.
    for (Settings candidate : Arrays.asList(getJsonSettings(), getYamlSettings())) {
        List<String> tokens = analyze(candidate, analyzerName, input);
        MatcherAssert.assertThat(tokens, hasItems("kaffeemaschine", "kaffee", "maschine", "fussballpumpe", "fussball", "ballpumpe"));
    }

    // The loaded settings are expected to trigger this deprecation warning.
    assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

/**
 * For example, given a word list of: ["fuss", "fussball", "ballpumpe", "ball", "pumpe"],
 * no_overlapping_matches should prevent the token "ballpumpe" as a token in "fussballpumpe".
 * <p>
 * The sample text is analyzed once per settings source (JSON and YAML).
 * <p>
 * NOTE(review): the assertion uses {@code hasItems}, which only verifies that the listed tokens are
 * present; the absence of "ballpumpe" is not checked here — confirm whether that is intentional.
 */
public void testHyphenationDecompoundingAnalyzerNoOverlappingMatches() throws Exception {
    final String analyzerName = "hyphenationDecompoundingAnalyzerNoOverlappingMatches";
    final String input = "kaffeemaschine fussballpumpe";

    // Both settings sources are loaded up front, then exercised in turn.
    for (Settings candidate : Arrays.asList(getJsonSettings(), getYamlSettings())) {
        List<String> tokens = analyze(candidate, analyzerName, input);
        MatcherAssert.assertThat(tokens, hasItems("kaffeemaschine", "kaffee", "maschine", "fussballpumpe", "fussball", "pumpe"));
    }

    // The loaded settings are expected to trigger this deprecation warning.
    assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
AnalysisModule analysisModule = createAnalysisModule(settings);
Expand Down Expand Up @@ -92,20 +134,25 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
}

private Settings getJsonSettings() throws IOException {
String json = "/org/elasticsearch/analysis/common/test1.json";
return Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
return getSettings("/org/elasticsearch/analysis/common/test1.json");
}

private Settings getYamlSettings() throws IOException {
String yaml = "/org/elasticsearch/analysis/common/test1.yml";
return getSettings("/org/elasticsearch/analysis/common/test1.yml");
}

private Settings getSettings(String filePath) throws IOException {
String hypenationRulesFileName = "de_DR.xml";
InputStream hypenationRules = getClass().getResourceAsStream(hypenationRulesFileName);
Path home = createTempDir();
Path config = home.resolve("config");
Files.createDirectory(config);
Files.copy(hypenationRules, config.resolve(hypenationRulesFileName));

return Settings.builder()
.loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
.loadFromStream(filePath, getClass().getResourceAsStream(filePath), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(Environment.PATH_HOME_SETTING.getKey(), home.toString())
.build();
}
}
Loading

0 comments on commit 12dc24d

Please sign in to comment.