Skip to content

Commit

Permalink
Rebase/merge fixes + further improvements.
Browse files Browse the repository at this point in the history
Signed-off-by: Yury-Fridlyand <[email protected]>
  • Loading branch information
Yury-Fridlyand committed Sep 9, 2022
1 parent aeb45df commit 5861f36
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,11 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
}
});
} catch (Exception e) {
if (null != newSqlQueryHandler.getError()) {
LOG.error(LogUtils.getRequestId() + " V2 SQL error during query execution",
QueryDataAnonymizer.anonymizeData(newSqlQueryHandler.getError().toString()));
if (null != QueryContext.getError()) {
LOG.error(QueryContext.getRequestId() + " V2 SQL error during query execution", e,
"previously collected error(s):", QueryDataAnonymizer.anonymizeData(QueryContext.getError()));
} else {
LOG.error(LogUtils.getRequestId() + " V2 SQL error during query execution");
LOG.error(QueryContext.getRequestId() + " V2 SQL error during query execution", e);
}
logAndPublishMetrics(e);
return channel -> reportError(channel, e, isClientError(e) ? BAD_REQUEST : SERVICE_UNAVAILABLE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ public MatchPhrasePrefixQuery() {
.put("slop", (b, v) -> b.slop(
FunctionParameterRepository.convertIntValue(v, "slop")))
.put("zero_terms_query", (b, v) -> b.zeroTermsQuery(
org.opensearch.index.search.MatchQuery.ZeroTermsQuery.valueOf(valueOfToUpper(v))))
.put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue())))
FunctionParameterRepository.convertZeroTermsQuery(v)))
.build());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public MatchPhraseQuery() {
.put("slop", (b, v) -> b.slop(
FunctionParameterRepository.convertIntValue(v, "slop")))
.put("zero_terms_query", (b, v) -> b.zeroTermsQuery(
org.opensearch.index.search.MatchQuery.ZeroTermsQuery.valueOf(valueOfToUpper(v))))
FunctionParameterRepository.convertZeroTermsQuery(v)))
.build());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,17 @@ public class MatchQuery extends SingleFieldQuery<MatchQueryBuilder> {
public MatchQuery() {
super(ImmutableMap.<String, QueryBuilderStep<MatchQueryBuilder>>builder()
.put("analyzer", (b, v) -> b.analyzer(v.stringValue()))
.put("auto_generate_synonyms_phrase_query",
(b, v) -> b.autoGenerateSynonymsPhraseQuery(Boolean.parseBoolean(v.stringValue())))
.put("fuzziness", (b, v) -> b.fuzziness(valueOfToUpper(v)))
.put("max_expansions", (b, v) -> b.maxExpansions(Integer.parseInt(v.stringValue())))
.put("prefix_length", (b, v) -> b.prefixLength(Integer.parseInt(v.stringValue())))
.put("fuzzy_transpositions",
(b, v) -> b.fuzzyTranspositions(Boolean.parseBoolean(v.stringValue())))
.put("fuzzy_rewrite", (b, v) -> b.fuzzyRewrite(v.stringValue()))
.put("lenient", (b, v) -> b.lenient(Boolean.parseBoolean(v.stringValue())))
.put("operator", (b, v) -> b.operator(Operator.fromString(v.stringValue())))
.put("auto_generate_synonyms_phrase_query", (b, v) -> b.autoGenerateSynonymsPhraseQuery(
FunctionParameterRepository.convertBoolValue(v,"auto_generate_synonyms_phrase_query")))
.put("boost", (b, v) -> b.boost(
FunctionParameterRepository.convertFloatValue(v, "boost")))
.put("fuzziness", (b, v) -> b.fuzziness(FunctionParameterRepository.convertFuzziness(v)))
.put("fuzzy_rewrite", (b, v) -> b.fuzzyRewrite(
FunctionParameterRepository.checkRewrite(v, "fuzzy_rewrite")))
.put("fuzzy_transpositions", (b, v) -> b.fuzzyTranspositions(
FunctionParameterRepository.convertBoolValue(v, "fuzzy_transpositions")))
.put("lenient", (b, v) -> b.lenient(
FunctionParameterRepository.convertBoolValue(v, "lenient")))
.put("minimum_should_match", (b, v) -> b.minimumShouldMatch(v.stringValue()))
.put("max_expansions", (b, v) -> b.maxExpansions(
FunctionParameterRepository.convertIntValue(v, "max_expansions")))
Expand All @@ -39,8 +40,7 @@ public MatchQuery() {
.put("prefix_length", (b, v) -> b.prefixLength(
FunctionParameterRepository.convertIntValue(v, "prefix_length")))
.put("zero_terms_query", (b, v) -> b.zeroTermsQuery(
org.opensearch.index.search.MatchQuery.ZeroTermsQuery.valueOf(valueOfToUpper(v))))
.put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue())))
FunctionParameterRepository.convertZeroTermsQuery(v)))
.build());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public MultiMatchQuery() {
FunctionParameterRepository.convertFloatValue(v, "tie_breaker")))
.put("type", (b, v) -> b.type(FunctionParameterRepository.convertType(v)))
.put("zero_terms_query", (b, v) -> b.zeroTermsQuery(
org.opensearch.index.search.MatchQuery.ZeroTermsQuery.valueOf(valueOfToUpper(v))))
FunctionParameterRepository.convertZeroTermsQuery(v)))
.build());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,12 @@ public QueryStringQuery() {
FunctionParameterRepository.convertIntValue(v, "phrase_slop")))
.put("quote_analyzer", (b, v) -> b.quoteAnalyzer(v.stringValue()))
.put("quote_field_suffix", (b, v) -> b.quoteFieldSuffix(v.stringValue()))
.put("rewrite", (b, v) -> b.rewrite(v.stringValue()))
.put("type", (b, v) -> b.type(MultiMatchQueryBuilder.Type.parse(valueOfToLower(v),
LoggingDeprecationHandler.INSTANCE)))
.put("tie_breaker", (b, v) -> b.tieBreaker(Float.parseFloat(v.stringValue())))
.put("time_zone", (b, v) -> b.timeZone(v.stringValue()))
.put("rewrite", (b, v) -> b.rewrite(
FunctionParameterRepository.checkRewrite(v, "rewrite")))
.put("tie_breaker", (b, v) -> b.tieBreaker(
FunctionParameterRepository.convertFloatValue(v, "tie_breaker")))
.put("time_zone", (b, v) -> b.timeZone(FunctionParameterRepository.checkTimeZone(v)))
.put("type", (b, v) -> b.type(FunctionParameterRepository.convertType(v)))
.build());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,15 @@

package org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.LuceneQuery;
Expand All @@ -31,26 +27,43 @@ public abstract class RelevanceQuery<T extends QueryBuilder> extends LuceneQuery

@Override
public QueryBuilder build(FunctionExpression func) {
List<Expression> arguments = func.getArguments();
var arguments = func.getArguments().stream()
.map(a -> (NamedArgumentExpression)a).collect(Collectors.toList());
if (arguments.size() < 2) {
throw new SyntaxCheckException(
String.format("%s requires at least two parameters", getQueryName()));
}
NamedArgumentExpression field = (NamedArgumentExpression) arguments.get(0);
NamedArgumentExpression query = (NamedArgumentExpression) arguments.get(1);

// Aggregate parameters by name, so getting a Map<Name:String, List>
arguments.stream().collect(Collectors.groupingBy(a -> a.getArgName().toLowerCase()))
.forEach((k, v) -> {
if (v.size() > 1) {
throw new SemanticCheckException(
String.format("Parameter '%s' can only be specified once.", k));
}
});

// No validation that 'field' and 'fields' are present both, just pick a first occurrence
// Extract 'field'/'fields' and 'query'
var field = arguments.stream()
.filter(a -> a.getArgName().equalsIgnoreCase("field")
|| a.getArgName().equalsIgnoreCase("fields"))
.findFirst()
.orElseThrow(() -> new SyntaxCheckException("'field'/'fields' parameter is missing"));
var query = arguments.stream()
.filter(a -> a.getArgName().equalsIgnoreCase("query"))
.findFirst()
.orElseThrow(() -> new SyntaxCheckException("'query' parameter is missing"));
T queryBuilder = createQueryBuilder(field, query);

Iterator<Expression> iterator = arguments.listIterator(2);
Set<String> visitedParms = new HashSet();
arguments.removeIf(a -> a.getArgName().equalsIgnoreCase("field")
|| a.getArgName().equalsIgnoreCase("fields")
|| a.getArgName().equalsIgnoreCase("query"));

var iterator = arguments.listIterator();
while (iterator.hasNext()) {
NamedArgumentExpression arg = (NamedArgumentExpression) iterator.next();
NamedArgumentExpression arg = iterator.next();
String argNormalized = arg.getArgName().toLowerCase();
if (visitedParms.contains(argNormalized)) {
throw new SemanticCheckException(String.format("Parameter '%s' can only be specified once.",
argNormalized));
} else {
visitedParms.add(argNormalized);
}

if (!queryBuildActions.containsKey(argNormalized)) {
throw new SemanticCheckException(
Expand Down Expand Up @@ -79,12 +92,4 @@ protected abstract T createQueryBuilder(NamedArgumentExpression field,
protected interface QueryBuilderStep<T extends QueryBuilder> extends
BiFunction<T, ExprValue, T> {
}

public static String valueOfToUpper(ExprValue v) {
return v.stringValue().toUpperCase();
}

public static String valueOfToLower(ExprValue v) {
return v.stringValue().toLowerCase();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,23 @@ public class SimpleQueryStringQuery extends MultiFieldQuery<SimpleQueryStringBui
public SimpleQueryStringQuery() {
super(ImmutableMap.<String, QueryBuilderStep<SimpleQueryStringBuilder>>builder()
.put("analyzer", (b, v) -> b.analyzer(v.stringValue()))
.put("auto_generate_synonyms_phrase_query", (b, v) ->
b.autoGenerateSynonymsPhraseQuery(Boolean.parseBoolean(v.stringValue())))
.put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue())))
.put("default_operator", (b, v) -> b.defaultOperator(Operator.fromString(v.stringValue())))
.put("flags", (b, v) -> b.flags(Arrays.stream(valueOfToUpper(v).split("\\|"))
.map(SimpleQueryStringFlag::valueOf)
.toArray(SimpleQueryStringFlag[]::new)))
.put("fuzzy_max_expansions", (b, v) ->
b.fuzzyMaxExpansions(Integer.parseInt(v.stringValue())))
.put("fuzzy_prefix_length", (b, v) ->
b.fuzzyPrefixLength(Integer.parseInt(v.stringValue())))
.put("fuzzy_transpositions", (b, v) ->
b.fuzzyTranspositions(Boolean.parseBoolean(v.stringValue())))
.put("lenient", (b, v) -> b.lenient(Boolean.parseBoolean(v.stringValue())))
.put("analyze_wildcard", (b, v) -> b.analyzeWildcard(
FunctionParameterRepository.convertBoolValue(v, "analyze_wildcard")))
.put("auto_generate_synonyms_phrase_query", (b, v) -> b.autoGenerateSynonymsPhraseQuery(
FunctionParameterRepository.convertBoolValue(v,"auto_generate_synonyms_phrase_query")))
.put("boost", (b, v) -> b.boost(
FunctionParameterRepository.convertFloatValue(v, "boost")))
.put("default_operator", (b, v) -> b.defaultOperator(
FunctionParameterRepository.convertOperator(v, "default_operator")))
.put("flags", (b, v) -> b.flags(FunctionParameterRepository.convertFlags(v)))
.put("fuzzy_max_expansions", (b, v) -> b.fuzzyMaxExpansions(
FunctionParameterRepository.convertIntValue(v, "fuzzy_max_expansions")))
.put("fuzzy_prefix_length", (b, v) -> b.fuzzyPrefixLength(
FunctionParameterRepository.convertIntValue(v, "fuzzy_prefix_length")))
.put("fuzzy_transpositions", (b, v) -> b.fuzzyTranspositions(
FunctionParameterRepository.convertBoolValue(v, "fuzzy_transpositions")))
.put("lenient", (b, v) -> b.lenient(
FunctionParameterRepository.convertBoolValue(v, "lenient")))
.put("minimum_should_match", (b, v) -> b.minimumShouldMatch(v.stringValue()))
.put("quote_field_suffix", (b, v) -> b.quoteFieldSuffix(v.stringValue()))
.build());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,39 @@ void match_invalid_parameter() {
assertTrue(msg.startsWith("Parameter invalid_parameter is invalid for match function."));
}

@Test
void match_disallow_duplicate_parameter() {
FunctionExpression expr = dsl.match(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("analyzer", literal("keyword")),
dsl.namedArgument("AnalYzer", literal("english")));
var msg = assertThrows(SemanticCheckException.class, () -> buildQuery(expr)).getMessage();
assertEquals("Parameter 'analyzer' can only be specified once.", msg);
}

@Test
void match_disallow_duplicate_query() {
FunctionExpression expr = dsl.match(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("analyzer", literal("keyword")),
dsl.namedArgument("QUERY", literal("something")));
var msg = assertThrows(SemanticCheckException.class, () -> buildQuery(expr)).getMessage();
assertEquals("Parameter 'query' can only be specified once.", msg);
}

@Test
void match_disallow_duplicate_field() {
FunctionExpression expr = dsl.match(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("analyzer", literal("keyword")),
dsl.namedArgument("Field", literal("something")));
var msg = assertThrows(SemanticCheckException.class, () -> buildQuery(expr)).getMessage();
assertEquals("Parameter 'field' can only be specified once.", msg);
}

@Test
void should_build_match_phrase_query_with_default_parameters() {
assertJsonEquals(
Expand Down Expand Up @@ -570,12 +603,13 @@ void should_build_match_phrase_query_with_custom_parameters() {
+ " \"analyzer\" : \"keyword\","
+ " \"slop\" : 2,\n"
+ " \"zero_terms_query\" : \"ALL\",\n"
+ " \"boost\" : 1.0\n"
+ " \"boost\" : 1.2\n"
+ " }\n"
+ " }\n"
+ "}",
buildQuery(
dsl.match_phrase(
dsl.namedArgument("boost", literal("1.2")),
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("analyzer", literal("keyword")),
Expand Down Expand Up @@ -850,13 +884,53 @@ void relevancy_func_invalid_arg_values() {

var ztqTest = dsl.match_phrase(field, query,
dsl.namedArgument("zero_terms_query", literal("meow")));
var msg = assertThrows(IllegalArgumentException.class, () -> buildQuery(expr)).getMessage();
assertEquals("No enum constant org.opensearch.index.search.MatchQuery.ZeroTermsQuery.MEOW",
msg);
msg = assertThrows(RuntimeException.class, () -> buildQuery(ztqTest)).getMessage();
assertEquals(
"Invalid zero_terms_query value: 'meow'. Available values are: NONE, ALL, NULL.", msg);

var boostTest = dsl.match(field, query,
dsl.namedArgument("boost", literal("pewpew")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(boostTest)).getMessage();
assertEquals(
"Invalid boost value: 'pewpew'. Accepts only floating point values greater than 0.", msg);

var boolTest = dsl.query_string(fields, query,
dsl.namedArgument("escape", literal("42")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(boolTest)).getMessage();
assertEquals(
"Invalid escape value: '42'. Accepts only boolean values: 'true' or 'false'.", msg);

var typeTest = dsl.multi_match(fields, query,
dsl.namedArgument("type", literal("42")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(typeTest)).getMessage();
assertTrue(msg.startsWith("Invalid type value: '42'. Available values are:"));

var operatorTest = dsl.simple_query_string(fields, query,
dsl.namedArgument("default_operator", literal("42")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(operatorTest)).getMessage();
assertTrue(msg.startsWith("Invalid default_operator value: '42'. Available values are:"));

var flagsTest = dsl.simple_query_string(fields, query,
dsl.namedArgument("flags", literal("42")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(flagsTest)).getMessage();
assertTrue(msg.startsWith("Invalid flags value: '42'. Available values are:"));

var fuzzinessTest = dsl.match_bool_prefix(field, query,
dsl.namedArgument("fuzziness", literal("AUTO:")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(fuzzinessTest)).getMessage();
assertTrue(msg.startsWith("Invalid fuzziness value: 'AUTO:'. Available values are:"));

var rewriteTest = dsl.match_bool_prefix(field, query,
dsl.namedArgument("fuzzy_rewrite", literal("42")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(rewriteTest)).getMessage();
assertTrue(msg.startsWith("Invalid fuzzy_rewrite value: '42'. Available values are:"));

var timezoneTest = dsl.query_string(fields, query,
dsl.namedArgument("time_zone", literal("42")));
msg = assertThrows(RuntimeException.class, () -> buildQuery(timezoneTest)).getMessage();
assertTrue(msg.startsWith("Invalid time_zone value: '42'."));
}



@Test
void should_build_match_bool_prefix_query_with_default_parameters() {
assertJsonEquals(
Expand All @@ -878,6 +952,14 @@ void should_build_match_bool_prefix_query_with_default_parameters() {
dsl.namedArgument("query", literal("search query")))));
}

@Test
void multi_match_missing_fields() {
var msg = assertThrows(SemanticCheckException.class, () ->
dsl.multi_match(
dsl.namedArgument("query", literal("search query")))).getMessage();
assertEquals("Expected type STRUCT instead of STRING for parameter #1", msg);
}

@Test
void should_build_match_phrase_prefix_query_with_default_parameters() {
assertJsonEquals(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,6 @@ void throws_SemanticCheckException_when_same_argument_twice() {
assertEquals("Parameter 'boost' can only be specified once.", exception.getMessage());
}

@Test
void throws_SemanticCheckException_when_same_argument_twice() {
FunctionExpression expr = createCall(List.of(FIELD_ARG, QUERY_ARG,
namedArgument("boost", "2.3"),
namedArgument("boost", "2.4")));
SemanticCheckException exception =
assertThrows(SemanticCheckException.class, () -> query.build(expr));
assertEquals("Parameter 'boost' can only be specified once.", exception.getMessage());
}

@Test
void throws_SemanticCheckException_when_wrong_argument_name() {
FunctionExpression expr =
Expand Down

0 comments on commit 5861f36

Please sign in to comment.