Skip to content

Commit

Permalink
Adding Highlight support in PPL. Adding wildcard support for highligh…
Browse files Browse the repository at this point in the history
…t in SQL and PPL.

Signed-off-by: forestmvey <[email protected]>
  • Loading branch information
forestmvey committed Sep 2, 2022
1 parent c2973a3 commit 4fa96cf
Show file tree
Hide file tree
Showing 30 changed files with 632 additions and 190 deletions.
23 changes: 23 additions & 0 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST;
import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC;
import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC;
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
import static org.opensearch.sql.utils.MLCommonsConstants.RCF_ANOMALOUS;
import static org.opensearch.sql.utils.MLCommonsConstants.RCF_ANOMALY_GRADE;
Expand All @@ -31,8 +32,10 @@
import org.opensearch.sql.analysis.symbol.Namespace;
import org.opensearch.sql.analysis.symbol.Symbol;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.Let;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.Map;
Expand All @@ -43,6 +46,7 @@
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Highlight;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.Parse;
Expand All @@ -55,6 +59,7 @@
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprMissingValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.exception.SemanticCheckException;
Expand All @@ -71,6 +76,7 @@
import org.opensearch.sql.planner.logical.LogicalDedupe;
import org.opensearch.sql.planner.logical.LogicalEval;
import org.opensearch.sql.planner.logical.LogicalFilter;
import org.opensearch.sql.planner.logical.LogicalHighlight;
import org.opensearch.sql.planner.logical.LogicalLimit;
import org.opensearch.sql.planner.logical.LogicalMLCommons;
import org.opensearch.sql.planner.logical.LogicalPlan;
Expand Down Expand Up @@ -329,6 +335,23 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) {
return new LogicalEval(child, expressionsBuilder.build());
}

/**
 * Build {@link LogicalHighlight}.
 *
 * <p>The child plan is analyzed first. The name of the alias wrapping the highlight
 * function is then defined as a STRING field in the current type environment, and the
 * highlight field is analyzed and attached to the resulting logical node.
 */
@Override
public LogicalPlan visitHighlight(Highlight node, AnalysisContext context) {
  LogicalPlan input = node.getChild().get(0).accept(this, context);
  TypeEnvironment typeEnv = context.peek();

  // The node's expression is cast to an Alias whose delegate is the highlight function.
  Alias alias = (Alias) node.getExpression();
  Symbol outputField = new Symbol(Namespace.FIELD_NAME, alias.getName());
  typeEnv.define(outputField, STRING);

  HighlightFunction function = (HighlightFunction) alias.getDelegated();
  Expression highlightField = expressionAnalyzer.analyze(function.getHighlightField(), context);
  return new LogicalHighlight(input, highlightField);
}


/**
* Build {@link ParseExpression} to context and skip to child nodes.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@
import org.opensearch.sql.ast.expression.WindowFunction;
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.LiteralExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.NamedExpression;
Expand Down Expand Up @@ -191,9 +192,10 @@ public Expression visitWindowFunction(WindowFunction node, AnalysisContext conte
}

@Override
public Expression visitHighlight(HighlightFunction node, AnalysisContext context) {
public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext context) {
Expression expr = node.getHighlightField().accept(this, context);
return new HighlightExpression(expr);
String highlightStr = "highlight(" + StringUtils.unquoteText(expr.toString()) + ")";
return new ReferenceExpression(highlightStr, ExprCoreType.STRING);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Highlight;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.Parse;
Expand Down Expand Up @@ -256,7 +257,11 @@ public T visitAD(AD node, C context) {
return visitChildren(node, context);
}

public T visitHighlight(HighlightFunction node, C context) {
/**
 * Visit a {@link HighlightFunction} expression node.
 * This default implementation delegates to {@code visitChildren}.
 */
public T visitHighlightFunction(HighlightFunction node, C context) {
return visitChildren(node, context);
}

/**
 * Visit a {@link Highlight} plan node.
 * This default implementation delegates to {@code visitChildren}.
 */
public T visitHighlight(Highlight node, C context) {
return visitChildren(node, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class HighlightFunction extends UnresolvedExpression {

@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitHighlight(this, context);
return nodeVisitor.visitHighlightFunction(this, context);
}

@Override
Expand Down
45 changes: 45 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import com.google.common.collect.ImmutableList;
import java.util.List;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
 * AST node representing the Highlight operation.
 *
 * <p>Carries the unresolved highlight expression together with the single child plan
 * the operation is applied to.
 */
@RequiredArgsConstructor
@EqualsAndHashCode(callSuper = false)
@ToString
@Setter
@Getter
public class Highlight extends UnresolvedPlan {
  /** Unresolved expression describing what to highlight; set through the constructor. */
  private final UnresolvedExpression expression;

  /** Child plan of this node; assigned via {@link #attach(UnresolvedPlan)}. */
  private UnresolvedPlan child;

  /**
   * Dispatch this node to the visitor's {@code visitHighlight} method.
   *
   * @param nodeVisitor visitor to dispatch to
   * @param context     visitor context passed through unchanged
   * @return whatever the visitor returns for this node
   */
  @Override
  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
    return nodeVisitor.visitHighlight(this, context);
  }

  /**
   * Store the given plan as the child of this node.
   *
   * @param child plan to attach
   * @return this node, so calls can be chained
   */
  @Override
  public Highlight attach(UnresolvedPlan child) {
    this.child = child;
    return this;
  }

  /**
   * Return the attached child wrapped in an immutable single-element list.
   *
   * @return list containing the child plan
   */
  @Override
  public List<UnresolvedPlan> getChild() {
    return ImmutableList.of(child);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,6 @@ public T visitNamed(NamedExpression node, C context) {
return node.getDelegated().accept(this, context);
}

public T visitHighlight(HighlightExpression node, C context) {
return visitNode(node, context);
}

/**
 * Visit a {@link ReferenceExpression} node.
 * This default implementation delegates to {@code visitNode}.
 */
public T visitReference(ReferenceExpression node, C context) {
return visitNode(node, context);
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
import java.util.Map;
import java.util.stream.Collectors;
import lombok.experimental.UtilityClass;
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.env.Environment;

Expand Down Expand Up @@ -51,14 +49,6 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
repository.register(highlight());
}

private static FunctionResolver highlight() {
FunctionName functionName = BuiltinFunctionName.HIGHLIGHT.getName();
FunctionSignature functionSignature = new FunctionSignature(functionName, List.of(STRING));
FunctionBuilder functionBuilder = arguments -> new HighlightExpression(arguments.get(0));
return new FunctionResolver(functionName, ImmutableMap.of(functionSignature, functionBuilder));
}

private static FunctionResolver match_bool_prefix() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.planner.physical;

import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
import static org.opensearch.sql.expression.env.Environment.extendEnv;

import com.google.common.collect.ImmutableMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.ReferenceExpression;
import org.opensearch.sql.expression.env.Environment;

/**
* HighlightOperator evaluates the {@link HighlightOperator#highlight} to put result
* into the output. Highlight fields in input are matched to the appropriate output.
* Direct mapping between input and output, as well as partial mapping is made
* dependent on highlight expression.
*
*/
@EqualsAndHashCode
public class HighlightOperator extends PhysicalPlan {
@Getter
private final PhysicalPlan input;
@Getter
private final Expression highlight;

public HighlightOperator(PhysicalPlan input, Expression highlight) {
this.input = input;
this.highlight = highlight;
}

@Override
public <R, C> R accept(PhysicalPlanNodeVisitor<R, C> visitor, C context) {
return visitor.visitHighlight(this, context);
}

@Override
public boolean hasNext() {
return input.hasNext();
}

@Override
public ExprValue next() {
ExprValue inputValue = input.next();
Pair<String, ExprValue> evalMap = mapHighlight(inputValue.bindingTuples());

if (STRUCT == inputValue.type()) {
ImmutableMap.Builder<String, ExprValue> resultBuilder = new ImmutableMap.Builder<>();
Map<String, ExprValue> tupleValue = ExprValueUtils.getTupleValue(inputValue);
for (Map.Entry<String, ExprValue> valueEntry : tupleValue.entrySet()) {
resultBuilder.put(valueEntry);
}
resultBuilder.put(evalMap);
return ExprTupleValue.fromExprValueMap(resultBuilder.build());
} else {
return inputValue;
}
}

/**
* Evaluate the expression in the {@link HighlightOperator#highlight} with {@link Environment}.
* @param env {@link Environment}
* @return The mapping of reference and {@link ExprValue} for expression.
*/
private Pair<String, ExprValue> mapHighlight(Environment<Expression, ExprValue> env) {
String osHighlightKey = "_highlight";
if (!highlight.toString().contains("*")) {
osHighlightKey += "." + StringUtils.unquoteText(highlight.toString());
}

ReferenceExpression osOutputVar = DSL.ref(osHighlightKey, STRING);
ExprValue value = osOutputVar.valueOf(env);

// In the event of multiple returned highlights and wildcard being
// used in conjunction with other highlight calls, we need to ensure
// only wildcard regex matching is mapped to wildcard call.
if (StringUtils.unquoteText(highlight.toString()).matches("(.+\\*)|(\\*.+)")
&& value.type() == STRUCT) {
value = new ExprTupleValue(
new LinkedHashMap<String, ExprValue>(value.tupleValue()
.entrySet()
.stream()
.filter(s -> s.getKey().matches(
StringUtils.unquoteText(
highlight.toString().replace("*", "(.*)"))))
.collect(Collectors.toMap(
e -> e.getKey(),
e -> e.getValue()))));
}

String sqlHighlightKey = "highlight(" + StringUtils.unquoteText(highlight.toString()) + ")";
ReferenceExpression sqlOutputVar = DSL.ref(sqlHighlightKey, STRING);

// Add mapping for sql output and opensearch returned highlight fields
extendEnv(env, sqlOutputVar, value);

return new ImmutablePair<>(sqlOutputVar.toString(), value);
}

@Override
public List<PhysicalPlan> getChild() {
return List.of(this.input);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,8 @@ public R visitMLCommons(PhysicalPlan node, C context) {
public R visitAD(PhysicalPlan node, C context) {
return visitNode(node, context);
}

/**
 * Visit a highlight physical plan node.
 * This default implementation delegates to {@code visitNode}.
 */
public R visitHighlight(PhysicalPlan node, C context) {
return visitNode(node, context);
}
}
Loading

0 comments on commit 4fa96cf

Please sign in to comment.