Skip to content

Commit

Permalink
Add ByteMatch hashCode() to reduce transitions object count. (#199)
Browse files Browse the repository at this point in the history
Add ByteMatch hashCode() to reduce transitions object count.

It takes many hours to compute the machine complexity of this rule
on a laptop:

{
    "field1": [{
        "numeric": ["<=", 120.0]
    }],
    "field2": [{
        "numeric": [">", 300.0]
    }],
    "field3": [{
        "numeric": ["=", 60.0]
    }],
    "field4": [{
        "numeric": ["<", 60.0]
    }],
    "field5": [{
        "numeric": ["<=", 60.0]
    }]
}

This is because numeric matchers are compiled as ranges for comparison.
The ranges create a sequence of numbers that then get added into the
byte machine; however, most of these transition to a small number
of states. See `ByteMachine.addRangePattern` for more details.

Given that the numbers only transition to a handful of end states,
we should have been merging the transitions within `ByteMap.updateTransitions`.
However, the merge relies on transitions being comparable, which
isn't the case for ByteMatch.

After adding hashCode() and equals(), we are now able to dedupe and
avoid creating duplicates. This offers nominal benefit in rule matching
latency but is a notable improvement for (1) rule addition / removal time,
and (2) when computing the machine size / complexity.
  • Loading branch information
baldawar authored Nov 1, 2024
1 parent 28c3d81 commit 4c883d7
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<groupId>software.amazon.event.ruler</groupId>
<artifactId>event-ruler</artifactId>
<name>Event Ruler</name>
<version>1.8.0</version>
<version>1.8.1</version>
<description>Event Ruler is a Java library that allows matching Rules to Events. An event is a list of fields,
which may be given as name/value pairs or as a JSON object. A rule associates event field names with lists of
possible values. There are two reasons to use Ruler: 1/ It's fast; the time it takes to match Events doesn't
Expand Down
18 changes: 18 additions & 0 deletions src/main/software/amazon/event/ruler/ByteMatch.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package software.amazon.event.ruler;

import java.util.Collections;
import java.util.Objects;
import java.util.Set;

/**
Expand Down Expand Up @@ -77,4 +78,21 @@ public void gatherObjects(Set<Object> objectSet, int maxObjectCount) {
public String toString() {
    // Build the description in three pieces, in the same left-to-right order
    // as the final string: hash code, pattern (with its inner value), next state.
    final String hashPart = "BM: HC=" + hashCode();
    final String patternPart = " P=" + pattern + "(" + pattern.pattern() + ")";
    final String nextStatePart = " NNS=" + nextNameState;
    return hashPart + patternPart + nextStatePart;
}

/**
 * Compares this match with another object by value.
 *
 * Two instances are considered equal when they have exactly the same runtime
 * class and both their {@code pattern} and {@code nextNameState} fields are
 * equal according to {@link Objects#equals(Object, Object)}.
 *
 * @param o the object to compare against; may be {@code null}
 * @return {@code true} if {@code o} is equal to this instance, {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    // Fast path: identical reference.
    if (o == this) {
        return true;
    }
    // Nothing is equal to null.
    if (o == null) {
        return false;
    }
    // Require the exact same runtime class (no subclass/superclass equality).
    if (o.getClass() != getClass()) {
        return false;
    }
    final ByteMatch other = (ByteMatch) o;
    if (!Objects.equals(pattern, other.pattern)) {
        return false;
    }
    return Objects.equals(nextNameState, other.nextNameState);
}

/**
 * Computes a hash code from the {@code pattern} and {@code nextNameState} fields,
 * the same two fields that {@code equals} compares.
 *
 * The fields are folded in order with the conventional 31 multiplier starting
 * from 1; a {@code null} field contributes 0.
 *
 * @return the combined hash of {@code pattern} and {@code nextNameState}
 */
@Override
public int hashCode() {
    int result = 1;
    result = 31 * result + Objects.hashCode(pattern);
    result = 31 * result + Objects.hashCode(nextNameState);
    return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ public void testEvaluateAnythingButWildcardMultiplePatternsViaSet() {
ByteMachine machine = new ByteMachine();
machine.addPattern(Patterns.anythingButWildcard(new HashSet<>(Arrays.asList("a*aaa", "aa*aa"))));
// "aaaa" is matched by 7 wildcard prefixes: "a*", "a*a", "a*aa", "a*aaa", "aa*", "aa*a", "aa*aa"
assertEquals(7, machine.evaluateComplexity(evaluator));
assertEquals(6, machine.evaluateComplexity(evaluator));
}

@Test
Expand All @@ -520,6 +520,38 @@ public void testEvaluateWildcardWithAnythingButWildcard() {
Patterns.anythingButWildcard("aa*aa"));
}

/**
 * Guards against a state explosion during complexity evaluation when a rule
 * contains several numeric matchers. The same rule is checked against a machine
 * built with additional name state reuse enabled and one built with it disabled;
 * each run must finish within the test timeout and report a complexity of 0.
 */
@Test(timeout = 250)
public void testEvaluateForMultipleNumericMatchers() throws Exception {
    // Rule text, one element per line; joined with "\n" and no trailing newline.
    final String rule = String.join("\n",
            "{",
            " \"field1\": [{",
            " \"numeric\": [\"<=\", 120.0]",
            " }],",
            " \"field2\": [{",
            " \"numeric\": [\">\", 300.0]",
            " }],",
            " \"field3\": [{",
            " \"numeric\": [\"=\", 60.0]",
            " }],",
            " \"field4\": [{",
            " \"numeric\": [\"<\", 60.0]",
            " }],",
            " \"field5\": [{",
            " \"numeric\": [\"<=\", 60.0]",
            " }]",
            "}");

    // Exercise both builder settings, reuse enabled first and then disabled.
    for (boolean nameStateReuse : new boolean[] {true, false}) {
        Machine machine = new Machine.Builder().withAdditionalNameStateReuse(nameStateReuse).build();
        machine.addRule("rule", rule);
        assertEquals(0, machine.evaluateComplexity(evaluator));
    }
}

private void testPatternPermutations(int expectedComplexity, Patterns ... patterns) {
ByteMachine machine = new ByteMachine();
List<Patterns[]> patternPermutations = generateAllPermutations(patterns);
Expand Down
4 changes: 2 additions & 2 deletions src/test/software/amazon/event/ruler/MachineTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2418,7 +2418,7 @@ public void testSuffixChineseMatch() throws Exception {
assertEquals(1, matchRules.size());
}

@Test(timeout = 500)
@Test(timeout = 250)
public void testApproximateSizeDoNotTakeForeverForRulesWithNumericMatchers() throws Exception {
Machine machine = new Machine();
machine.addRule("rule",
Expand All @@ -2428,7 +2428,7 @@ public void testApproximateSizeDoNotTakeForeverForRulesWithNumericMatchers() thr
" \"c\": [{ \"numeric\": [\">\", 50] }]\n" +
"}");

assertEquals(3299, machine.approximateObjectCount(10000));
assertEquals(52, machine.approximateObjectCount(10000));
}

@Test
Expand Down

0 comments on commit 4c883d7

Please sign in to comment.