Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support to filter traces using !~ #2410

Merged
merged 9 commits into from
May 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## main / unreleased

* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2362](https://github.com/grafana/tempo/pull/2386) (@kousikmitra)
* [ENHANCEMENT] Add support to filter using negated regex operator `!~` [#2410](https://github.com/grafana/tempo/pull/2410) (@kousikmitra)
* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2386](https://github.com/grafana/tempo/pull/2386) (@kousikmitra)
* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.s3` [#2362](https://github.com/grafana/tempo/pull/2362) (@kousikmitra)
* [FEATURE] Add support for `q` query param in `/api/v2/search/<tag.name>/values` to filter results based on a TraceQL query [#2253](https://github.com/grafana/tempo/pull/2253) (@mapno)
* [ENHANCEMENT] Add `scope` parameter to `/api/search/tags` [#2282](https://github.com/grafana/tempo/pull/2282) (@joe-elliott)
Expand Down
1 change: 1 addition & 0 deletions docs/sources/tempo/traceql/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ The implemented comparison operators are:
- `<` (less than)
- `<=` (less than or equal to)
- `=~` (regular expression)
- `!~` (negated regular expression)

TraceQL uses Golang regular expressions. Online regular expression testing sites like https://regex101.com/ are convenient to validate regular expressions used in TraceQL queries.

Expand Down
200 changes: 157 additions & 43 deletions pkg/parquetquery/predicate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@ import (
"github.com/stretchr/testify/require"
)

var _ Predicate = (*mockPredicate)(nil)

type mockPredicate struct {
ret bool
valCalled bool
pageCalled bool
chunkCalled bool
}

type testDictString struct {
S string `parquet:",dict"`
}

var _ Predicate = (*mockPredicate)(nil)

func newAlwaysTruePredicate() *mockPredicate {
return &mockPredicate{ret: true}
}
Expand All @@ -32,41 +36,140 @@ func (p *mockPredicate) KeepValue(parquet.Value) bool { p.valCalled
func (p *mockPredicate) KeepPage(parquet.Page) bool { p.pageCalled = true; return p.ret }
func (p *mockPredicate) KeepColumnChunk(parquet.ColumnChunk) bool { p.chunkCalled = true; return p.ret }

type predicateTestCase struct {
testName string
writeData func(w *parquet.Writer) //nolint:all
keptChunks int
keptPages int
keptValues int
predicate Predicate
}

func TestSubstringPredicate(t *testing.T) {
testCases := []predicateTestCase{
{
testName: "all chunks/pages/values inspected",
predicate: NewSubstringPredicate("b"),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all

require.NoError(t, w.Write(&testDictString{"abc"})) // kept
require.NoError(t, w.Write(&testDictString{"bcd"})) // kept
require.NoError(t, w.Write(&testDictString{"cde"})) // skipped
},
},
{
testName: "dictionary in the page header allows for skipping a page",
predicate: NewSubstringPredicate("x"), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
require.NoError(t, w.Write(&testDictString{"abc"}))
require.NoError(t, w.Write(&testDictString{"abc"}))
require.NoError(t, w.Write(&testDictString{"abc"}))
require.NoError(t, w.Write(&testDictString{"abc"}))
require.NoError(t, w.Write(&testDictString{"abc"}))
},
},
}

for _, tC := range testCases {
t.Run(tC.testName, func(t *testing.T) {
testPredicate(t, tC)
})
}
}

func TestNewRegexInPredicate(t *testing.T) {
testCases := []predicateTestCase{
{
testName: "all chunks/pages/values inspected",
predicate: func() Predicate {
pred, err := NewRegexInPredicate([]string{"a.*"})
require.NoError(t, err)

return pred
}(),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
require.NoError(t, w.Write(&testDictString{"abc"})) // kept
require.NoError(t, w.Write(&testDictString{"acd"})) // kept
require.NoError(t, w.Write(&testDictString{"cde"})) // skipped
},
},
{
testName: "dictionary in the page header allows for skipping a page",
predicate: func() Predicate {
pred, err := NewRegexInPredicate([]string{"x.*"})
require.NoError(t, err)

return pred
}(), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
require.NoError(t, w.Write(&testDictString{"abc"}))
require.NoError(t, w.Write(&testDictString{"abc"}))
},
},
}

// Normal case - all chunks/pages/values inspected
testPredicate(t, predicateTestCase{
predicate: NewSubstringPredicate("b"),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
type String struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&String{"abc"})) // kept
require.NoError(t, w.Write(&String{"bcd"})) // kept
require.NoError(t, w.Write(&String{"cde"})) // skipped
for _, tC := range testCases {
t.Run(tC.testName, func(t *testing.T) {
testPredicate(t, tC)
})
}
}

func TestNewRegexNotInPredicate(t *testing.T) {
testCases := []predicateTestCase{
{
testName: "all chunks/pages/values inspected",
predicate: func() Predicate {
pred, err := NewRegexNotInPredicate([]string{"a.*"})
require.NoError(t, err)

return pred
}(),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
require.NoError(t, w.Write(&testDictString{"abc"})) // skipped
require.NoError(t, w.Write(&testDictString{"acd"})) // skipped
require.NoError(t, w.Write(&testDictString{"cde"})) // kept
require.NoError(t, w.Write(&testDictString{"xde"})) // kept
},
},
})

// Dictionary in the page header allows for skipping a page
testPredicate(t, predicateTestCase{
predicate: NewSubstringPredicate("x"), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
type dictString struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
{
testName: "dictionary in the page header allows for skipping a page",
predicate: func() Predicate {
pred, err := NewRegexNotInPredicate([]string{"x.*"})
require.NoError(t, err)

return pred
}(), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
require.NoError(t, w.Write(&testDictString{"xyz"}))
require.NoError(t, w.Write(&testDictString{"xyz"}))
},
},
})
}

for _, tC := range testCases {
t.Run(tC.testName, func(t *testing.T) {
testPredicate(t, tC)
})
}
}

// TestOrPredicateCallsKeepColumnChunk ensures that the OrPredicate calls
Expand Down Expand Up @@ -120,17 +223,10 @@ func TestOrPredicateCallsKeepColumnChunk(t *testing.T) {
}
}

type predicateTestCase struct {
writeData func(w *parquet.Writer) //nolint:all
keptChunks int
keptPages int
keptValues int
predicate Predicate
}

// testPredicate by writing data and then iterating the column. The data model
// must contain a single column.
// testPredicate by writing data and then iterating the column.
// The data model must contain a single column.
func testPredicate(t *testing.T, tc predicateTestCase) {
t.Helper()
buf := new(bytes.Buffer)
w := parquet.NewWriter(buf)
tc.writeData(w)
Expand Down Expand Up @@ -190,3 +286,21 @@ func BenchmarkStringInPredicate(b *testing.B) {
}
}
}

func BenchmarkRegexInPredicate(b *testing.B) {
p, err := NewRegexInPredicate([]string{"abc"})
require.NoError(b, err)

s := make([]parquet.Value, 1000)
for i := 0; i < 1000; i++ {
s[i] = parquet.ValueOf(uuid.New().String())
}

b.ResetTimer()

for i := 0; i < b.N; i++ {
for _, ss := range s {
p.KeepValue(ss)
}
}
}
45 changes: 28 additions & 17 deletions pkg/parquetquery/predicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,33 @@ func (p *StringInPredicate) KeepPage(page pq.Page) bool {
return p.helper.keepPage(page, p.KeepValue)
}

// RegexInPredicate checks for match against any of the given regexs.
// Memoized and resets on each row group.
type RegexInPredicate struct {
regs []*regexp.Regexp
matches map[string]bool
type regexPredicate struct {
regs []*regexp.Regexp
matches map[string]bool
shouldMatch bool

helper DictionaryPredicateHelper
}

var _ Predicate = (*RegexInPredicate)(nil)
var _ Predicate = (*regexPredicate)(nil)

// NewRegexInPredicate checks for match against any of the given regexs.
// Memoized and resets on each row group.
func NewRegexInPredicate(regs []string) (Predicate, error) {
return newRegexPredicate(regs, true)
}

// NewRegexNotInPredicate checks for values that not match against any of the given regexs.
// Memoized and resets on each row group.
func NewRegexNotInPredicate(regs []string) (Predicate, error) {
return newRegexPredicate(regs, false)
}

func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) {
p := &RegexInPredicate{
regs: make([]*regexp.Regexp, 0, len(regs)),
matches: make(map[string]bool),
func newRegexPredicate(regs []string, shouldMatch bool) (Predicate, error) {
p := &regexPredicate{
regs: make([]*regexp.Regexp, 0, len(regs)),
matches: make(map[string]bool),
shouldMatch: shouldMatch,
}
for _, reg := range regs {
r, err := regexp.Compile(reg)
Expand All @@ -108,17 +120,16 @@ func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) {
return p, nil
}

func (p *RegexInPredicate) String() string {
func (p *regexPredicate) String() string {
var strings string
for _, s := range p.regs {
strings += fmt.Sprintf("%s, ", s.String())
}
return fmt.Sprintf("RegexInPredicate{%s}", strings)
}

func (p *RegexInPredicate) keep(v *pq.Value) bool {
func (p *regexPredicate) keep(v *pq.Value) bool {
if v.IsNull() {
// Null
return false
}

Expand All @@ -129,7 +140,7 @@ func (p *RegexInPredicate) keep(v *pq.Value) bool {

matched := false
for _, r := range p.regs {
if r.MatchString(s) {
if r.MatchString(s) == p.shouldMatch {
kousikmitra marked this conversation as resolved.
Show resolved Hide resolved
matched = true
break
}
Expand All @@ -139,7 +150,7 @@ func (p *RegexInPredicate) keep(v *pq.Value) bool {
return matched
}

func (p *RegexInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
func (p *regexPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
p.helper.setNewRowGroup()

// Reset match cache on each row group change
Expand All @@ -149,11 +160,11 @@ func (p *RegexInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
return true
}

func (p *RegexInPredicate) KeepValue(v pq.Value) bool {
func (p *regexPredicate) KeepValue(v pq.Value) bool {
return p.keep(&v)
}

func (p *RegexInPredicate) KeepPage(page pq.Page) bool {
func (p *regexPredicate) KeepPage(page pq.Page) bool {
return p.helper.keepPage(page, p.KeepValue)
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/traceql/ast_validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ func (o BinaryOperation) validate() error {
}

switch o.Op {
case OpNotRegex,
OpSpansetChild,
case OpSpansetChild,
OpSpansetDescendant,
OpSpansetSibling:
return newUnsupportedError(fmt.Sprintf("binary operation (%v)", o.Op))
Expand Down
6 changes: 3 additions & 3 deletions pkg/traceql/test_examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ valid:
- '{ 1 <= 2 }'
- '{ -1 = 2 }'
- '{ "test" =~ "test" }'
- '{ "test" !~ "test" }'
mdisibio marked this conversation as resolved.
Show resolved Hide resolved
- '{ "test" = "test" }'
- '{ "test" != "test" }'
- '{ .a }'
Expand All @@ -27,6 +28,7 @@ valid:
- '{ .a <= 2 }'
- '{ -.a = 2 }'
- '{ .a =~ "test" }'
- '{ .a !~ "test" }'
- '{ .a = "test" }'
- '{ .a != "test" }'
- '{ resource.a != 3 }'
Expand Down Expand Up @@ -159,6 +161,7 @@ validate_fails:
- '{ 1 > ok }'
- '{ 1 = name }'
- '{ 1 =~ 2}'
- '{ 1 !~ 2}'
- '{ 1 && "foo" }'
- '{ 1 || ok }'
- '{ true || 1.1 }'
Expand Down Expand Up @@ -228,9 +231,6 @@ unsupported:
- '{ !parent = nil }'
# nil - will be valid when supported
- '{ .foo = nil }'
# binary operations - will be valid when supported
- '{ "test" !~ "test" }'
- '{ .a !~ "test" }'
# childCount - will be valid when supported
- '{ 1 = childCount }'
# childCount - will be invalid when supported
Expand Down
Loading