Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support to filter traces using !~ #2410

Merged
merged 9 commits into from
May 2, 2023
Merged
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## main / unreleased

* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2362](https://github.com/grafana/tempo/pull/2386) (@kousikmitra)
* [ENHANCEMENT] Add support to filter using negated regex operator `!~` [#2410](https://github.com/grafana/tempo/pull/2410) (@kousikmitra)
* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2386](https://github.com/grafana/tempo/pull/2386) (@kousikmitra)
* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.s3` [#2362](https://github.com/grafana/tempo/pull/2362) (@kousikmitra)
* [FEATURE] Add support for `q` query param in `/api/v2/search/<tag.name>/values` to filter results based on a TraceQL query [#2253](https://github.com/grafana/tempo/pull/2253) (@mapno)
* [ENHANCEMENT] Add `scope` parameter to `/api/search/tags` [#2282](https://github.com/grafana/tempo/pull/2282) (@joe-elliott)
Expand Down
1 change: 1 addition & 0 deletions docs/sources/tempo/traceql/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ The implemented comparison operators are:
- `<` (less than)
- `<=` (less than or equal to)
- `=~` (regular expression)
- `!~` (negated regular expression)

TraceQL uses Golang regular expressions. Online regular expression testing sites like https://regex101.com/ are convenient to validate regular expressions used in TraceQL queries.

Expand Down
188 changes: 147 additions & 41 deletions pkg/parquetquery/predicate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/google/uuid"
"github.com/segmentio/parquet-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand All @@ -32,41 +33,153 @@ func (p *mockPredicate) KeepValue(parquet.Value) bool { p.valCalled
func (p *mockPredicate) KeepPage(parquet.Page) bool { p.pageCalled = true; return p.ret }
func (p *mockPredicate) KeepColumnChunk(parquet.ColumnChunk) bool { p.chunkCalled = true; return p.ret }

type predicateTestCase struct {
testName string
writeData func(w *parquet.Writer) //nolint:all
keptChunks int
keptPages int
keptValues int
predicate Predicate
}

func TestSubstringPredicate(t *testing.T) {
testCases := []predicateTestCase{
{
testName: "all chunks/pages/values inspected",
predicate: NewSubstringPredicate("b"),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
type String struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&String{"abc"})) // kept
require.NoError(t, w.Write(&String{"bcd"})) // kept
require.NoError(t, w.Write(&String{"cde"})) // skipped
},
},
{
testName: "dictionary in the page header allows for skipping a page",
predicate: NewSubstringPredicate("x"), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
type dictString struct {
mdisibio marked this conversation as resolved.
Show resolved Hide resolved
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
},
},
}

for _, tC := range testCases {
t.Run(tC.testName, func(t *testing.T) {
testPredicate(t, tC)
})
}
}

func TestNewRegexInPredicate(t *testing.T) {
testCases := []predicateTestCase{
{
testName: "all chunks/pages/values inspected",
predicate: func() Predicate {
pred, err := NewRegexInPredicate([]string{"a.*"})
assert.NoError(t, err)
mdisibio marked this conversation as resolved.
Show resolved Hide resolved
return pred
}(),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
type String struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&String{"abc"})) // kept
require.NoError(t, w.Write(&String{"acd"})) // kept
require.NoError(t, w.Write(&String{"cde"})) // skipped
},
},
{
testName: "dictionary in the page header allows for skipping a page",
predicate: func() Predicate {
pred, err := NewRegexInPredicate([]string{"x.*"})
assert.NoError(t, err)
return pred
}(), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
type dictString struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
},
},
}

// Normal case - all chunks/pages/values inspected
testPredicate(t, predicateTestCase{
predicate: NewSubstringPredicate("b"),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
type String struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&String{"abc"})) // kept
require.NoError(t, w.Write(&String{"bcd"})) // kept
require.NoError(t, w.Write(&String{"cde"})) // skipped
for _, tC := range testCases {
t.Run(tC.testName, func(t *testing.T) {
testPredicate(t, tC)
})
}
}

func TestNewRegexNotInPredicate(t *testing.T) {
testCases := []predicateTestCase{
{
testName: "all chunks/pages/values inspected",
predicate: func() Predicate {
pred, err := NewRegexNotInPredicate([]string{"a.*"})
assert.NoError(t, err)
return pred
}(),
keptChunks: 1,
keptPages: 1,
keptValues: 2,
writeData: func(w *parquet.Writer) { //nolint:all
type String struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&String{"abc"})) // skipped
require.NoError(t, w.Write(&String{"acd"})) // skipped
require.NoError(t, w.Write(&String{"cde"})) // kept
require.NoError(t, w.Write(&String{"xde"})) // kept
},
},
})

// Dictionary in the page header allows for skipping a page
testPredicate(t, predicateTestCase{
predicate: NewSubstringPredicate("x"), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
type dictString struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
require.NoError(t, w.Write(&dictString{"abc"}))
{
testName: "dictionary in the page header allows for skipping a page",
predicate: func() Predicate {
pred, err := NewRegexNotInPredicate([]string{"x.*"})
assert.NoError(t, err)
return pred
}(), // Not present in any values
keptChunks: 1,
keptPages: 0,
keptValues: 0,
writeData: func(w *parquet.Writer) { //nolint:all
type dictString struct {
S string `parquet:",dict"`
}
require.NoError(t, w.Write(&dictString{"xyz"}))
require.NoError(t, w.Write(&dictString{"xyz"}))
},
},
})
}

for _, tC := range testCases {
t.Run(tC.testName, func(t *testing.T) {
testPredicate(t, tC)
})
}
}

// TestOrPredicateCallsKeepColumnChunk ensures that the OrPredicate calls
Expand Down Expand Up @@ -120,17 +233,10 @@ func TestOrPredicateCallsKeepColumnChunk(t *testing.T) {
}
}

type predicateTestCase struct {
writeData func(w *parquet.Writer) //nolint:all
keptChunks int
keptPages int
keptValues int
predicate Predicate
}

// testPredicate by writing data and then iterating the column. The data model
// must contain a single column.
// testPredicate by writing data and then iterating the column.
// The data model must contain a single column.
func testPredicate(t *testing.T, tc predicateTestCase) {
t.Helper()
buf := new(bytes.Buffer)
w := parquet.NewWriter(buf)
tc.writeData(w)
Expand Down
45 changes: 28 additions & 17 deletions pkg/parquetquery/predicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,33 @@ func (p *StringInPredicate) KeepPage(page pq.Page) bool {
return p.helper.keepPage(page, p.KeepValue)
}

// RegexInPredicate checks for match against any of the given regexs.
// Memoized and resets on each row group.
type RegexInPredicate struct {
regs []*regexp.Regexp
matches map[string]bool
type regexPredicate struct {
regs []*regexp.Regexp
matches map[string]bool
shouldMatch bool

helper DictionaryPredicateHelper
}

var _ Predicate = (*RegexInPredicate)(nil)
var _ Predicate = (*regexPredicate)(nil)

// NewRegexInPredicate checks for match against any of the given regexs.
// Memoized and resets on each row group.
func NewRegexInPredicate(regs []string) (Predicate, error) {
return newRegexPredicate(regs, true)
}

// NewRegexNotInPredicate checks for values that not match against any of the given regexs.
// Memoized and resets on each row group.
func NewRegexNotInPredicate(regs []string) (Predicate, error) {
return newRegexPredicate(regs, false)
}

func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) {
p := &RegexInPredicate{
regs: make([]*regexp.Regexp, 0, len(regs)),
matches: make(map[string]bool),
func newRegexPredicate(regs []string, shouldMatch bool) (Predicate, error) {
p := &regexPredicate{
regs: make([]*regexp.Regexp, 0, len(regs)),
matches: make(map[string]bool),
shouldMatch: shouldMatch,
}
for _, reg := range regs {
r, err := regexp.Compile(reg)
Expand All @@ -108,17 +120,16 @@ func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) {
return p, nil
}

func (p *RegexInPredicate) String() string {
func (p *regexPredicate) String() string {
var strings string
for _, s := range p.regs {
strings += fmt.Sprintf("%s, ", s.String())
}
return fmt.Sprintf("RegexInPredicate{%s}", strings)
}

func (p *RegexInPredicate) keep(v *pq.Value) bool {
func (p *regexPredicate) keep(v *pq.Value) bool {
if v.IsNull() {
// Null
return false
}

Expand All @@ -129,7 +140,7 @@ func (p *RegexInPredicate) keep(v *pq.Value) bool {

matched := false
for _, r := range p.regs {
if r.MatchString(s) {
if r.MatchString(s) == p.shouldMatch {
kousikmitra marked this conversation as resolved.
Show resolved Hide resolved
matched = true
break
}
Expand All @@ -139,7 +150,7 @@ func (p *RegexInPredicate) keep(v *pq.Value) bool {
return matched
}

func (p *RegexInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
func (p *regexPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
p.helper.setNewRowGroup()

// Reset match cache on each row group change
Expand All @@ -149,11 +160,11 @@ func (p *RegexInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
return true
}

func (p *RegexInPredicate) KeepValue(v pq.Value) bool {
func (p *regexPredicate) KeepValue(v pq.Value) bool {
return p.keep(&v)
}

func (p *RegexInPredicate) KeepPage(page pq.Page) bool {
func (p *regexPredicate) KeepPage(page pq.Page) bool {
return p.helper.keepPage(page, p.KeepValue)
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/traceql/ast_validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ func (o BinaryOperation) validate() error {
}

switch o.Op {
case OpNotRegex,
OpSpansetChild,
case OpSpansetChild,
OpSpansetDescendant,
OpSpansetSibling:
return newUnsupportedError(fmt.Sprintf("binary operation (%v)", o.Op))
Expand Down
Loading