diff --git a/pkg/parquetquery/predicates.go b/pkg/parquetquery/predicates.go
index f5b78edd556..e6e126fb644 100644
--- a/pkg/parquetquery/predicates.go
+++ b/pkg/parquetquery/predicates.go
@@ -133,29 +133,30 @@ func (p *regexPredicate) keep(v *pq.Value) bool {
 		return false
 	}
 
-	s := v.String()
-	if matched, ok := p.matches[s]; ok {
+	b := v.ByteArray()
+
+	// Check uses zero alloc optimization of map[string([]byte)]
+	if matched, ok := p.matches[string(b)]; ok {
 		return matched
 	}
 
 	matched := false
 	for _, r := range p.regs {
-		if r.MatchString(s) == p.shouldMatch {
+		if r.Match(b) == p.shouldMatch {
 			matched = true
 			break
 		}
 	}
 
-	p.matches[s] = matched
+	// Only alloc the string when updating the map
+	p.matches[string(b)] = matched
+
 	return matched
 }
 
 func (p *regexPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
 	p.helper.setNewRowGroup()
 
-	// Reset match cache on each row group change
-	p.matches = make(map[string]bool, len(p.matches))
-
 	// Can we do any filtering here?
 	return true
 }
@@ -165,6 +166,20 @@ func (p *regexPredicate) KeepValue(v pq.Value) bool {
 }
 
 func (p *regexPredicate) KeepPage(page pq.Page) bool {
+	if p.helper.newRowGroup {
+		// Reset match cache on each row group change
+		// We delay until the first page is received
+		// so we can get an accurate count of the number
+		// of distinct values for dictionary columns.
+		count := len(p.matches)
+		if d := page.Dictionary(); d != nil {
+			if d.Len() > count {
+				count = d.Len()
+			}
+		}
+		p.matches = make(map[string]bool, count)
+	}
+
 	return p.helper.keepPage(page, p.KeepValue)
 }
 
diff --git a/tempodb/encoding/vparquet2/block_traceql_test.go b/tempodb/encoding/vparquet2/block_traceql_test.go
index 0fbb58b5d10..e36e33c27f4 100644
--- a/tempodb/encoding/vparquet2/block_traceql_test.go
+++ b/tempodb/encoding/vparquet2/block_traceql_test.go
@@ -456,6 +456,8 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 		{"spanAttValMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom > 0 }")},
 		{"spanAttIntrinsicNoMatch", traceql.MustExtractFetchSpansRequest("{ name = `asdfasdf` }")},
 		{"spanAttIntrinsicMatch", traceql.MustExtractFetchSpansRequest("{ name = `gcs.ReadRange` }")},
+		{"spanAttIntrinsicRegexNoMatch", traceql.MustExtractFetchSpansRequest("{ name =~ `asdfasdf` }")},
+		{"spanAttIntrinsicRegexMatch", traceql.MustExtractFetchSpansRequest("{ name =~ `gcs.ReadRange` }")},
 
 		// resource
 		{"resourceAttNameNoMatch", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` }")},
@@ -474,10 +476,10 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 
 	ctx := context.TODO()
 	tenantID := "1"
-	blockID := uuid.MustParse("149e41d2-cc4d-4f71-b355-3377eabc94c8")
+	blockID := uuid.MustParse("2968a567-5873-4e4c-b3cb-21c106c6714b")
 
 	r, _, _, err := local.New(&local.Config{
-		Path: path.Join("/home/joe/testblock/"),
+		Path: path.Join("/Users/marty/src/tmp/"),
 	})
 	require.NoError(b, err)
 