Skip to content

Commit

Permalink
Prefix service graph extra dimensions (#2335)
Browse files Browse the repository at this point in the history
* prefix service graph extra dimensions

* update changelog

* update changelog

* add EnableClientServerPrefix options for service graph

* lint

* add override for service graph client server prefix config option

* lint fmt

* test

* fixes after rebase

* fix name
  • Loading branch information
domasx2 authored Jun 1, 2023
1 parent 890838b commit 767115d
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 52 deletions.
10 changes: 5 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* [CHANGE] Disable tempo-query by default in Jsonnet libs. [#2462](https://github.com/grafana/tempo/pull/2462) (@electron0zero)
* [ENHANCEMENT] Fill parent ID column and nested set columns [#2487](https://github.com/grafana/tempo/pull/2487) (@stoewer)
* [CHANGE] Prefix service graph extra dimensions labels with `server_` and `client_` if `enable_client_server_prefix` is enabled [#2335](https://github.com/grafana/tempo/pull/2335) (@domasx2)
* [ENHANCEMENT] log client ip to help identify which client is no org id [#2436](https://github.com/grafana/tempo/pull/2436)
* [ENHANCEMENT] Add `spss` parameter to `/api/search/tags`[#2308] to configure the spans per span set in response
* [BUGFIX] Fix Search SLO by routing tags to a new handler. [#2468](https://github.com/grafana/tempo/issues/2468) (@electron0zero)
Expand Down Expand Up @@ -48,10 +49,9 @@ To make use of filtering, configure `autocomplete_filtering_enabled`.
* [CHANGE] **Breaking Change** Rename s3.insecure_skip_verify [#2407](https://github.com/grafana/tempo/pull/2407) (@zalegrala)
```yaml
storage:
trace:
s3:
insecure_skip_verify: true // renamed to tls_insecure_skip_verify

trace:
s3:
insecure_skip_verify: true // renamed to tls_insecure_skip_verify
```
* [CHANGE] Ignore context canceled errors in the queriers [#2440](https://github.com/grafana/tempo/pull/2440) (@joe-elliott)
* [CHANGE] Start flush queue worker after wal replay and block rediscovery [#2456](https://github.com/grafana/tempo/pull/2456) (@ie-pham)
Expand Down Expand Up @@ -745,4 +745,4 @@ Jsonnet users will now need to specify a storage request and limit for the gener
* [BUGFIX] S3 multi-part upload errors [#306](https://github.com/grafana/tempo/pull/325)
* [BUGFIX] Increase Prometheus `notfound` metric on tempo-vulture. [#301](https://github.com/grafana/tempo/pull/301)
* [BUGFIX] Return 404 if searching for a tenant id that does not exist in the backend. [#321](https://github.com/grafana/tempo/pull/321)
* [BUGFIX] Prune in-memory blocks from missing tenants. [#314](https://github.com/grafana/tempo/pull/314)
* [BUGFIX] Prune in-memory blocks from missing tenants. [#314](https://github.com/grafana/tempo/pull/314)
4 changes: 4 additions & 0 deletions docs/sources/tempo/configuration/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ metrics_generator:
# resource and span attributes and are added to the metrics if present.
[dimensions: <list of string>]
# Prefix additional dimensions with "client_" and "_server". Adds two labels
# per additional dimension instead of one.
[enable_client_server_prefix: <bool> | default = false]
# Attribute Key to multiply span metrics
[span_multiplier_key: <string> | default = ""]
Expand Down
2 changes: 2 additions & 0 deletions modules/generator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,7 @@ func (cfg *ProcessorConfig) copyWithOverrides(o metricsGeneratorOverrides, userI

copyCfg.SpanMetrics.EnableTargetInfo = o.MetricsGeneratorProcessorSpanMetricsEnableTargetInfo(userID)

copyCfg.ServiceGraphs.EnableClientServerPrefix = o.MetricsGeneratorProcessorServiceGraphsEnableClientServerPrefix(userID)

return copyCfg, nil
}
1 change: 1 addition & 0 deletions modules/generator/overrides.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type metricsGeneratorOverrides interface {
MetricsGeneratorProcessorLocalBlocksCompleteBlockTimeout(userID string) time.Duration
MetricsGeneratorProcessorSpanMetricsDimensionMappings(userID string) []sharedconfig.DimensionMappings
MetricsGeneratorProcessorSpanMetricsEnableTargetInfo(userID string) bool
MetricsGeneratorProcessorServiceGraphsEnableClientServerPrefix(userID string) bool
}

var _ metricsGeneratorOverrides = (overrides.Interface)(nil)
37 changes: 21 additions & 16 deletions modules/generator/overrides_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,23 @@ import (
)

type mockOverrides struct {
processors map[string]struct{}
serviceGraphsHistogramBuckets []float64
serviceGraphsDimensions []string
serviceGraphsPeerAttributes []string
spanMetricsHistogramBuckets []float64
spanMetricsDimensions []string
spanMetricsIntrinsicDimensions map[string]bool
spanMetricsFilterPolicies []filterconfig.FilterPolicy
spanMetricsDimensionMappings []sharedconfig.DimensionMappings
spanMetricsEnableTargetInfo bool
localBlocksMaxLiveTraces uint64
localBlocksMaxBlockDuration time.Duration
localBlocksMaxBlockBytes uint64
localBlocksFlushCheckPeriod time.Duration
localBlocksTraceIdlePeriod time.Duration
localBlocksCompleteBlockTimeout time.Duration
processors map[string]struct{}
serviceGraphsHistogramBuckets []float64
serviceGraphsDimensions []string
serviceGraphsPeerAttributes []string
serviceGraphsEnableClientServerPrefix bool
spanMetricsHistogramBuckets []float64
spanMetricsDimensions []string
spanMetricsIntrinsicDimensions map[string]bool
spanMetricsFilterPolicies []filterconfig.FilterPolicy
spanMetricsDimensionMappings []sharedconfig.DimensionMappings
spanMetricsEnableTargetInfo bool
localBlocksMaxLiveTraces uint64
localBlocksMaxBlockDuration time.Duration
localBlocksMaxBlockBytes uint64
localBlocksFlushCheckPeriod time.Duration
localBlocksTraceIdlePeriod time.Duration
localBlocksCompleteBlockTimeout time.Duration
}

var _ metricsGeneratorOverrides = (*mockOverrides)(nil)
Expand Down Expand Up @@ -105,3 +106,7 @@ func (m *mockOverrides) MetricsGeneratorProcessorSpanMetricsDimensionMappings(us
func (m *mockOverrides) MetricsGeneratorProcessorSpanMetricsEnableTargetInfo(userID string) bool {
return m.spanMetricsEnableTargetInfo
}

func (m *mockOverrides) MetricsGeneratorProcessorServiceGraphsEnableClientServerPrefix(userID string) bool {
return m.serviceGraphsEnableClientServerPrefix
}
9 changes: 7 additions & 2 deletions modules/generator/processor/servicegraphs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,15 @@ type Config struct {
HistogramBuckets []float64 `yaml:"histogram_buckets"`

// Additional dimensions (labels) to be added to the metric along with the default ones.
// If client and server spans have the same attribute, behaviour is undetermined
// (either value could get used)
// If client and server spans have the same attribute and EnableClientServerPrefix is not enabled,
// behaviour is undetermined (either value could get used)
Dimensions []string `yaml:"dimensions"`

// If enabled, additional dimensions (labels) will be prefixed with either
// "client_" or "server_" depending on the span kind. Up to two labels will be added
// per dimension.
EnableClientServerPrefix bool `yaml:"enable_client_server_prefix"`

// PeerAttributes are attributes that will be used to create a peer edge
// Attributes are searched in the order they are provided
PeerAttributes []string `yaml:"peer_attributes"`
Expand Down
24 changes: 18 additions & 6 deletions modules/generator/processor/servicegraphs/servicegraphs.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@ type Processor struct {
func New(cfg Config, tenant string, registry registry.Registry, logger log.Logger) gen.Processor {
labels := []string{"client", "server", "connection_type"}
for _, d := range cfg.Dimensions {
labels = append(labels, strutil.SanitizeLabelName(d))
if cfg.EnableClientServerPrefix {
labels = append(labels, strutil.SanitizeLabelName("client_"+d), strutil.SanitizeLabelName("server_"+d))
} else {
labels = append(labels, strutil.SanitizeLabelName(d))
}
}

p := &Processor{
Expand Down Expand Up @@ -174,7 +178,7 @@ func (p *Processor) consume(resourceSpans []*v1_trace.ResourceSpans) (err error)
e.ClientService = svcName
e.ClientLatencySec = spanDurationSec(span)
e.Failed = e.Failed || p.spanFailed(span)
p.upsertDimensions(e.Dimensions, rs.Resource.Attributes, span.Attributes)
p.upsertDimensions("client_", e.Dimensions, rs.Resource.Attributes, span.Attributes)
e.SpanMultiplier = spanMultiplier
p.upsertPeerNode(e, span.Attributes)

Expand All @@ -199,7 +203,7 @@ func (p *Processor) consume(resourceSpans []*v1_trace.ResourceSpans) (err error)
e.ServerService = svcName
e.ServerLatencySec = spanDurationSec(span)
e.Failed = e.Failed || p.spanFailed(span)
p.upsertDimensions(e.Dimensions, rs.Resource.Attributes, span.Attributes)
p.upsertDimensions("server_", e.Dimensions, rs.Resource.Attributes, span.Attributes)
e.SpanMultiplier = spanMultiplier
p.upsertPeerNode(e, span.Attributes)
})
Expand Down Expand Up @@ -235,10 +239,14 @@ func (p *Processor) consume(resourceSpans []*v1_trace.ResourceSpans) (err error)
return nil
}

func (p *Processor) upsertDimensions(m map[string]string, resourceAttr []*v1_common.KeyValue, spanAttr []*v1_common.KeyValue) {
func (p *Processor) upsertDimensions(prefix string, m map[string]string, resourceAttr []*v1_common.KeyValue, spanAttr []*v1_common.KeyValue) {
for _, dim := range p.Cfg.Dimensions {
if v, ok := processor_util.FindAttributeValue(dim, resourceAttr, spanAttr); ok {
m[dim] = v
if p.Cfg.EnableClientServerPrefix {
m[prefix+dim] = v
} else {
m[dim] = v
}
}
}
}
Expand All @@ -261,7 +269,11 @@ func (p *Processor) onComplete(e *store.Edge) {
labelValues = append(labelValues, e.ClientService, e.ServerService, string(e.ConnectionType))

for _, dimension := range p.Cfg.Dimensions {
labelValues = append(labelValues, e.Dimensions[dimension])
if p.Cfg.EnableClientServerPrefix {
labelValues = append(labelValues, e.Dimensions["client_"+dimension], e.Dimensions["server_"+dimension])
} else {
labelValues = append(labelValues, e.Dimensions[dimension])
}
}

registryLabelValues := p.registry.NewLabelValueCombo(p.labels, labelValues)
Expand Down
40 changes: 39 additions & 1 deletion modules/generator/processor/servicegraphs/servicegraphs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestServiceGraphs(t *testing.T) {
cfg.RegisterFlagsAndApplyDefaults("", nil)

cfg.HistogramBuckets = []float64{0.04}
cfg.Dimensions = []string{"beast"}
cfg.Dimensions = []string{"beast", "god"}

p := New(cfg, "test", testRegistry, log.NewNopLogger())
defer p.Shutdown(context.Background())
Expand All @@ -42,18 +42,21 @@ func TestServiceGraphs(t *testing.T) {
"server": "mythical-server",
"connection_type": "",
"beast": "manticore",
"god": "zeus",
})
serverToDatabaseLabels := labels.FromMap(map[string]string{
"client": "mythical-server",
"server": "postgres",
"connection_type": "database",
"beast": "",
"god": "",
})
requesterToRecorderLabels := labels.FromMap(map[string]string{
"client": "mythical-requester",
"server": "mythical-recorder",
"connection_type": "messaging_system",
"beast": "",
"god": "",
})

fmt.Println(testRegistry)
Expand Down Expand Up @@ -100,6 +103,41 @@ func TestServiceGraphs(t *testing.T) {
assert.InDelta(t, 0.000693, testRegistry.Query(`traces_service_graph_request_server_seconds_sum`, requesterToRecorderLabels), 0.001)
}

func TestServiceGraphs_prefixDimensions(t *testing.T) {
testRegistry := registry.NewTestRegistry()

cfg := Config{}
cfg.RegisterFlagsAndApplyDefaults("", nil)

cfg.HistogramBuckets = []float64{0.04}
cfg.Dimensions = []string{"beast", "god"}
cfg.EnableClientServerPrefix = true

p := New(cfg, "test", testRegistry, log.NewNopLogger())
defer p.Shutdown(context.Background())

request, err := loadTestData("testdata/trace-with-queue-database.json")
require.NoError(t, err)

p.PushSpans(context.Background(), request)

requesterToServerLabels := labels.FromMap(map[string]string{
"client": "mythical-requester",
"server": "mythical-server",
"connection_type": "",
"client_beast": "manticore",
"server_beast": "manticore",
"client_god": "ares",
"server_god": "zeus",
})

fmt.Println(testRegistry)

// counters
assert.Equal(t, 1.0, testRegistry.Query(`traces_service_graph_request_total`, requesterToServerLabels))

}

func TestServiceGraphs_failedRequests(t *testing.T) {
testRegistry := registry.NewTestRegistry()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
"value": {
"stringValue": "manticore"
}
},
{
"key": "god",
"value": {
"stringValue": "ares"
}
}
],
"status": {
Expand Down Expand Up @@ -251,6 +257,12 @@
"stringValue": "ip_tcp"
}
},
{
"key": "god",
"value": {
"stringValue": "zeus"
}
},
{
"key": "beast",
"value": {
Expand Down
1 change: 1 addition & 0 deletions modules/overrides/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ type Interface interface {
MetricsGeneratorProcessorLocalBlocksCompleteBlockTimeout(userID string) time.Duration
MetricsGeneratorProcessorSpanMetricsDimensionMappings(userID string) []sharedconfig.DimensionMappings
MetricsGeneratorProcessorSpanMetricsEnableTargetInfo(userID string) bool
MetricsGeneratorProcessorServiceGraphsEnableClientServerPrefix(userID string) bool
BlockRetention(userID string) time.Duration
MaxSearchDuration(userID string) time.Duration
}
Loading

0 comments on commit 767115d

Please sign in to comment.