Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add user-configurable overrides module #2543

Merged
merged 41 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
a6b3691
Add user-configurable overrides module
Jun 6, 2023
157b596
Add /api/overrides and fix crash on boot
electron0zero Jun 15, 2023
ede924c
Add overridesHandler and WriteStatusRuntimeConfig
electron0zero Jun 23, 2023
73e16af
return json and only return overrides for the tenant
electron0zero Jun 23, 2023
48cce8e
Implement delete
Jun 26, 2023
0f23713
Fix test I think?
Jun 26, 2023
45e7259
Fix tests
Jun 26, 2023
bb47bbd
clean up handler and TODOs
electron0zero Jun 28, 2023
9607fe9
Add tests for overridesHandler
electron0zero Jun 28, 2023
5beb100
Add e2e test
Jun 29, 2023
5e3f2b9
Refactor:
Jun 30, 2023
ff1ba81
Linting
Jun 30, 2023
f38b210
address more Linting
Jun 30, 2023
68cc70f
fix lint and add test for PATCH
electron0zero Jul 4, 2023
9dcac76
fix lint error unparam
electron0zero Jul 4, 2023
6ba104b
remove todo
electron0zero Jul 4, 2023
0af8a02
use tenantLimits as return type
electron0zero Jul 4, 2023
38ba7cb
Add prometheus.Collector to overrides.Interface
Jul 4, 2023
9768af7
Test tempo_overrides_user_configurable_overrides_fetch_total metric i…
Jul 4, 2023
fad825a
Sprinkle in some tracing
Jul 4, 2023
3c70a49
Update CHANGELOG.md
Jul 4, 2023
051cc79
Merge branch 'main' into kvrhdn/user-configurable-overrides
Jul 4, 2023
a70f2d1
Merge branch 'main' into kvrhdn/user-configurable-overrides
Jul 10, 2023
8e85e22
Rename loop to running for consistency
Jul 10, 2023
da786c1
Have mux handle method routing; split up GET, POST and DELETE handlers
Jul 10, 2023
495cf6f
Use built in contains
Jul 10, 2023
da34f87
Split up user-configurable overrides manager, api and backend client
Jul 11, 2023
c052744
Merge branch 'main' into kvrhdn/user-configurable-overrides
Jul 11, 2023
85bc1d0
Move overrides API to httpclient
Jul 11, 2023
ec915e3
Clean up, linting, fmt
Jul 11, 2023
aaf4a73
Remove version field from json
Jul 11, 2023
41e6bd5
Address review comments
Jul 12, 2023
ce26e0f
If overrides.json does not exist, properly delete it from cache
Jul 12, 2023
1cfd59c
Add config warning for conflicting storage
Jul 12, 2023
3aa2bb1
Check in my tests as well
Jul 12, 2023
f839914
Simplify API handler, return 404 on overrides not found
Jul 13, 2023
7b0522f
Merge branch 'main' into kvrhdn/user-configurable-overrides
Jul 13, 2023
e1a7e2f
Typo, linting, fix test
Jul 13, 2023
f4fd0a9
Merge branch 'main' into kvrhdn/user-configurable-overrides
Jul 14, 2023
2ac2642
Use backend constants
Jul 14, 2023
8f35b98
Merge branch 'main' into kvrhdn/user-configurable-overrides
Jul 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* [CHANGE] Disable tempo-query by default in Jsonnet libs. [#2462](https://github.com/grafana/tempo/pull/2462) (@electron0zero)
* [FEATURE] New experimental API to derive on-demand RED metrics grouped by any attribute, and new metrics generator processor [#2368](https://github.com/grafana/tempo/pull/2368) [#2418](https://github.com/grafana/tempo/pull/2418) [#2424](https://github.com/grafana/tempo/pull/2424) [#2442](https://github.com/grafana/tempo/pull/2442) [#2480](https://github.com/grafana/tempo/pull/2480) [#2481](https://github.com/grafana/tempo/pull/2481) [#2501](https://github.com/grafana/tempo/pull/2501) [#2579](https://github.com/grafana/tempo/pull/2579) [#2582](https://github.com/grafana/tempo/pull/2582) (@mdisibio @zalegrala)
* [FEATURE] New TraceQL structural operators descendant (>>), child (>), and sibling (~) [#2625](https://github.com/grafana/tempo/pull/2625) [#2660](https://github.com/grafana/tempo/pull/2660) (@mdisibio)
* [FEATURE] Add user-configurable overrides module [#2543](https://github.com/grafana/tempo/pull/2543) (@electron0zero @kvrhdn)
* [ENHANCEMENT] Add capability to flush all remaining traces to backend when ingester is stopped [#2538](https://github.com/grafana/tempo/pull/2538)
* [ENHANCEMENT] Fill parent ID column and nested set columns [#2487](https://github.com/grafana/tempo/pull/2487) (@stoewer)
* [ENHANCEMENT] Add metrics generator config option to allow customizable ring port [#2399](https://github.com/grafana/tempo/pull/2399) (@mdisibio)
Expand Down
31 changes: 30 additions & 1 deletion cmd/tempo/app/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func (c *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) {
c.IngesterClient.GRPCClientConfig.GRPCCompression = "snappy"
flagext.DefaultValues(&c.GeneratorClient)
c.GeneratorClient.GRPCClientConfig.GRPCCompression = "snappy"
flagext.DefaultValues(&c.LimitsConfig)
c.LimitsConfig.RegisterFlagsAndApplyDefaults(f)

c.Distributor.RegisterFlagsAndApplyDefaults(util.PrefixConfig(prefix, "distributor"), f)
c.Ingester.RegisterFlagsAndApplyDefaults(util.PrefixConfig(prefix, "ingester"), f)
Expand Down Expand Up @@ -186,6 +186,10 @@ func (c *Config) CheckConfig() []ConfigWarning {
warnings = append(warnings, newV2Warning("v2_prefetch_traces_count"))
}

if c.tracesAndOverridesStorageConflict() {
warnings = append(warnings, warnTracesAndUserConfigurableOverridesStorageConflict)
}

return warnings
}

Expand Down Expand Up @@ -241,6 +245,9 @@ var (
warnStorageTraceBackendLocal = ConfigWarning{
Message: "Local backend will not correctly retrieve traces with a distributed deployment unless all components have access to the same disk. You should probably be using object storage as a backend.",
}
warnTracesAndUserConfigurableOverridesStorageConflict = ConfigWarning{
Message: "Trace storage conflicts with user-configurable overrides storage",
}
)

func newV2Warning(setting string) ConfigWarning {
Expand All @@ -249,3 +256,25 @@ func newV2Warning(setting string) ConfigWarning {
Explain: "This setting is only used in v2 blocks",
}
}

func (c *Config) tracesAndOverridesStorageConflict() bool {
traceStorage := c.StorageConfig.Trace
overridesStorage := c.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig

if traceStorage.Backend != overridesStorage.Backend {
return false
}

switch traceStorage.Backend {
case backend.Local:
return traceStorage.Local.PathMatches(overridesStorage.Local)
case backend.GCS:
return traceStorage.GCS.PathMatches(overridesStorage.GCS)
case backend.S3:
return traceStorage.S3.PathMatches(overridesStorage.S3)
case backend.Azure:
return traceStorage.Azure.PathMatches(overridesStorage.Azure)
}

return false
}
38 changes: 38 additions & 0 deletions cmd/tempo/app/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,44 @@ func TestConfig_CheckConfig(t *testing.T) {
}(),
expect: nil,
},
{
name: "trace storage conflicts with overrides storage - local",
config: func() *Config {
cfg := newDefaultConfig()
cfg.StorageConfig.Trace.Backend = backend.Local
cfg.StorageConfig.Trace.Local.Path = "/var/tempo"
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.Backend = backend.Local
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.Local.Path = "/var/tempo"
return cfg
}(),
expect: []ConfigWarning{warnTracesAndUserConfigurableOverridesStorageConflict},
},
{
name: "trace storage conflicts with overrides storage - gcs",
config: func() *Config {
cfg := newDefaultConfig()
cfg.StorageConfig.Trace.Backend = backend.GCS
cfg.StorageConfig.Trace.GCS.BucketName = "bucketname"
cfg.StorageConfig.Trace.GCS.Prefix = "tempo"
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.Backend = backend.GCS
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.GCS.BucketName = "bucketname"
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.GCS.Prefix = "tempo"
return cfg
}(),
expect: []ConfigWarning{warnTracesAndUserConfigurableOverridesStorageConflict},
},
{
name: "trace storage conflicts with overrides storage - different backends",
config: func() *Config {
cfg := newDefaultConfig()
cfg.StorageConfig.Trace.Backend = backend.GCS
cfg.StorageConfig.Trace.GCS.BucketName = "my-bucket"
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.Backend = backend.S3
cfg.LimitsConfig.UserConfigurableOverridesConfig.ClientConfig.S3.Bucket = "my-bucket"
return cfg
}(),
expect: nil,
},
}

for _, tc := range tt {
Expand Down
26 changes: 24 additions & 2 deletions cmd/tempo/app/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/grafana/dskit/ring"
"github.com/grafana/dskit/services"
jsoniter "github.com/json-iterator/go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/weaveworks/common/middleware"
Expand All @@ -26,6 +27,7 @@ import (
"github.com/grafana/tempo/modules/generator"
"github.com/grafana/tempo/modules/ingester"
"github.com/grafana/tempo/modules/overrides"
"github.com/grafana/tempo/modules/overrides/userconfigurableapi"
"github.com/grafana/tempo/modules/querier"
tempo_storage "github.com/grafana/tempo/modules/storage"
"github.com/grafana/tempo/pkg/api"
Expand Down Expand Up @@ -179,18 +181,38 @@ func (t *App) initReadRing(cfg ring.Config, name, key string) (*ring.Ring, error
}

func (t *App) initOverrides() (services.Service, error) {
overrides, err := overrides.NewOverrides(t.cfg.LimitsConfig)
o, err := overrides.NewOverrides(t.cfg.LimitsConfig)
if err != nil {
return nil, fmt.Errorf("failed to create overrides %w", err)
}
t.Overrides = overrides
t.Overrides = o

prometheus.MustRegister(&t.cfg.LimitsConfig)

if t.cfg.LimitsConfig.PerTenantOverrideConfig != "" {
prometheus.MustRegister(t.Overrides)
}

// User-configurable overrides API
userConfigOverridesCfg := t.cfg.LimitsConfig.UserConfigurableOverridesConfig

// only run API on query-frontend
if userConfigOverridesCfg.Enabled && t.isModuleActive(QueryFrontend) {
userConfigOverridesAPI, err := userconfigurableapi.NewUserConfigOverridesAPI(&userConfigOverridesCfg.ClientConfig)
if err != nil {
return nil, errors.Wrap(err, "failed to create user-configurable overrides API")
}

overridesPath := addHTTPAPIPrefix(&t.cfg, api.PathOverrides)
wrapHandler := func(h http.HandlerFunc) http.Handler {
return t.HTTPAuthMiddleware.Wrap(h)
}

t.Server.HTTP.Path(overridesPath).Methods(http.MethodGet).Handler(wrapHandler(userConfigOverridesAPI.GetOverridesHandler))
t.Server.HTTP.Path(overridesPath).Methods(http.MethodPost).Handler(wrapHandler(userConfigOverridesAPI.PostOverridesHandler))
t.Server.HTTP.Path(overridesPath).Methods(http.MethodDelete).Handler(wrapHandler(userConfigOverridesAPI.DeleteOverridesHandler))
}

return t.Overrides, nil
}

Expand Down
12 changes: 12 additions & 0 deletions integration/e2e/config-all-in-one-azurite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,15 @@ storage:
pool:
max_workers: 10
queue_depth: 100

overrides:
user_configurable_overrides:
enabled: true
poll_interval: 10s
client:
backend: azure
azure:
container_name: tempo
endpoint_suffix: tempo_e2e-azurite:10000
storage_account_name: "devstoreaccount1"
storage_account_key: "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
12 changes: 12 additions & 0 deletions integration/e2e/config-all-in-one-gcs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,15 @@ storage:
pool:
max_workers: 10
queue_depth: 1000

overrides:
user_configurable_overrides:
enabled: true
poll_interval: 10s
client:
backend: gcs
gcs:
# TODO use separate bucket?
bucket_name: tempo
endpoint: https://tempo_e2e-gcs:4443/storage/v1/
insecure: true
9 changes: 9 additions & 0 deletions integration/e2e/config-all-in-one-local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,12 @@ storage:
pool:
max_workers: 10
queue_depth: 100

overrides:
user_configurable_overrides:
enabled: true
poll_interval: 10s
client:
backend: local
local:
path: /var/tempo_overrides
14 changes: 14 additions & 0 deletions integration/e2e/config-all-in-one-s3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,17 @@ storage:
pool:
max_workers: 10
queue_depth: 100

overrides:
user_configurable_overrides:
enabled: true
poll_interval: 10s
client:
backend: s3
s3:
# TODO use separate bucket?
bucket: tempo
endpoint: tempo_e2e-minio-9000:9000 # TODO: this is brittle, fix this eventually
access_key: Cheescake # TODO: use cortex_e2e.MinioAccessKey
secret_key: supersecret # TODO: use cortex_e2e.MinioSecretKey
insecure: true
98 changes: 98 additions & 0 deletions integration/e2e/overrides_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package e2e

import (
"fmt"
"os"
"testing"

"github.com/grafana/e2e"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"

"github.com/grafana/tempo/cmd/tempo/app"
util "github.com/grafana/tempo/integration"
"github.com/grafana/tempo/integration/e2e/backend"
"github.com/grafana/tempo/modules/overrides/userconfigurableapi"
"github.com/grafana/tempo/pkg/httpclient"
)

func TestOverrides(t *testing.T) {
testBackends := []struct {
name string
configFile string
}{
{
name: "local",
configFile: configAllInOneLocal,
},
{
name: "s3",
configFile: configAllInOneS3,
},
{
name: "azure",
configFile: configAllInOneAzurite,
},
{
name: "gcs",
configFile: configAllInOneGCS,
},
}
for _, tc := range testBackends {
t.Run(tc.name, func(t *testing.T) {
s, err := e2e.NewScenario("tempo_e2e")
require.NoError(t, err)
defer s.Close()

// set up the backend
cfg := app.Config{}
buff, err := os.ReadFile(tc.configFile)
require.NoError(t, err)
err = yaml.UnmarshalStrict(buff, &cfg)
require.NoError(t, err)
_, err = backend.New(s, cfg)
require.NoError(t, err)

require.NoError(t, util.CopyFileToSharedDir(s, tc.configFile, "config.yaml"))
tempo := util.NewTempoAllInOne()
require.NoError(t, s.StartAndWaitReady(tempo))

orgID := ""
apiClient := httpclient.New("http://"+tempo.Endpoint(3200), orgID)

// Modify overrides
fmt.Println("* Setting overrides.forwarders")
err = apiClient.SetOverrides(&userconfigurableapi.UserConfigurableLimits{
Forwarders: &[]string{"my-forwarder"},
})
require.NoError(t, err)

limits, err := apiClient.GetOverrides()
printLimits(limits)
require.NoError(t, err)

require.NotNil(t, limits)
require.NotNil(t, limits.Forwarders)
assert.ElementsMatch(t, *limits.Forwarders, []string{"my-forwarder"})

// We fetched the overrides once manually, but we also expect at least one poll_interval to have happened
require.NoError(t, tempo.WaitSumMetrics(e2e.Greater(1), "tempo_overrides_user_configurable_overrides_fetch_total"))

// Clear overrides
fmt.Println("* Deleting overrides")
err = apiClient.DeleteOverrides()
require.NoError(t, err)

_, err = apiClient.GetOverrides()
require.ErrorIs(t, err, httpclient.ErrNotFound)
})
}
}

func printLimits(limits *userconfigurableapi.UserConfigurableLimits) {
fmt.Printf("* Overrides: %+v\n", limits)
if limits != nil && limits.Forwarders != nil {
fmt.Printf("* Fowarders: %+v\n", *limits.Forwarders)
}
}
5 changes: 3 additions & 2 deletions modules/distributor/distributor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"math/rand"
"strconv"
Expand Down Expand Up @@ -666,7 +667,7 @@ func TestDistributor(t *testing.T) {
} {
t.Run(fmt.Sprintf("[%d](samples=%v)", i, tc.lines), func(t *testing.T) {
limits := &overrides.Limits{}
flagext.DefaultValues(limits)
limits.RegisterFlagsAndApplyDefaults(&flag.FlagSet{})

// todo: test limits
d := prepare(t, limits, nil, nil)
Expand Down Expand Up @@ -865,7 +866,7 @@ func TestLogSpans(t *testing.T) {
} {
t.Run(fmt.Sprintf("[%d] TestLogSpans LogReceivedTraces=%v LogReceivedSpansEnabled=%v filterByStatusError=%v includeAllAttributes=%v", i, tc.LogReceivedTraces, tc.LogReceivedSpansEnabled, tc.filterByStatusError, tc.includeAllAttributes), func(t *testing.T) {
limits := &overrides.Limits{}
flagext.DefaultValues(limits)
limits.RegisterFlagsAndApplyDefaults(&flag.FlagSet{})

buf := &bytes.Buffer{}
logger := kitlog.NewJSONLogger(kitlog.NewSyncWriter(buf))
Expand Down
4 changes: 2 additions & 2 deletions modules/distributor/forwarder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const tenantID = "tenant-id"

func TestForwarder(t *testing.T) {
oCfg := overrides.Limits{}
oCfg.RegisterFlags(&flag.FlagSet{})
oCfg.RegisterFlagsAndApplyDefaults(&flag.FlagSet{})

id, err := util.HexStringToTraceID("1234567890abcdef")
require.NoError(t, err)
Expand Down Expand Up @@ -61,7 +61,7 @@ func TestForwarder(t *testing.T) {

func TestForwarder_shutdown(t *testing.T) {
oCfg := overrides.Limits{}
oCfg.RegisterFlags(&flag.FlagSet{})
oCfg.RegisterFlagsAndApplyDefaults(&flag.FlagSet{})
oCfg.MetricsGeneratorForwarderQueueSize = 200

id, err := util.HexStringToTraceID("1234567890abcdef")
Expand Down
3 changes: 2 additions & 1 deletion modules/ingester/ingester_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ingester
import (
"context"
"crypto/rand"
"flag"
"os"
"testing"
"time"
Expand Down Expand Up @@ -432,7 +433,7 @@ func defaultIngesterTestConfig() Config {

func defaultLimitsTestConfig() overrides.Limits {
limits := overrides.Limits{}
flagext.DefaultValues(&limits)
limits.RegisterFlagsAndApplyDefaults(&flag.FlagSet{})
return limits
}

Expand Down
Loading