diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dd4d608a12..ad86ace43b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ * [ENHANCEMENT] Improve memory efficiency of compaction and block cutting. [#1121](https://github.com/grafana/tempo/pull/1121) [#1130](https://github.com/grafana/tempo/pull/1130) (@joe-elliott) * [ENHANCEMENT] Include metrics for configured limit overrides and defaults: tempo_limits_overrides, tempo_limits_defaults [#1089](https://github.com/grafana/tempo/pull/1089) (@zalegrala) * [ENHANCEMENT] Add Envoy Proxy panel to `Tempo / Writes` dashboard [#1137](https://github.com/grafana/tempo/pull/1137) (@kvrhdn) +* [ENHANCEMENT] Reduce compactionCycle to improve performance in large multitenant environments [#1145](https://github.com/grafana/tempo/pull/1145) (@joe-elliott) +* [ENHANCEMENT] Added max_compaction_cycle to allow for independently configuring polling and compaction cycle. [#1145](https://github.com/grafana/tempo/pull/1145) (@joe-elliott) * [BUGFIX] Fix defaults for MaxBytesPerTrace (ingester.max-bytes-per-trace) and MaxSearchBytesPerTrace (ingester.max-search-bytes-per-trace) (@bitprocessor) * [BUGFIX] Ignore empty objects during compaction [#1113](https://github.com/grafana/tempo/pull/1113) (@mdisibio) * [BUGFIX] Add process name to vulture traces to work around display issues [#1127](https://github.com/grafana/tempo/pull/1127) (@mdisibio) diff --git a/docs/tempo/website/configuration/_index.md b/docs/tempo/website/configuration/_index.md index 479f750a5d8..b60f3eb4411 100644 --- a/docs/tempo/website/configuration/_index.md +++ b/docs/tempo/website/configuration/_index.md @@ -438,6 +438,10 @@ storage: # Default 0 (disabled). [blocklist_poll_stale_tenant_index: <duration>] + # The maximum amount of time to spend compacting a single tenant before moving to the next. + # Default is 5m. + [max_compaction_cycle: <duration>] + # Cache type to use. 
Should be one of "redis", "memcached" # Example: "cache: memcached" [cache: <string>] diff --git a/docs/tempo/website/configuration/manifest.md b/docs/tempo/website/configuration/manifest.md index 1e08f95b7a9..1795f6de8ec 100644 --- a/docs/tempo/website/configuration/manifest.md +++ b/docs/tempo/website/configuration/manifest.md @@ -272,11 +272,12 @@ ingester: heartbeat_timeout: 5m0s replication_factor: 1 zone_awareness_enabled: false + excluded_zones: "" num_tokens: 128 heartbeat_period: 5s observe_period: 0s join_after: 0s - min_ready_duration: 1m0s + min_ready_duration: 15s interface_names: - eth0 - en0 @@ -284,6 +285,7 @@ ingester: tokens_file_path: "" availability_zone: "" unregister_on_shutdown: true + readiness_check_ring_health: true address: 127.0.0.1 port: 0 id: hostname @@ -319,6 +321,7 @@ storage: blocklist_poll_fallback: true blocklist_poll_tenant_index_builders: 2 blocklist_poll_stale_tenant_index: 0s + max_compaction_cycle: 5m0s backend: local local: path: /tmp/tempo/traces @@ -365,8 +368,8 @@ overrides: search_tags_allow_list: null max_traces_per_user: 10000 max_global_traces_per_user: 0 - max_bytes_per_trace: 50000 - max_search_bytes_per_trace: 0 + max_bytes_per_trace: 5000000 + max_search_bytes_per_trace: 50000 block_retention: 0s per_tenant_override_config: "" per_tenant_override_period: 10s diff --git a/docs/tempo/website/configuration/polling.md b/docs/tempo/website/configuration/polling.md index 71870cce229..f7b08384a51 100644 --- a/docs/tempo/website/configuration/polling.md +++ b/docs/tempo/website/configuration/polling.md @@ -30,6 +30,10 @@ storage: # the bucket contents. # Default 0 (disabled). [blocklist_poll_stale_tenant_index: <duration>] + + # The maximum amount of time to spend compacting a single tenant before moving to the next. + # Default is 5m. 
+ [max_compaction_cycle: <duration>] ``` Due to the mechanics of the [tenant index]({{< relref "../operations/polling" >}}) the blocklist will be stale by diff --git a/modules/compactor/compactor.go b/modules/compactor/compactor.go index f0b6a9242c9..019515ce6b2 100644 --- a/modules/compactor/compactor.go +++ b/modules/compactor/compactor.go @@ -19,7 +19,7 @@ import ( ) const ( - waitOnStartup = time.Minute + waitOnStartup = 90 * time.Second ) type Compactor struct { diff --git a/modules/storage/config.go b/modules/storage/config.go index 0f0c1151836..481eccbd625 100644 --- a/modules/storage/config.go +++ b/modules/storage/config.go @@ -28,6 +28,7 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) cfg.Trace.BlocklistPollFallback = true cfg.Trace.BlocklistPollConcurrency = tempodb.DefaultBlocklistPollConcurrency cfg.Trace.BlocklistPollTenantIndexBuilders = tempodb.DefaultTenantIndexBuilders + cfg.Trace.MaxCompactionCycle = tempodb.DefaultMaxCompactionCycle f.StringVar(&cfg.Trace.Backend, util.PrefixConfig(prefix, "trace.backend"), "", "Trace backend (s3, azure, gcs, local)") f.DurationVar(&cfg.Trace.BlocklistPoll, util.PrefixConfig(prefix, "trace.blocklist_poll"), tempodb.DefaultBlocklistPoll, "Period at which to run the maintenance cycle.") diff --git a/tempodb/compactor.go b/tempodb/compactor.go index 0c0c0b66d56..05ff1cc83a5 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -52,7 +52,7 @@ const ( inputBlocks = 2 outputBlocks = 1 - compactionCycle = 30 * time.Second + compactionCycle = 500 * time.Millisecond DefaultFlushSizeBytes uint32 = 30 * 1024 * 1024 // 30 MiB @@ -112,7 +112,7 @@ func (rw *readerWriter) doCompaction() { } // after a maintenance cycle bail out - if start.Add(rw.cfg.BlocklistPoll).Before(time.Now()) { + if start.Add(rw.cfg.MaxCompactionCycle).Before(time.Now()) { level.Info(rw.logger).Log("msg", "compacted blocks for a maintenance cycle, bailing out", "tenantID", tenantID) break } diff --git 
a/tempodb/config.go b/tempodb/config.go index d9e047f6b30..5c53cfe8940 100644 --- a/tempodb/config.go +++ b/tempodb/config.go @@ -19,6 +19,7 @@ import ( const ( DefaultBlocklistPoll = 5 * time.Minute + DefaultMaxCompactionCycle = 5 * time.Minute DefaultBlocklistPollConcurrency = uint(50) DefaultRetentionConcurrency = uint(10) DefaultTenantIndexBuilders = 2 @@ -36,6 +37,7 @@ type Config struct { BlocklistPollFallback bool `yaml:"blocklist_poll_fallback"` BlocklistPollTenantIndexBuilders int `yaml:"blocklist_poll_tenant_index_builders"` BlocklistPollStaleTenantIndex time.Duration `yaml:"blocklist_poll_stale_tenant_index"` + MaxCompactionCycle time.Duration `yaml:"max_compaction_cycle"` // backends Backend string `yaml:"backend"`