diff --git a/.rubocop.yml b/.rubocop.yml index ef30f9c..a503792 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -74,3 +74,6 @@ Style/ExplicitBlockArgument: Gemspec/RequiredRubyVersion: Enabled: false + +Metrics/ModuleLength: + Max: 200 diff --git a/README.md b/README.md index 215c77d..831990f 100644 --- a/README.md +++ b/README.md @@ -99,14 +99,15 @@ end Configuration is handled by [anyway_config] gem. With it you can load settings from environment variables (upcased and prefixed with `YABEDA_SIDEKIQ_`), YAML files, and other sources. See [anyway_config] docs for details. -Config key | Type | Default | Description | -------------------------- | -------- | ------------------------------------------------------- |----------------------------------------------------------------------------------------------------------------------------------------------------| -`collect_cluster_metrics` | boolean | Enabled in Sidekiq worker processes, disabled otherwise | Defines whether this Ruby process should collect and expose metrics representing state of the whole Sidekiq installation (queues, processes, etc). | -`declare_process_metrics` | boolean | Enabled in Sidekiq worker processes, disabled otherwise | Declare metrics that are only tracked inside worker process even outside of them. Useful for multiprocess metric collection. | +Config key | Type | Default | Description | +---------------------------- | -------- | ------------------------------------------------------- |----------------------------------------------------------------------------------------------------------------------------------------------------| +`collect_cluster_metrics` | boolean | Enabled in Sidekiq worker processes, disabled otherwise | Defines whether this Ruby process should collect and expose metrics representing state of the whole Sidekiq installation (queues, processes, etc). 
| +`declare_process_metrics` | boolean | Enabled in Sidekiq worker processes, disabled otherwise | Declare metrics that are only tracked inside worker process even outside of them. Useful for multiprocess metric collection. | +`retries_segmented_by_queue` | boolean | Disabled | Defines whether retries are segmented by queue or reported as a single metric | # Roadmap (TODO or Help wanted) - - Implement optional segmentation of retry/schedule/dead sets + - Implement optional segmentation of schedule/dead sets It should be disabled by default as it requires to iterate over all jobs in sets and may be very slow on large sets. diff --git a/lib/yabeda/sidekiq.rb b/lib/yabeda/sidekiq.rb index b5b577b..9138009 100644 --- a/lib/yabeda/sidekiq.rb +++ b/lib/yabeda/sidekiq.rb @@ -49,13 +49,16 @@ def self.config # Metrics not specific for current Sidekiq process, but representing state of the whole Sidekiq installation (queues, processes, etc) # You can opt-out from collecting these by setting YABEDA_SIDEKIQ_COLLECT_CLUSTER_METRICS to falsy value (+no+ or +false+) if config.collect_cluster_metrics # defaults to +::Sidekiq.server?+ - gauge :jobs_waiting_count, tags: %i[queue], aggregation: :most_recent, comment: "The number of jobs waiting to process in sidekiq." - gauge :active_workers_count, tags: [], aggregation: :most_recent, comment: "The number of currently running machines with sidekiq workers." - gauge :jobs_scheduled_count, tags: [], aggregation: :most_recent, comment: "The number of jobs scheduled for later execution." - gauge :jobs_retry_count, tags: [], aggregation: :most_recent, comment: "The number of failed jobs waiting to be retried" - gauge :jobs_dead_count, tags: [], aggregation: :most_recent, comment: "The number of jobs exceeded their retry count." - gauge :active_processes, tags: [], aggregation: :most_recent, comment: "The number of active Sidekiq worker processes." 
- gauge :queue_latency, tags: %i[queue], aggregation: :most_recent, + retry_count_tags = config.retries_segmented_by_queue ? %i[queue] : [] + + gauge :jobs_waiting_count, tags: %i[queue], aggregation: :most_recent, comment: "The number of jobs waiting to process in sidekiq." + gauge :active_workers_count, tags: [], aggregation: :most_recent, + comment: "The number of currently running machines with sidekiq workers." + gauge :jobs_scheduled_count, tags: [], aggregation: :most_recent, comment: "The number of jobs scheduled for later execution." + gauge :jobs_retry_count, tags: retry_count_tags, aggregation: :most_recent, comment: "The number of failed jobs waiting to be retried" + gauge :jobs_dead_count, tags: [], aggregation: :most_recent, comment: "The number of jobs exceeded their retry count." + gauge :active_processes, tags: [], aggregation: :most_recent, comment: "The number of active Sidekiq worker processes." + gauge :queue_latency, tags: %i[queue], aggregation: :most_recent, comment: "The queue latency, the difference in seconds since the oldest job in the queue was enqueued" end @@ -73,21 +76,22 @@ def self.config sidekiq_jobs_scheduled_count.set({}, stats.scheduled_size) sidekiq_jobs_dead_count.set({}, stats.dead_size) sidekiq_active_processes.set({}, stats.processes_size) - sidekiq_jobs_retry_count.set({}, stats.retry_size) ::Sidekiq::Queue.all.each do |queue| sidekiq_queue_latency.set({ queue: queue.name }, queue.latency) end - # That is quite slow if your retry set is large - # I don't want to enable it by default - # retries_by_queues = - # ::Sidekiq::RetrySet.new.each_with_object(Hash.new(0)) do |job, cntr| - # cntr[job["queue"]] += 1 - # end - # retries_by_queues.each do |queue, count| - # sidekiq_jobs_retry_count.set({ queue: queue }, count) - # end + if config.retries_segmented_by_queue + retries_by_queues = + ::Sidekiq::RetrySet.new.each_with_object(Hash.new(0)) do |job, cntr| + cntr[job["queue"]] += 1 + end + retries_by_queues.each do |queue, 
count| + sidekiq_jobs_retry_count.set({ queue: queue }, count) + end + else + sidekiq_jobs_retry_count.set({}, stats.retry_size) + end end end diff --git a/lib/yabeda/sidekiq/config.rb b/lib/yabeda/sidekiq/config.rb index 758c576..f1ae183 100644 --- a/lib/yabeda/sidekiq/config.rb +++ b/lib/yabeda/sidekiq/config.rb @@ -16,6 +16,10 @@ class Config < ::Anyway::Config # Declare metrics that are only tracked inside worker process even outside them attr_config declare_process_metrics: ::Sidekiq.server? + + # Retries are tracked by default as a single metric. If you want to track them separately for each queue, set this to +true+ + # Disabled by default because it is quite slow if the retry set is large + attr_config retries_segmented_by_queue: false end end end