Skip to content

Commit

Permalink
Allow capture of error class into tag when job fails (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
niborg authored Mar 7, 2024
1 parent 5701069 commit 6cc2511
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 5 deletions.
4 changes: 3 additions & 1 deletion lib/yabeda/sidekiq.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ def self.config
counter :jobs_rerouted_total, tags: %i[from_queue to_queue worker], comment: "A counter of the total number of rerouted jobs sidekiq enqueued."

if config.declare_process_metrics # defaults to +::Sidekiq.server?+
failed_total_tags = config.label_for_error_class_on_sidekiq_jobs_failed ? %i[queue worker error] : %i[queue worker]

counter :jobs_executed_total, tags: %i[queue worker], comment: "A counter of the total number of jobs sidekiq executed."
counter :jobs_success_total, tags: %i[queue worker], comment: "A counter of the total number of jobs successfully processed by sidekiq."
counter :jobs_failed_total, tags: %i[queue worker], comment: "A counter of the total number of jobs failed in sidekiq."
counter :jobs_failed_total, tags: failed_total_tags, comment: "A counter of the total number of jobs failed in sidekiq."

gauge :running_job_runtime, tags: %i[queue worker], aggregation: :max, unit: :seconds,
comment: "How long currently running jobs are running (useful for detection of hung jobs)"
Expand Down
3 changes: 3 additions & 0 deletions lib/yabeda/sidekiq/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Config < ::Anyway::Config
# Retries are tracked by default as a single metric. If you want to track them separately for each queue, set this to +true+
# Disabled by default because it is quite slow if the retry set is large
attr_config retries_segmented_by_queue: false

# If set to true, an `:error` label containing the name of the error class is added to the failed jobs counter
attr_config label_for_error_class_on_sidekiq_jobs_failed: false
end
end
end
6 changes: 4 additions & 2 deletions lib/yabeda/sidekiq/server_middleware.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ def call(worker, job, queue)
yield
end
Yabeda.sidekiq_jobs_success_total.increment(labels)
rescue Exception # rubocop: disable Lint/RescueException
Yabeda.sidekiq_jobs_failed_total.increment(labels)
rescue Exception => e # rubocop: disable Lint/RescueException
jobs_failed_labels = labels.dup
jobs_failed_labels[:error] = e.class.name if Yabeda::Sidekiq.config.label_for_error_class_on_sidekiq_jobs_failed
Yabeda.sidekiq_jobs_failed_total.increment(jobs_failed_labels)
raise
ensure
Yabeda.sidekiq_job_runtime.measure(labels, elapsed(start))
Expand Down
8 changes: 6 additions & 2 deletions spec/support/jobs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ def yabeda_tags
# Sidekiq worker fixture whose +perform+ always fails with +SpecialError+.
class FailingPlainJob
  include Sidekiq::Worker

  # Dedicated error class so that specs can assert on the reported error class name
  # ("FailingPlainJob::SpecialError") instead of a generic RuntimeError.
  SpecialError = Class.new(StandardError)

  # Always raises.
  #
  # @raise [SpecialError] unconditionally, with the message "Badaboom"
  def perform(*_args)
    raise SpecialError, "Badaboom"
  end
end

Expand All @@ -47,9 +49,11 @@ def perform(*_args)
end

# ActiveJob fixture (run through the Sidekiq queue adapter) whose +perform+ always fails with +SpecialError+.
class FailingActiveJob < ActiveJob::Base
  # Dedicated error class so that specs can assert on the reported error class name
  # ("FailingActiveJob::SpecialError") instead of a generic RuntimeError.
  SpecialError = Class.new(StandardError)

  self.queue_adapter = :Sidekiq

  # Always raises.
  #
  # @raise [SpecialError] unconditionally, with the message "Boom"
  def perform(*_args)
    raise SpecialError, "Boom"
  end
end

Expand Down
71 changes: 71 additions & 0 deletions spec/yabeda/sidekiq_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,51 @@
)
end

context "when label_for_error_class_on_sidekiq_jobs_failed is set to true" do
  # Switch the error-class label on for each example and put the previous value back afterwards.
  around do |example|
    previous_setting = described_class.config.label_for_error_class_on_sidekiq_jobs_failed
    described_class.config.label_for_error_class_on_sidekiq_jobs_failed = true

    example.run

    described_class.config.label_for_error_class_on_sidekiq_jobs_failed = previous_setting
  end

  # Enqueues two SamplePlainJob jobs and one FailingPlainJob job,
  # swallowing any StandardError raised while enqueuing the failing one.
  def enqueue_sample_and_failing_plain_jobs
    2.times { SamplePlainJob.perform_async }

    begin
      FailingPlainJob.perform_async
    rescue StandardError
      nil
    end
  end

  it "counts failed total and executed total with correct labels", sidekiq: :inline do
    # The failed counter is expected to carry the error class name as an extra label...
    failed_with_error_label =
      increment_yabeda_counter(Yabeda.sidekiq.jobs_failed_total).with(
        { queue: "default", worker: "FailingPlainJob", error: "FailingPlainJob::SpecialError" } => 1,
      )
    # ...while the executed counter keeps only the queue and worker labels.
    executed_with_plain_labels =
      increment_yabeda_counter(Yabeda.sidekiq.jobs_executed_total).with(
        { queue: "default", worker: "SamplePlainJob" } => 2,
        { queue: "default", worker: "FailingPlainJob" } => 1,
      )

    expect { enqueue_sample_and_failing_plain_jobs }
      .to failed_with_error_label.and(executed_with_plain_labels)
  end

  it "does not add jobs_failed_total error label to labels used for jobs_executed_total", sidekiq: :inline do
    executed_with_error_label =
      increment_yabeda_counter(Yabeda.sidekiq.jobs_executed_total).with(
        { queue: "default", worker: "FailingPlainJob", error: "FailingPlainJob::SpecialError" } => 1,
      )

    expect { enqueue_sample_and_failing_plain_jobs }.not_to executed_with_error_label
  end
end

describe "re-routing jobs by middleware" do
around do |example|
add_reroute_jobs_middleware
Expand Down Expand Up @@ -100,6 +145,32 @@
end
end

context "when label_for_error_class_on_sidekiq_jobs_failed is set to true" do
  # Switch the error-class label on for each example and put the previous value back afterwards.
  around do |example|
    previous_setting = described_class.config.label_for_error_class_on_sidekiq_jobs_failed
    described_class.config.label_for_error_class_on_sidekiq_jobs_failed = true

    example.run

    described_class.config.label_for_error_class_on_sidekiq_jobs_failed = previous_setting
  end

  it "counts enqueues and uses the default label for the error class", sidekiq: :inline do
    # The failed counter is expected to carry the error class name as an extra label.
    failed_with_error_label =
      increment_yabeda_counter(Yabeda.sidekiq.jobs_failed_total).with(
        { queue: "default", worker: "FailingActiveJob", error: "FailingActiveJob::SpecialError" } => 1,
      )

    expect do
      2.times { SampleActiveJob.perform_later }

      # Swallow any StandardError raised while enqueuing the failing job.
      begin
        FailingActiveJob.perform_later
      rescue StandardError
        nil
      end
    end.to failed_with_error_label
  end
end

it "measures runtime", sidekiq: :inline do
expect do
SampleActiveJob.perform_later
Expand Down

0 comments on commit 6cc2511

Please sign in to comment.