From bdc6462d04a8fba727fbf9381259fa7b1b3d100b Mon Sep 17 00:00:00 2001 From: Hossam Hammady Date: Wed, 29 Apr 2020 13:11:39 +0300 Subject: [PATCH] Separate web from worker metrics and add 2 more (#2) * Report hostname to prometheus as the instance name * Separate worker from web metrics and add 2 more * Deprecate older ruby versions and add new ones in travis --- .travis.yml | 4 ++-- Gemfile.lock | 2 +- README.md | 7 +++++-- lib/scaltainer/runner.rb | 24 ++++++++++++++++++++---- lib/scaltainer/version.rb | 2 +- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 407c098..4b5d050 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: ruby rvm: - - 2.1 - - 2.2 - 2.3 - 2.4 + - 2.5 + - 2.6 cache: bundler diff --git a/Gemfile.lock b/Gemfile.lock index 11aa2f9..da777ce 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - scaltainer (0.3.0) + scaltainer (0.4.0) docker-api dotenv excon (>= 0.47.0) diff --git a/README.md b/README.md index 4f5da02..70c8bc7 100644 --- a/README.md +++ b/README.md @@ -65,10 +65,13 @@ of the push gateway. For Kubernetes environments the above denotes the gateway s name (`prometheus-pushgateway`), where it is installed in the namespace called `monitoring`. Scaltainer will report the following metrics to Prometheus: -- `rayyan_controller_replicas`: number of replicas scaled (or untouched thereof). +- `scaltainer_web_replicas_total`: number of web replicas scaled (or untouched thereof). This is labeled by the namespace and controller name, both matching the scaltainer configuration file. 
-- `rayyan_scaltainer_ticks`: iterations scaltainer has performed (if `-w` is used) +- `scaltainer_worker_replicas_total`: Same as above, but for workers +- `scaltainer_web_response_time_seconds`: response times as reported by the web services +- `scaltainer_worker_queue_size_total`: queue sizes as reported by the worker services +- `scaltainer_ticks_total`: iterations scaltainer has performed (if `-w` is used) ## Configuration diff --git a/lib/scaltainer/runner.rb b/lib/scaltainer/runner.rb index a63134d..e6a85f4 100644 --- a/lib/scaltainer/runner.rb +++ b/lib/scaltainer/runner.rb @@ -1,4 +1,5 @@ require "yaml" +require 'socket' require 'prometheus/client' require 'prometheus/client/push' @@ -81,6 +82,8 @@ def process_service(service_name, config, state, namespace, type, metrics) @logger.debug "#{service.type.capitalize} #{service.name} is currently configured for #{current_replicas} replica(s)" metric = metrics[service.name] raise Scaltainer::Warning.new("Configured #{service.type} '#{service.name}' not found in metrics endpoint") unless metric + state["metric"] = metric + state["service_type"] = type.to_s desired_replicas = type.determine_desired_replicas metric, config, current_replicas @logger.debug "Desired number of replicas for #{service.type} #{service.name} is #{desired_replicas}" adjusted_replicas = type.adjust_desired_replicas(desired_replicas, config) @@ -121,16 +124,29 @@ def scale_out(service, current_replicas, desired_replicas) def register_pushgateway(pushgateway) @registry = Prometheus::Client.registry - @replicas_gauge = @registry.gauge(:rayyan_controller_replicas, docstring: 'Rayyan replicas', labels: [:controller, :namespace]) - @ticks_counter = @registry.counter(:rayyan_scaltainer_ticks, docstring: 'Rayyan Scaltainer ticks', labels: [:namespace]) + @web_replicas_gauge = @registry.gauge(:scaltainer_web_replicas_total, docstring: 'Scaltainer controller replicas for web services', labels: [:controller, :namespace]) + @worker_replicas_gauge = 
@registry.gauge(:scaltainer_worker_replicas_total, docstring: 'Scaltainer controller replicas for worker services', labels: [:controller, :namespace]) + @web_metrics_gauge = @registry.gauge(:scaltainer_web_response_time_seconds, docstring: 'Scaltainer controller response time metric in seconds', labels: [:controller, :namespace]) + @worker_metrics_gauge = @registry.gauge(:scaltainer_worker_queue_size_total, docstring: 'Scaltainer controller queue size metric', labels: [:controller, :namespace]) + @ticks_counter = @registry.counter(:scaltainer_ticks_total, docstring: 'Scaltainer ticks', labels: [:namespace]) - @pushgateway = Prometheus::Client::Push.new("scaltainer", "scaltainer", "http://#{pushgateway}") + @pushgateway = Prometheus::Client::Push.new("scaltainer", Socket.gethostname, "http://#{pushgateway}") end def sync_pushgateway(namespace, state) @logger.debug("Now syncing state #{state} in namespace #{namespace}") state.each do |service, state| - @replicas_gauge.set(state["replicas"], labels: {namespace: namespace, controller: service}) if state["replicas"] + if state["service_type"] == 'Web' + replicas_gauge = @web_replicas_gauge + metrics_gauge = @web_metrics_gauge + factor = 0.001 + else + replicas_gauge = @worker_replicas_gauge + metrics_gauge = @worker_metrics_gauge + factor = 1 + end + replicas_gauge.set(state["replicas"], labels: {namespace: namespace, controller: service}) if state["replicas"] + metrics_gauge.set(state["metric"] * factor, labels: {namespace: namespace, controller: service}) if state["metric"] end @ticks_counter.increment(labels: {namespace: namespace}) begin diff --git a/lib/scaltainer/version.rb b/lib/scaltainer/version.rb index dffba47..a5c8dc2 100644 --- a/lib/scaltainer/version.rb +++ b/lib/scaltainer/version.rb @@ -1,3 +1,3 @@ module Scaltainer - VERSION = "0.3.0" + VERSION = "0.4.0" end