feat: add generic prometheus endpoints (#209)
- feat: add execution and additional metrics jobs to prometheus service
- feat: add beacon metrics gazer to prometheus
- fix: ignore not defined metrics info
cbermudez97 authored Sep 27, 2023
1 parent 251b34f commit d04e85f
Showing 4 changed files with 168 additions and 51 deletions.
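
In outline, the commit replaces hard-coded scrape targets with generic job descriptors: each launcher can hand a metrics job back to main.star, which collects them and launches Prometheus last. A condensed sketch of the new wiring (abbreviated from the main.star diff below, not the verbatim code):

# Condensed sketch of the new flow in main.star.
prometheus_additional_metrics_jobs = []

# A launcher now returns a metrics-job descriptor instead of Prometheus
# hard-coding its target:
gazer_job = beacon_metrics_gazer.launch_beacon_metrics_gazer(
    plan,
    beacon_metrics_gazer_config_template,
    all_cl_client_contexts,
    args_with_right_defaults.participants,
    network_params,
)
prometheus_additional_metrics_jobs.append(gazer_job)

# Prometheus is launched after the other services so it can scrape all of them:
prometheus.launch_prometheus(
    plan,
    prometheus_config_template,
    all_el_client_contexts,
    all_cl_client_contexts,
    prometheus_additional_metrics_jobs,
)
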
57 changes: 34 additions & 23 deletions main.star
@@ -83,6 +83,7 @@ def run(plan, args={}):
prometheus_config_template = read_file(
static_files.PROMETHEUS_CONFIG_TEMPLATE_FILEPATH
)
prometheus_additional_metrics_jobs = []

plan.print("Read the prometheus, grafana templates")

@@ -229,6 +230,7 @@ def run(plan, args={}):

if not args_with_right_defaults.launch_additional_services:
return
launch_prometheus_grafana = False
for additional_service in args_with_right_defaults.additional_services:
if additional_service == "tx_spammer":
plan.print("Launching transaction spammer")
@@ -283,12 +285,18 @@ def run(plan, args={}):
beacon_metrics_gazer_config_template = read_file(
static_files.BEACON_METRICS_GAZER_CONFIG_TEMPLATE_FILEPATH
)
beacon_metrics_gazer.launch_beacon_metrics_gazer(
plan,
beacon_metrics_gazer_config_template,
all_cl_client_contexts,
args_with_right_defaults.participants,
network_params,
beacon_metrics_gazer_prometheus_metrics_job = (
beacon_metrics_gazer.launch_beacon_metrics_gazer(
plan,
beacon_metrics_gazer_config_template,
all_cl_client_contexts,
args_with_right_defaults.participants,
network_params,
)
)
launch_prometheus_grafana = True
prometheus_additional_metrics_jobs.append(
beacon_metrics_gazer_prometheus_metrics_job
)
plan.print("Succesfully launched beacon metrics gazer")
elif additional_service == "light_beaconchain_explorer":
@@ -301,25 +309,28 @@
)
plan.print("Succesfully light-beaconchain-explorer")
elif additional_service == "prometheus_grafana":
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_cl_client_contexts,
all_el_client_contexts,
)
plan.print("Successfully launched Prometheus")

plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")
# Allow prometheus to be launched last so is able to collect metrics from other services
launch_prometheus_grafana = True
else:
fail("Invalid additional service %s" % (additional_service))
if launch_prometheus_grafana:
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_el_client_contexts,
all_cl_client_contexts,
prometheus_additional_metrics_jobs,
)

plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")

if args_with_right_defaults.wait_for_finalization:
plan.print("Waiting for the first finalized epoch")
18 changes: 17 additions & 1 deletion src/beacon_metrics_gazer/beacon_metrics_gazer_launcher.star
@@ -1,6 +1,9 @@
shared_utils = import_module(
"github.com/kurtosis-tech/ethereum-package/src/shared_utils/shared_utils.star"
)
prometheus = import_module(
"github.com/kurtosis-tech/ethereum-package/src/prometheus/prometheus_launcher.star"
)


SERVICE_NAME = "beacon-metrics-gazer"
@@ -9,6 +12,8 @@ IMAGE_NAME = "ethpandaops/beacon-metrics-gazer:master"
HTTP_PORT_ID = "http"
HTTP_PORT_NUMBER = 8080

METRICS_PATH = "/metrics"

BEACON_METRICS_GAZER_CONFIG_FILENAME = "validator-ranges.yaml"

BEACON_METRICS_GAZER_CONFIG_MOUNT_DIRPATH_ON_SERVICE = "/config"
@@ -59,7 +64,18 @@ def launch_beacon_metrics_gazer(
cl_client_contexts[0].http_port_num,
)

plan.add_service(SERVICE_NAME, config)
beacon_metrics_gazer_service = plan.add_service(SERVICE_NAME, config)

return prometheus.new_metrics_job(
job_name=SERVICE_NAME,
endpoint="{0}:{1}".format(
beacon_metrics_gazer_service.ip_address, HTTP_PORT_NUMBER
),
metrics_path=METRICS_PATH,
labels={
"service": SERVICE_NAME,
},
)


def get_config(config_files_artifact_name, ip_addr, http_port_num):
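
For reference, the descriptor returned by launch_beacon_metrics_gazer is simply the dict built by prometheus.new_metrics_job (defined later in this commit); the IP below is an illustrative placeholder for the value Kurtosis assigns at runtime:

# Approximate return value of launch_beacon_metrics_gazer (IP is a placeholder).
{
    "Name": "beacon-metrics-gazer",
    "Endpoint": "172.16.0.12:8080",  # beacon_metrics_gazer_service.ip_address + HTTP_PORT_NUMBER
    "MetricsPath": "/metrics",
    "Labels": {"service": "beacon-metrics-gazer"},
    "ScrapeInterval": "15s",  # default from new_metrics_job
}
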
117 changes: 104 additions & 13 deletions src/prometheus/prometheus_launcher.star
@@ -4,6 +4,14 @@ shared_utils = import_module(

SERVICE_NAME = "prometheus"

EXECUTION_CLIENT_TYPE = "execution"
BEACON_CLIENT_TYPE = "beacon"
VALIDATOR_CLIENT_TYPE = "validator"

METRICS_INFO_NAME_KEY = "name"
METRICS_INFO_URL_KEY = "url"
METRICS_INFO_PATH_KEY = "path"

# TODO(old) I'm not sure if we should use the latest version or pin a specific version instead
IMAGE_NAME = "prom/prometheus:latest"

@@ -22,17 +30,18 @@ USED_PORTS = {
}


def launch_prometheus(plan, config_template, cl_client_contexts, el_client_contexts):
all_nodes_metrics_info = []
for client in cl_client_contexts:
all_nodes_metrics_info.extend(client.cl_nodes_metrics_info)

for client in el_client_contexts:
# ethereumjs doesn't populate metrics just yet
if client.el_metrics_info != [None]:
all_nodes_metrics_info.extend(client.el_metrics_info)

template_data = new_config_template_data(all_nodes_metrics_info)
def launch_prometheus(
plan,
config_template,
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
):
template_data = new_config_template_data(
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
)
template_and_data = shared_utils.new_template_and_data(
config_template, template_data
)
@@ -75,5 +84,87 @@ def get_config(config_files_artifact_name):
)


def new_config_template_data(cl_nodes_metrics_info):
return {"CLNodesMetricsInfo": cl_nodes_metrics_info}
def new_config_template_data(
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
):
metrics_jobs = []
# Adding execution clients metrics jobs
for context in el_client_contexts:
if len(context.el_metrics_info) >= 1 and context.el_metrics_info[0] != None:
execution_metrics_info = context.el_metrics_info[0]
metrics_jobs.append(
new_metrics_job(
job_name=execution_metrics_info[METRICS_INFO_NAME_KEY],
endpoint=execution_metrics_info[METRICS_INFO_URL_KEY],
metrics_path=execution_metrics_info[METRICS_INFO_PATH_KEY],
labels={
"service": context.service_name,
"client_type": EXECUTION_CLIENT_TYPE,
"client_name": context.client_name,
},
)
)
# Adding consensus clients metrics jobs
for context in cl_client_contexts:
if (
len(context.cl_nodes_metrics_info) >= 1
and context.cl_nodes_metrics_info[0] != None
):
# Adding beacon node metrics
beacon_metrics_info = context.cl_nodes_metrics_info[0]
metrics_jobs.append(
new_metrics_job(
job_name=beacon_metrics_info[METRICS_INFO_NAME_KEY],
endpoint=beacon_metrics_info[METRICS_INFO_URL_KEY],
metrics_path=beacon_metrics_info[METRICS_INFO_PATH_KEY],
labels={
"service": context.beacon_service_name,
"client_type": BEACON_CLIENT_TYPE,
"client_name": context.client_name,
},
)
)
if (
len(context.cl_nodes_metrics_info) >= 2
and context.cl_nodes_metrics_info[1] != None
):
# Adding validator node metrics
validator_metrics_info = context.cl_nodes_metrics_info[1]
metrics_jobs.append(
new_metrics_job(
job_name=validator_metrics_info[METRICS_INFO_NAME_KEY],
endpoint=validator_metrics_info[METRICS_INFO_URL_KEY],
metrics_path=validator_metrics_info[METRICS_INFO_PATH_KEY],
labels={
"service": context.validator_service_name,
"client_type": VALIDATOR_CLIENT_TYPE,
"client_name": context.client_name,
},
)
)
# Adding additional metrics jobs
for job in additional_metrics_jobs:
if job == None:
continue
metrics_jobs.append(job)
return {
"MetricsJobs": metrics_jobs,
}


def new_metrics_job(
job_name,
endpoint,
metrics_path,
labels,
scrape_interval="15s",
):
return {
"Name": job_name,
"Endpoint": endpoint,
"MetricsPath": metrics_path,
"Labels": labels,
"ScrapeInterval": scrape_interval,
}
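
Since new_metrics_job returns a plain dict, any future launcher can register an extra scrape target the same way; a minimal, hypothetical usage sketch (the job name, endpoint, and interval below are made up for illustration):

# Hypothetical example: registering an arbitrary exporter as a Prometheus job.
extra_job = new_metrics_job(
    job_name="tx-spammer-exporter",  # hypothetical job name
    endpoint="10.0.0.7:9100",  # hypothetical ip:port of the exporter
    metrics_path="/metrics",
    labels={"service": "tx-spammer-exporter"},
    scrape_interval="30s",  # overrides the 15s default
)
# main.star would append this to prometheus_additional_metrics_jobs before
# calling launch_prometheus, and the template below renders it as a scrape job.
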
27 changes: 13 additions & 14 deletions static_files/prometheus-config/prometheus.yml.tmpl
@@ -1,16 +1,15 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_interval: 15s
scrape_configs:
{{ range $clNode := .CLNodesMetricsInfo }}
- job_name: '{{ $clNode.name }}'
metrics_path: {{ $clNode.path }}
static_configs:
- targets: ['{{ $clNode.url }}']
{{ end }}
- job_name: 'beacon-metrics-gazer'
metrics_path: '/metrics'
static_configs:
- targets: ['beacon-metrics-gazer:8080']
{{- range $job := .MetricsJobs }}
- job_name: "{{ $job.Name }}"
metrics_path: "{{ $job.MetricsPath }}"
{{- if $job.ScrapeInterval }}
scrape_interval: {{ $job.ScrapeInterval }}
{{- end }}
static_configs:
- targets: ['{{ $job.Endpoint }}']
labels:{{ range $labelName, $labelValue := $job.Labels }}
{{ $labelName }}: "{{ $labelValue }}"
{{- end }}
{{- end }}
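
For a descriptor like the beacon-metrics-gazer job above, the template should render roughly the following scrape config (the target IP is a placeholder and the exact indentation depends on the template's whitespace trimming):

global:
  scrape_interval: 15s
scrape_configs:
  - job_name: "beacon-metrics-gazer"
    metrics_path: "/metrics"
    scrape_interval: 15s
    static_configs:
      - targets: ['172.16.0.12:8080']
        labels:
          service: "beacon-metrics-gazer"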
