From 301dbec7f86953dadce4f7f4323d468f33906932 Mon Sep 17 00:00:00 2001
From: Saransh Shankar <103821431+Wise-Wizard@users.noreply.github.com>
Date: Sun, 30 Jun 2024 22:15:00 +0530
Subject: [PATCH] Create metrics.Factory adapter for OTEL Metrics (#5661)

**Which problem is this PR solving?**
This PR addresses part of issue
[#5633](https://github.com/jaegertracing/jaeger/issues/5633).

**Description of the changes**
This PR adds an adapter that implements the internal `metrics.Factory` API
on top of OTEL Metrics, so that existing instrumentation can be bridged to
OTEL instead of being rewritten, which minimizes code changes.

**How was this change tested?**
The changes were tested by running the following command:

```bash
make test
```

**Checklist**
- [x] I have read
[CONTRIBUTING_GUIDELINES.md](https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md)
- [x] I have signed all commits
- [x] I have added unit tests for the new functionality
- [x] I have run lint and test steps successfully
  - `for jaeger: make lint test`
  - `for jaeger-ui: yarn lint` and `yarn test`

---------

Signed-off-by: Wise-Wizard
Signed-off-by: Saransh Shankar <103821431+Wise-Wizard@users.noreply.github.com>
Signed-off-by: Yuri Shkuro
Co-authored-by: Yuri Shkuro
Co-authored-by: Yuri Shkuro
---
 internal/metrics/benchmark_test.go           | 113 ++++++++
 internal/metrics/otelmetrics/counter.go      |  20 ++
 internal/metrics/otelmetrics/factory.go      | 133 ++++++++++
 internal/metrics/otelmetrics/factory_test.go | 256 +++++++++++++++++++
 internal/metrics/otelmetrics/gauge.go        |  20 ++
 internal/metrics/otelmetrics/histogram.go    |  20 ++
 internal/metrics/otelmetrics/timer.go        |  21 ++
 7 files changed, 583 insertions(+)
 create mode 100644 internal/metrics/benchmark_test.go
 create mode 100644 internal/metrics/otelmetrics/counter.go
 create mode 100644 internal/metrics/otelmetrics/factory.go
 create mode 100644 internal/metrics/otelmetrics/factory_test.go
 create mode 100644 internal/metrics/otelmetrics/gauge.go
 create mode 100644 internal/metrics/otelmetrics/histogram.go
 create mode 100644
internal/metrics/otelmetrics/timer.go

diff --git a/internal/metrics/benchmark_test.go b/internal/metrics/benchmark_test.go
new file mode 100644
index 00000000000..62c322c0b91
--- /dev/null
+++ b/internal/metrics/benchmark_test.go
@@ -0,0 +1,125 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package benchmark_test
+
+import (
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/stretchr/testify/require"
+	promExporter "go.opentelemetry.io/otel/exporters/prometheus"
+	"go.opentelemetry.io/otel/sdk/metric"
+
+	"github.com/jaegertracing/jaeger/internal/metrics/otelmetrics"
+	prom "github.com/jaegertracing/jaeger/internal/metrics/prometheus"
+	"github.com/jaegertracing/jaeger/pkg/metrics"
+	"github.com/jaegertracing/jaeger/pkg/testutils"
+)
+
+func TestMain(m *testing.M) {
+	testutils.VerifyGoLeaks(m)
+}
+
+// setupPrometheusFactory returns a metrics.Factory built by the internal
+// Prometheus metrics package and registered in a fresh registry.
+func setupPrometheusFactory() metrics.Factory {
+	reg := prometheus.NewRegistry()
+	return prom.New(prom.WithRegisterer(reg))
+}
+
+// setupOTELFactory returns a metrics.Factory built by the OTEL adapter, whose
+// meter provider exports through an OTEL Prometheus exporter into a fresh registry.
+func setupOTELFactory(b *testing.B) metrics.Factory {
+	registry := prometheus.NewRegistry()
+	exporter, err := promExporter.New(promExporter.WithRegisterer(registry))
+	require.NoError(b, err)
+	meterProvider := metric.NewMeterProvider(
+		metric.WithReader(exporter),
+	)
+	return otelmetrics.NewFactory(meterProvider)
+}
+
+// benchmarkCounter measures Counter.Inc on a counter created by factory.
+func benchmarkCounter(b *testing.B, factory metrics.Factory) {
+	counter := factory.Counter(metrics.Options{
+		Name: "test_counter",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+
+	b.ResetTimer() // keep factory and instrument setup out of the measurement
+	for i := 0; i < b.N; i++ {
+		counter.Inc(1)
+	}
+}
+
+// benchmarkGauge measures Gauge.Update on a gauge created by factory.
+func benchmarkGauge(b *testing.B, factory metrics.Factory) {
+	gauge := factory.Gauge(metrics.Options{
+		Name: "test_gauge",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+
+	b.ResetTimer() // keep factory and instrument setup out of the measurement
+	for i := 0; i < b.N; i++ {
+		gauge.Update(1)
+	}
+}
+
+// benchmarkTimer measures Timer.Record on a timer created by factory.
+func benchmarkTimer(b *testing.B, factory metrics.Factory) {
+	timer := factory.Timer(metrics.TimerOptions{
+		Name: "test_timer",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+
+	b.ResetTimer() // keep factory and instrument setup out of the measurement
+	for i := 0; i < b.N; i++ {
+		timer.Record(100)
+	}
+}
+
+// benchmarkHistogram measures Histogram.Record on a histogram created by factory.
+func benchmarkHistogram(b *testing.B, factory metrics.Factory) {
+	histogram := factory.Histogram(metrics.HistogramOptions{
+		Name: "test_histogram",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+
+	b.ResetTimer() // keep factory and instrument setup out of the measurement
+	for i := 0; i < b.N; i++ {
+		histogram.Record(1.0)
+	}
+}
+
+func BenchmarkPrometheusCounter(b *testing.B) {
+	benchmarkCounter(b, setupPrometheusFactory())
+}
+
+func BenchmarkOTELCounter(b *testing.B) {
+	benchmarkCounter(b, setupOTELFactory(b))
+}
+
+func BenchmarkPrometheusGauge(b *testing.B) {
+	benchmarkGauge(b, setupPrometheusFactory())
+}
+
+func BenchmarkOTELGauge(b *testing.B) {
+	benchmarkGauge(b, setupOTELFactory(b))
+}
+
+func BenchmarkPrometheusTimer(b *testing.B) {
+	benchmarkTimer(b, setupPrometheusFactory())
+}
+
+func BenchmarkOTELTimer(b *testing.B) {
+	benchmarkTimer(b, setupOTELFactory(b))
+}
+
+func BenchmarkPrometheusHistogram(b *testing.B) {
+	benchmarkHistogram(b, setupPrometheusFactory())
+}
+
+func BenchmarkOTELHistogram(b *testing.B) {
+	benchmarkHistogram(b, setupOTELFactory(b))
+}
diff --git a/internal/metrics/otelmetrics/counter.go b/internal/metrics/otelmetrics/counter.go
new file mode 100644
index 00000000000..437442306f3
--- /dev/null
+++ b/internal/metrics/otelmetrics/counter.go
@@ -0,0 +1,23 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package otelmetrics
+
+import (
+	"context"
+
+	"go.opentelemetry.io/otel/metric"
+)
+
+// otelCounter adapts an OTEL Int64Counter to the metrics.Counter interface.
+// The context and the attribute option are fixed when the counter is created.
+type otelCounter struct {
+	counter  metric.Int64Counter
+	fixedCtx context.Context
+	option   metric.AddOption
+}
+
+// Inc adds value to the underlying OTEL counter.
+func (c *otelCounter) Inc(value int64) {
+	c.counter.Add(c.fixedCtx, value, c.option)
+}
diff --git a/internal/metrics/otelmetrics/factory.go b/internal/metrics/otelmetrics/factory.go
new file mode 100644
index 00000000000..d14e60c616c
--- /dev/null
+++ b/internal/metrics/otelmetrics/factory.go
@@ -0,0 +1,161 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package otelmetrics
+
+import (
+	"context"
+	"log"
+	"strings"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+
+	"github.com/jaegertracing/jaeger/pkg/metrics"
+)
+
+// otelFactory implements metrics.Factory on top of an OTEL metric.Meter.
+// Its scope prefixes instrument names and its tags are merged into attributes.
+type otelFactory struct {
+	meter      metric.Meter
+	scope      string
+	separator  string
+	normalizer *strings.Replacer
+	tags       map[string]string
+}
+
+// NewFactory returns a metrics.Factory that creates instruments from the
+// "jaeger-v2" meter of meterProvider. Name parts are joined with "." and then
+// normalized by replacing spaces, dots and dashes with underscores.
+func NewFactory(meterProvider metric.MeterProvider) metrics.Factory {
+	return &otelFactory{
+		meter:      meterProvider.Meter("jaeger-v2"),
+		separator:  ".",
+		normalizer: strings.NewReplacer(" ", "_", ".", "_", "-", "_"),
+		tags:       make(map[string]string),
+	}
+}
+
+// Counter creates an OTEL Int64Counter named by the factory scope and
+// opts.Name, with the factory tags and opts.Tags as its fixed attributes.
+// If the instrument cannot be created, the error is logged and
+// metrics.NullCounter is returned.
+func (f *otelFactory) Counter(opts metrics.Options) metrics.Counter {
+	name := f.subScope(opts.Name)
+	counter, err := f.meter.Int64Counter(name)
+	if err != nil {
+		log.Printf("Error creating OTEL counter: %v", err)
+		return metrics.NullCounter
+	}
+	return &otelCounter{
+		counter:  counter,
+		fixedCtx: context.Background(),
+		option:   attributeSetOption(f.mergeTags(opts.Tags)),
+	}
+}
+
+// Gauge creates an OTEL Int64Gauge named by the factory scope and opts.Name,
+// with the factory tags and opts.Tags as its fixed attributes. If the
+// instrument cannot be created, the error is logged and metrics.NullGauge is returned.
+func (f *otelFactory) Gauge(opts metrics.Options) metrics.Gauge {
+	name := f.subScope(opts.Name)
+	gauge, err := f.meter.Int64Gauge(name)
+	if err != nil {
+		log.Printf("Error creating OTEL gauge: %v", err)
+		return metrics.NullGauge
+	}
+
+	return &otelGauge{
+		gauge:    gauge,
+		fixedCtx: context.Background(),
+		option:   attributeSetOption(f.mergeTags(opts.Tags)),
+	}
+}
+
+// Histogram creates an OTEL Float64Histogram named by the factory scope and
+// opts.Name, with the factory tags and opts.Tags as its fixed attributes. If the
+// instrument cannot be created, the error is logged and metrics.NullHistogram is returned.
+func (f *otelFactory) Histogram(opts metrics.HistogramOptions) metrics.Histogram {
+	name := f.subScope(opts.Name)
+	histogram, err := f.meter.Float64Histogram(name)
+	if err != nil {
+		log.Printf("Error creating OTEL histogram: %v", err)
+		return metrics.NullHistogram
+	}
+
+	return &otelHistogram{
+		histogram: histogram,
+		fixedCtx:  context.Background(),
+		option:    attributeSetOption(f.mergeTags(opts.Tags)),
+	}
+}
+
+// Timer creates an OTEL Float64Histogram with unit "s", named by the factory
+// scope and opts.Name, with the factory tags and opts.Tags as its fixed
+// attributes. If the instrument cannot be created, the error is logged and
+// metrics.NullTimer is returned.
+func (f *otelFactory) Timer(opts metrics.TimerOptions) metrics.Timer {
+	name := f.subScope(opts.Name)
+	timer, err := f.meter.Float64Histogram(name, metric.WithUnit("s"))
+	if err != nil {
+		log.Printf("Error creating OTEL timer: %v", err)
+		return metrics.NullTimer
+	}
+	return &otelTimer{
+		histogram: timer,
+		fixedCtx:  context.Background(),
+		option:    attributeSetOption(f.mergeTags(opts.Tags)),
+	}
+}
+
+// Namespace returns a new factory that shares this factory's meter, extends
+// its scope with opts.Name and merges opts.Tags into its tags.
+func (f *otelFactory) Namespace(opts metrics.NSOptions) metrics.Factory {
+	return &otelFactory{
+		meter:      f.meter,
+		scope:      f.subScope(opts.Name),
+		separator:  f.separator,
+		normalizer: f.normalizer,
+		tags:       f.mergeTags(opts.Tags),
+	}
+}
+
+// subScope returns the normalized name obtained by joining the factory scope
+// and name with the separator. If either part is empty, the other part is
+// returned normalized, without a separator.
+func (f *otelFactory) subScope(name string) string {
+	if f.scope == "" {
+		return f.normalize(name)
+	}
+	if name == "" {
+		return f.normalize(f.scope)
+	}
+	return f.normalize(f.scope + f.separator + name)
+}
+
+// normalize applies the factory's replacer to v.
+func (f *otelFactory) normalize(v string) string {
+	return f.normalizer.Replace(v)
+}
+
+// mergeTags returns a new map holding the factory tags overlaid with tags;
+// on a key conflict the value from tags wins. Neither input map is modified.
+func (f *otelFactory) mergeTags(tags map[string]string) map[string]string {
+	merged := make(map[string]string, len(f.tags)+len(tags))
+	for k, v := range f.tags {
+		merged[k] = v
+	}
+	for k, v := range tags {
+		merged[k] = v
+	}
+	return merged
+}
+
+// attributeSetOption converts tags into string attributes wrapped in metric.WithAttributes.
+func attributeSetOption(tags map[string]string) metric.MeasurementOption {
+	attributes := make([]attribute.KeyValue, 0, len(tags))
+	for k, v := range tags {
+		attributes = append(attributes, attribute.String(k, v))
+	}
+	return metric.WithAttributes(attributes...)
+}
diff --git a/internal/metrics/otelmetrics/factory_test.go b/internal/metrics/otelmetrics/factory_test.go
new file mode 100644
index 00000000000..ade7cbf8064
--- /dev/null
+++ b/internal/metrics/otelmetrics/factory_test.go
@@ -0,0 +1,256 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package otelmetrics_test
+
+import (
+	"testing"
+	"time"
+
+	promReg "github.com/prometheus/client_golang/prometheus"
+	promModel "github.com/prometheus/client_model/go"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel/exporters/prometheus"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
+
+	"github.com/jaegertracing/jaeger/internal/metrics/otelmetrics"
+	"github.com/jaegertracing/jaeger/pkg/metrics"
+	"github.com/jaegertracing/jaeger/pkg/testutils"
+)
+
+func TestMain(m *testing.M) {
+	testutils.VerifyGoLeaks(m)
+}
+
+func newTestFactory(t *testing.T, registry *promReg.Registry) metrics.Factory {
+	exporter, err := prometheus.New(prometheus.WithRegisterer(registry), prometheus.WithoutScopeInfo())
+	require.NoError(t, err)
+	meterProvider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(exporter))
+	return otelmetrics.NewFactory(meterProvider)
+}
+
+func findMetric(t *testing.T, registry *promReg.Registry, name string) *promModel.MetricFamily {
+	metricFamilies, err := registry.Gather()
+	require.NoError(t, err)
+
+	for _, mf := range metricFamilies {
+		t.Log(mf.GetName())
+		if mf.GetName() == name {
+			return mf
+		}
+	}
+	require.Fail(t, "Expected to find Metric Family")
+	return nil
+}
+
+func promLabelsToMap(labels []*promModel.LabelPair) map[string]string {
+	labelMap := make(map[string]string)
+	for _, label := range labels {
+		labelMap[label.GetName()] = label.GetValue()
+	}
+	return labelMap
+}
+
+func TestInvalidCounter(t *testing.T) {
+	factory := newTestFactory(t, promReg.NewPedanticRegistry())
+	counter := factory.Counter(metrics.Options{
+		Name: "invalid*counter%",
+	})
+	assert.Equal(t, metrics.NullCounter, counter, "Expected NullCounter, got %v", counter)
+}
+
+func TestInvalidGauge(t *testing.T) {
+	factory := newTestFactory(t, promReg.NewPedanticRegistry())
+	gauge := factory.Gauge(metrics.Options{
+		Name: "#invalid>gauge%",
+	})
+	assert.Equal(t, metrics.NullGauge, gauge, "Expected NullGauge, got %v", gauge)
+}
+
+func TestInvalidHistogram(t *testing.T) {
+	factory := newTestFactory(t, promReg.NewPedanticRegistry())
+	histogram := factory.Histogram(metrics.HistogramOptions{
+		Name: "invalid>histogram?%",
+	})
+	assert.Equal(t, metrics.NullHistogram, histogram, "Expected NullHistogram, got %v", histogram)
+}
+
+func TestInvalidTimer(t *testing.T) {
+	factory := newTestFactory(t, promReg.NewPedanticRegistry())
+	timer := factory.Timer(metrics.TimerOptions{
+		Name: "invalid*<=timer%",
+	})
+	assert.Equal(t, metrics.NullTimer, timer, "Expected NullTimer, got %v", timer)
+}
+
+func TestCounter(t *testing.T) {
+	registry := promReg.NewPedanticRegistry()
+	factory := newTestFactory(t, registry)
+	counter := factory.Counter(metrics.Options{
+		Name: "test_counter",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+	require.NotNil(t, counter)
+	counter.Inc(1)
+	counter.Inc(1)
+
+	testCounter := findMetric(t, registry, "test_counter_total")
+	metrics := testCounter.GetMetric()
+	assert.Equal(t, float64(2), metrics[0].GetCounter().GetValue())
+	expectedLabels := map[string]string{
+		"tag1": "value1",
+	}
+	assert.Equal(t, expectedLabels, promLabelsToMap(metrics[0].GetLabel()))
+}
+
+func TestGauge(t *testing.T) {
+	registry := promReg.NewPedanticRegistry()
+	factory := newTestFactory(t, registry)
+	gauge := factory.Gauge(metrics.Options{
+		Name: "test_gauge",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+	require.NotNil(t, gauge)
+	gauge.Update(2)
+
+	testGauge := findMetric(t, registry, "test_gauge")
+
+	metrics := testGauge.GetMetric()
+	assert.Equal(t, float64(2), metrics[0].GetGauge().GetValue())
+	expectedLabels := map[string]string{
+		"tag1": "value1",
+	}
+	assert.Equal(t, expectedLabels, promLabelsToMap(metrics[0].GetLabel()))
+}
+
+func TestHistogram(t *testing.T) {
+	registry := promReg.NewPedanticRegistry()
+	factory := newTestFactory(t, registry)
+	histogram := factory.Histogram(metrics.HistogramOptions{
+		Name: "test_histogram",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+	require.NotNil(t, histogram)
+	histogram.Record(1.0)
+
+	testHistogram := findMetric(t, registry, "test_histogram")
+
+	metrics := testHistogram.GetMetric()
+	assert.Equal(t, float64(1), metrics[0].GetHistogram().GetSampleSum())
+	expectedLabels := map[string]string{
+		"tag1": "value1",
+	}
+	assert.Equal(t, expectedLabels, promLabelsToMap(metrics[0].GetLabel()))
+}
+
+func TestTimer(t *testing.T) {
+	registry := promReg.NewPedanticRegistry()
+	factory := newTestFactory(t, registry)
+	timer := factory.Timer(metrics.TimerOptions{
+		Name: "test_timer",
+		Tags: map[string]string{"tag1": "value1"},
+	})
+	require.NotNil(t, timer)
+	timer.Record(100 * time.Millisecond)
+
+	testTimer := findMetric(t, registry, "test_timer_seconds")
+
+	metrics := testTimer.GetMetric()
+	assert.Equal(t, float64(0.1), metrics[0].GetHistogram().GetSampleSum())
+	expectedLabels := map[string]string{
+		"tag1": "value1",
+	}
+	assert.Equal(t, expectedLabels, promLabelsToMap(metrics[0].GetLabel()))
+}
+
+func TestNamespace(t *testing.T) {
+	testCases := []struct {
+		name           string
+		nsOptions1     metrics.NSOptions
+		nsOptions2     metrics.NSOptions
+		expectedName   string
+		expectedLabels map[string]string
+	}{
+		{
+			name: "Nested Namespace",
+			nsOptions1: metrics.NSOptions{
+				Name: "first_namespace",
+				Tags: map[string]string{"ns_tag1": "ns_value1"},
+			},
+			nsOptions2: metrics.NSOptions{
+				Name: "second_namespace",
+				Tags: map[string]string{"ns_tag3": "ns_value3"},
+			},
+			expectedName: "first_namespace_second_namespace_test_counter_total",
+			expectedLabels: map[string]string{
+				"ns_tag1": "ns_value1",
+				"ns_tag3": "ns_value3",
+				"tag1":    "value1",
+			},
+		},
+		{
+			name: "Single Namespace",
+			nsOptions1: metrics.NSOptions{
+				Name: "single_namespace",
+				Tags: map[string]string{"ns_tag2": "ns_value2"},
+			},
+			nsOptions2:   metrics.NSOptions{},
+			expectedName: "single_namespace_test_counter_total",
+			expectedLabels: map[string]string{
+				"ns_tag2": "ns_value2",
+				"tag1":    "value1",
+			},
+		},
+		{
+			name:         "Empty Namespace Name",
+			nsOptions1:   metrics.NSOptions{},
+			nsOptions2:   metrics.NSOptions{},
+			expectedName: "test_counter_total",
+			expectedLabels: map[string]string{
+				"tag1": "value1",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			registry := promReg.NewPedanticRegistry()
+			factory := newTestFactory(t, registry)
+			nsFactory1 := factory.Namespace(tc.nsOptions1)
+			nsFactory2 := nsFactory1.Namespace(tc.nsOptions2)
+
+			counter := nsFactory2.Counter(metrics.Options{
+				Name: "test_counter",
+				Tags: map[string]string{"tag1": "value1"},
+			})
+			require.NotNil(t, counter)
+			counter.Inc(1)
+
+			testCounter := findMetric(t, registry, tc.expectedName)
+
+			metrics := testCounter.GetMetric()
+			assert.Equal(t, float64(1), metrics[0].GetCounter().GetValue())
+			assert.Equal(t, tc.expectedLabels, promLabelsToMap(metrics[0].GetLabel()))
+		})
+	}
+}
+
+func TestNormalization(t *testing.T) {
+	registry := promReg.NewPedanticRegistry()
+	factory := newTestFactory(t, registry)
+	normalizedFactory := factory.Namespace(metrics.NSOptions{
+		Name: "My Namespace",
+	})
+
+	gauge := normalizedFactory.Gauge(metrics.Options{
+		Name: "My Gauge",
+	})
+	require.NotNil(t, gauge)
+	gauge.Update(1)
+
+	testGauge := findMetric(t, registry, "My_Namespace_My_Gauge")
+
+	metrics := testGauge.GetMetric()
+	assert.Equal(t, float64(1), metrics[0].GetGauge().GetValue())
+}
diff --git a/internal/metrics/otelmetrics/gauge.go b/internal/metrics/otelmetrics/gauge.go
new file mode 100644
index 00000000000..dc5c4f38428
--- /dev/null
+++ b/internal/metrics/otelmetrics/gauge.go
@@ -0,0 +1,23 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package otelmetrics
+
+import (
+	"context"
+
+	"go.opentelemetry.io/otel/metric"
+)
+
+// otelGauge wraps an OTEL Int64Gauge together with the context and record
+// option that are passed unchanged on every Update call.
+type otelGauge struct {
+	gauge    metric.Int64Gauge
+	fixedCtx context.Context
+	option   metric.RecordOption
+}
+
+// Update records value on the underlying OTEL gauge.
+func (g *otelGauge) Update(value int64) {
+	g.gauge.Record(g.fixedCtx, value, g.option)
+}
diff --git a/internal/metrics/otelmetrics/histogram.go b/internal/metrics/otelmetrics/histogram.go
new file mode 100644
index 00000000000..b408b47dd7c
--- /dev/null
+++ b/internal/metrics/otelmetrics/histogram.go
@@ -0,0 +1,23 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package otelmetrics
+
+import (
+	"context"
+
+	"go.opentelemetry.io/otel/metric"
+)
+
+// otelHistogram wraps an OTEL Float64Histogram together with the context and
+// record option that are passed unchanged on every Record call.
+type otelHistogram struct {
+	histogram metric.Float64Histogram
+	fixedCtx  context.Context
+	option    metric.RecordOption
+}
+
+// Record records value on the underlying OTEL histogram.
+func (h *otelHistogram) Record(value float64) {
+	h.histogram.Record(h.fixedCtx, value, h.option)
+}
diff --git a/internal/metrics/otelmetrics/timer.go b/internal/metrics/otelmetrics/timer.go
new file mode 100644
index 00000000000..0df689243b3
--- /dev/null
+++ b/internal/metrics/otelmetrics/timer.go
@@ -0,0 +1,24 @@
+// Copyright (c) 2024 The Jaeger Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package otelmetrics
+
+import (
+	"context"
+	"time"
+
+	"go.opentelemetry.io/otel/metric"
+)
+
+// otelTimer wraps an OTEL Float64Histogram together with the context and
+// record option that are passed unchanged on every Record call.
+type otelTimer struct {
+	histogram metric.Float64Histogram
+	fixedCtx  context.Context
+	option    metric.RecordOption
+}
+
+// Record records d on the underlying histogram, converted to seconds.
+func (t *otelTimer) Record(d time.Duration) {
+	t.histogram.Record(t.fixedCtx, d.Seconds(), t.option)
+}