Skip to content

Commit

Permalink
util: expose otel metrics through grpc and prometheus
Browse files Browse the repository at this point in the history
This exposes the otel metrics through the environment variables for the
otel collector or through the prometheus `/metrics` endpoint through the
debug address.

Signed-off-by: Jonathan A. Sternberg <[email protected]>
  • Loading branch information
jsternberg committed Dec 6, 2023
1 parent 75fcbfe commit f025d9c
Show file tree
Hide file tree
Showing 117 changed files with 16,617 additions and 900 deletions.
2 changes: 1 addition & 1 deletion cmd/buildctl/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func ResolveClient(c *cli.Context) (*client.Client, error) {
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
opts = append(opts, client.WithTracerProvider(span.TracerProvider()))

exp, err := detect.Exporter()
exp, _, err := detect.Exporter()
if err != nil {
return nil, err
}
Expand Down
3 changes: 3 additions & 0 deletions cmd/buildkitd/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/moby/buildkit/util/bklog"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/net/trace"
)

Expand All @@ -28,6 +29,8 @@ func setupDebugHandlers(addr string) error {
bklog.G(req.Context()).Debugf("triggered GC from debug endpoint")
}))

m.Handle("/metrics", promhttp.Handler())

// setting debugaddr is opt-in. permission is defined by listener address
trace.AuthRequest = func(_ *http.Request) (bool, bool) {
return true, true
Expand Down
2 changes: 1 addition & 1 deletion cmd/buildkitd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ func newController(c *cli.Context, cfg *config.Config) (*control.Controller, err
return nil, err
}

tc, err := detect.Exporter()
tc, _, err := detect.Exporter()
if err != nil {
return nil, err
}
Expand Down
13 changes: 9 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ require (
github.com/pelletier/go-toml v1.9.5
github.com/pkg/errors v0.9.1
github.com/pkg/profile v1.5.0
github.com/prometheus/procfs v0.9.0
github.com/prometheus/procfs v0.10.1
github.com/serialx/hashring v0.0.0-20190422032157-8b2912629002
github.com/sirupsen/logrus v1.9.3
github.com/spdx/tools-golang v0.5.1
Expand All @@ -76,10 +76,14 @@ require (
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/jaeger v1.17.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0
go.opentelemetry.io/otel/exporters/prometheus v0.42.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
go.opentelemetry.io/proto/otlp v1.0.0
golang.org/x/crypto v0.14.0
Expand Down Expand Up @@ -146,15 +150,16 @@ require (
github.com/opencontainers/runc v1.1.10 // indirect
github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/client_golang v1.16.0
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect
github.com/shibumi/go-pathspec v1.3.0 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0
golang.org/x/mod v0.11.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/tools v0.10.0 // indirect
Expand Down
22 changes: 16 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1042,16 +1042,16 @@ github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDf
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw=
github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y=
github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4=
github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY=
github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
github.com/prometheus/common v0.0.0-20180110214958-89604d197083/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
Expand All @@ -1075,8 +1075,8 @@ github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+Gx
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI=
github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY=
github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=
github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/quasilyte/go-consistent v0.0.0-20190521200055-c6f3937de18c/go.mod h1:5STLWrekHfjyYwxBRVRXNOSewLJ3PWfDJd1VyTS21fI=
github.com/quasilyte/go-ruleguard v0.1.2-0.20200318202121-b00d7a75d3d8/go.mod h1:CGFX09Ci3pq9QZdj86B+VGIdNj4VyCo2iPOGS9esB/k=
Expand Down Expand Up @@ -1286,16 +1286,26 @@ go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY=
go.opentelemetry.io/otel/exporters/jaeger v1.17.0 h1:D7UpUy2Xc2wsi1Ras6V40q806WM07rqoCWzXu7Sqy+4=
go.opentelemetry.io/otel/exporters/jaeger v1.17.0/go.mod h1:nPCqOnEH9rNLKqH/+rrUjiMzHJdV1BlpKcTwRTyKkKI=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 h1:ZtfnDL+tUrs1F0Pzfwbg2d59Gru9NCH3bgSHBM6LDwU=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0/go.mod h1:hG4Fj/y8TR/tlEDREo8tWstl9fO9gcFkn4xrx0Io8xU=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 h1:NmnYCiR0qNufkldjVvyQfZTHSdzeHoZ41zggMsdMcLM=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0/go.mod h1:UVAO61+umUsHLtYb8KXXRoHtxUkdOPkYidzW3gipRLQ=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 h1:wNMDy/LVGLj2h3p6zg4d0gypKfWKSWI14E1C4smOgl8=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0/go.mod h1:YfbDdXAAkemWJK3H/DshvlrxqFB2rtW4rY6ky/3x/H0=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 h1:3d+S281UTjM+AbF31XSOYn1qXn3BgIdWl8HNEpx08Jk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0/go.mod h1:0+KuTDyKL4gjKCF75pHOX4wuzYDUZYfAQdSu43o+Z2I=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 h1:IeMeyr1aBvBiPVYihXIaeIZba6b8E1bYp7lbdxK8CQg=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0/go.mod h1:oVdCUtjq9MK9BlS7TtucsQwUcXcymNiEDjgDD2jMtZU=
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 h1:jwV9iQdvp38fxXi8ZC+lNpxjK16MRcZlpDYvbuO1FiA=
go.opentelemetry.io/otel/exporters/prometheus v0.42.0/go.mod h1:f3bYiqNqhoPxkvI2LrXqQVC546K7BuRDL/kKuxkujhA=
go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE=
go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8=
go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o=
go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A=
go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8Ni+hx+8i1k=
go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY=
go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg=
go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo=
go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I=
Expand Down
2 changes: 1 addition & 1 deletion solver/llbsolver/solver.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (s *Solver) recordBuildHistory(ctx context.Context, id string, req frontend
var stopTrace func() []tracetest.SpanStub

if s := trace.SpanFromContext(ctx); s.SpanContext().IsValid() {
if exp, err := detect.Exporter(); err == nil {
if exp, _, err := detect.Exporter(); err == nil {
if rec, ok := exp.(*detect.TraceRecorder); ok {
stopTrace = rec.Record(s.SpanContext().TraceID())
}
Expand Down
5 changes: 3 additions & 2 deletions util/tracing/detect/delegated/delegated.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"sync"

"github.com/moby/buildkit/util/tracing/detect"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

Expand All @@ -13,8 +14,8 @@ const maxBuffer = 256
var exp = &Exporter{}

func init() {
detect.Register("delegated", func() (sdktrace.SpanExporter, error) {
return exp, nil
detect.Register("delegated", func() (sdktrace.SpanExporter, sdkmetric.Exporter, error) {
return exp, nil, nil
}, 100)
}

Expand Down
2 changes: 1 addition & 1 deletion util/tracing/detect/delegated/delegated_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

func TestDetectPreservesDelegateInterface(t *testing.T) {
exp, err := detect.Exporter()
exp, _, err := detect.Exporter()
require.NoError(t, err)

_, ok := exp.(client.TracerDelegate)
Expand Down
133 changes: 96 additions & 37 deletions util/tracing/detect/detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@ import (

"github.com/moby/buildkit/util/bklog"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
"go.opentelemetry.io/otel/trace"
)

type ExporterDetector func() (sdktrace.SpanExporter, error)
type ExporterDetector func() (sdktrace.SpanExporter, sdkmetric.Exporter, error)

type detector struct {
f ExporterDetector
Expand All @@ -29,7 +32,11 @@ var Recorder *TraceRecorder
var detectors map[string]detector
var once sync.Once
var tp trace.TracerProvider
var exporter sdktrace.SpanExporter
var mp metric.MeterProvider
var exporter struct {
SpanExporter sdktrace.SpanExporter
MetricExporter sdkmetric.Exporter
}
var closers []func(context.Context) error
var err error

Expand All @@ -43,14 +50,14 @@ func Register(name string, exp ExporterDetector, priority int) {
}
}

func detectExporter() (sdktrace.SpanExporter, error) {
func detectExporter() (texp sdktrace.SpanExporter, mexp sdkmetric.Exporter, err error) {
if n := os.Getenv("OTEL_TRACES_EXPORTER"); n != "" {
d, ok := detectors[n]
if !ok {
if n == "none" {
return nil, nil
return nil, nil, nil
}
return nil, errors.Errorf("unsupported opentelemetry tracer %v", n)
return nil, nil, errors.Errorf("unsupported opentelemetry tracer %v", n)
}
return d.f()
}
Expand All @@ -61,42 +68,50 @@ func detectExporter() (sdktrace.SpanExporter, error) {
sort.Slice(arr, func(i, j int) bool {
return arr[i].priority < arr[j].priority
})

for _, d := range arr {
exp, err := d.f()
t, m, err := d.f()
if err != nil {
return nil, err
return nil, nil, err
}
if texp == nil {
texp = t
}
if exp != nil {
return exp, nil
if mexp == nil {
mexp = m
}

// Found a candidate for both exporters so just return now.
if texp != nil && mexp != nil {
return texp, mexp, nil
}
}
return nil, nil
return texp, mexp, nil
}

func getExporter() (sdktrace.SpanExporter, error) {
exp, err := detectExporter()
func getExporters() (sdktrace.SpanExporter, sdkmetric.Exporter, error) {
texp, mexp, err := detectExporter()
if err != nil {
return nil, err
return nil, nil, err
}

if Recorder != nil {
Recorder.SpanExporter = exp
exp = Recorder
Recorder.SpanExporter = texp
texp = Recorder
}
return exp, nil

return texp, mexp, nil
}

func detect() error {
tp = trace.NewNoopTracerProvider()
mp = sdkmetric.NewMeterProvider()

exp, err := getExporter()
if err != nil || exp == nil {
texp, mexp, err := getExporters()
if err != nil || (texp == nil && mexp == nil) {
return err
}

// enable log with traceID when valid exporter
bklog.EnableLogWithTraceID(true)

res, err := resource.Detect(context.Background(), serviceNameDetector{})
if err != nil {
return err
Expand All @@ -106,39 +121,83 @@ func detect() error {
return err
}

sp := sdktrace.NewBatchSpanProcessor(exp)
// enable log with traceID when valid exporter
if texp != nil {
bklog.EnableLogWithTraceID(true)

if Recorder != nil {
Recorder.flush = sp.ForceFlush
sp := sdktrace.NewBatchSpanProcessor(texp)

if Recorder != nil {
Recorder.flush = sp.ForceFlush
}

sdktp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sp), sdktrace.WithResource(res))
closers = append(closers, sdktp.Shutdown)

exporter.SpanExporter = texp
tp = sdktp
}

var readers []sdkmetric.Reader
if mexp != nil {
// Create a new periodic reader using any configured metric exporter.
readers = append(readers, sdkmetric.NewPeriodicReader(mexp))
}

sdktp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sp), sdktrace.WithResource(res))
closers = append(closers, sdktp.Shutdown)
if r, err := prometheus.New(); err == nil {
// Register the prometheus reader if there was no error.
// Don't fail if there was an error.
// TODO(jsternberg): Maybe log the error here?
readers = append(readers, r)
}

exporter = exp
tp = sdktp
if len(readers) > 0 {
opts := make([]sdkmetric.Option, 0, len(readers)+1)
opts = append(opts, sdkmetric.WithResource(res))
for _, r := range readers {
opts = append(opts, sdkmetric.WithReader(r))
}
sdkmp := sdkmetric.NewMeterProvider(opts...)
closers = append(closers, sdkmp.Shutdown)

exporter.MetricExporter = mexp
mp = sdkmp
}
return nil
}

func TracerProvider() (trace.TracerProvider, error) {
if err := detectOnce(); err != nil {
return nil, err
}
return tp, nil
}

func MeterProvider() (metric.MeterProvider, error) {
if err := detectOnce(); err != nil {
return nil, err
}
return mp, nil
}

func detectOnce() error {
once.Do(func() {
if err1 := detect(); err1 != nil {
err = err1
b, _ := strconv.ParseBool(os.Getenv("OTEL_IGNORE_ERROR"))
if !b {
err = err1
}
}
})
b, _ := strconv.ParseBool(os.Getenv("OTEL_IGNORE_ERROR"))
if err != nil && !b {
return nil, err
}
return tp, nil
return err
}

func Exporter() (sdktrace.SpanExporter, error) {
func Exporter() (sdktrace.SpanExporter, sdkmetric.Exporter, error) {
_, err := TracerProvider()
if err != nil {
return nil, err
return nil, nil, err
}
return exporter, nil
return exporter.SpanExporter, exporter.MetricExporter, nil
}

func Shutdown(ctx context.Context) error {
Expand Down
Loading

0 comments on commit f025d9c

Please sign in to comment.