From 73c34f98da4ef2c5ae9fcb1d9d4bb2d43c1d453f Mon Sep 17 00:00:00 2001 From: Sukun Date: Mon, 5 Jun 2023 18:11:26 +0530 Subject: [PATCH] holepunch: add metrics (#2246) * holepunch: add metrics * don't track addresses * add cancelled and no_suitable_address state * separate holepunch dashbords by transport * fix dashboard fields * add instance filter to dashboard * initialise metrics labels with 0 * fix panel names in dashboard Co-authored-by: Marten Seemann --------- Co-authored-by: Marten Seemann --- dashboards/holepunch/holepunch.json | 1126 +++++++++++++++++ go.mod | 2 +- p2p/host/basic/basic_host.go | 6 + p2p/metricshelper/conn.go | 29 + p2p/net/swarm/swarm_metrics.go | 32 +- p2p/protocol/holepunch/holepuncher.go | 44 +- p2p/protocol/holepunch/metrics.go | 187 +++ .../holepunch/metrics_noalloc_test.go | 49 + p2p/protocol/holepunch/metrics_test.go | 102 ++ p2p/protocol/holepunch/svc.go | 28 +- p2p/protocol/holepunch/tracer.go | 254 ++-- p2p/protocol/holepunch/util.go | 9 + 12 files changed, 1701 insertions(+), 167 deletions(-) create mode 100644 dashboards/holepunch/holepunch.json create mode 100644 p2p/metricshelper/conn.go create mode 100644 p2p/protocol/holepunch/metrics.go create mode 100644 p2p/protocol/holepunch/metrics_noalloc_test.go create mode 100644 p2p/protocol/holepunch/metrics_test.go diff --git a/dashboards/holepunch/holepunch.json b/dashboards/holepunch/holepunch.json new file mode 100644 index 0000000000..8e8e8974fc --- /dev/null +++ b/dashboards/holepunch/holepunch.json @@ -0,0 +1,1126 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.3.6" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 8, + "panels": [], + "title": "DCUtR Initiator", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 19, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (outcome) (increase(libp2p_holepunch_address_outcomes_total {side=\"initiator\", transport=\"tcp\", outcome=~\"success|failed\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches: TCP", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 20, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (increase(libp2p_holepunch_address_outcomes_total {side=\"initiator\", transport=~\"quic|quic-v1\", outcome=~\"success|failed\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches: QUIC", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "no_suitable_address" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 6, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (increase(libp2p_holepunch_outcomes_total{side=\"initiator\",instance=~\"$instance\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches: Total", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "increase(libp2p_holepunch_direct_dials_total{instance=~\"$instance\"}[$__rate_interval])", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Direct dials", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "no_suitable_address" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (libp2p_holepunch_outcomes_total{side=\"initiator\",instance=~\"$instance\"}) - (sum by (outcome)(libp2p_holepunch_outcomes_total{side=\"initiator\",instance=~\"$instance\"} offset $__interval) or vector(0))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 14, + "panels": [], + "title": "DCUtR Receiver", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 18, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (increase(libp2p_holepunch_address_outcomes_total {side=\"receiver\", transport=~\"quic|quic-v1\", outcome=~\"success|failed\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches: QUIC ", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 21, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (increase(libp2p_holepunch_address_outcomes_total {side=\"receiver\", transport=\"tcp\", outcome=~\"success|failed\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches: TCP", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "no_suitable_address" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 17, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (increase(libp2p_holepunch_outcomes_total{side=\"receiver\",instance=~\"$instance\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches: Total", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "no_suitable_address" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "(sum by (outcome) (libp2p_holepunch_outcomes_total{side=\"receiver\",instance=~\"$instance\"})) - (sum by (outcome)(libp2p_holepunch_outcomes_total{side=\"receiver\",instance=~\"$instance\"} offset $__interval) or vector(0))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Hole punches", + "type": "timeseries" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "definition": "label_values(instance)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "libp2p Hole Punches", + "uid": "Ao24vOBVk", + "version": 6, + "weekStart": "" +} \ No newline at end of file diff --git a/go.mod b/go.mod index de0f135d3f..164c06cce8 100644 --- a/go.mod +++ b/go.mod @@ -46,6 +46,7 @@ require ( github.com/multiformats/go-varint v0.0.7 github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/prometheus/client_golang v1.14.0 + github.com/prometheus/client_model v0.3.0 github.com/quic-go/quic-go v0.33.0 github.com/quic-go/webtransport-go v0.5.3 github.com/raulk/go-watchdog v1.3.0 @@ -98,7 +99,6 @@ require ( github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.42.0 // indirect github.com/prometheus/procfs v0.9.0 // indirect github.com/quic-go/qpack v0.4.0 // indirect diff --git a/p2p/host/basic/basic_host.go b/p2p/host/basic/basic_host.go index d0a6c9cba9..431137911e 100644 --- a/p2p/host/basic/basic_host.go +++ b/p2p/host/basic/basic_host.go @@ -251,6 +251,12 @@ func NewHost(n network.Network, opts *HostOpts) (*BasicHost, error) { } if opts.EnableHolePunching { + if opts.EnableMetrics { + hpOpts := []holepunch.Option{ + holepunch.WithMetricsTracer(holepunch.NewMetricsTracer(holepunch.WithRegisterer(opts.PrometheusRegisterer)))} + opts.HolePunchingOptions = append(hpOpts, opts.HolePunchingOptions...) + + } h.hps, err = holepunch.NewService(h, h.ids, opts.HolePunchingOptions...) if err != nil { return nil, fmt.Errorf("failed to create hole punch service: %w", err) diff --git a/p2p/metricshelper/conn.go b/p2p/metricshelper/conn.go new file mode 100644 index 0000000000..ef367ac9b1 --- /dev/null +++ b/p2p/metricshelper/conn.go @@ -0,0 +1,29 @@ +package metricshelper + +import ma "github.com/multiformats/go-multiaddr" + +var transports = [...]int{ma.P_CIRCUIT, ma.P_WEBRTC, ma.P_WEBTRANSPORT, ma.P_QUIC, ma.P_QUIC_V1, ma.P_WSS, ma.P_WS, ma.P_TCP} + +func GetTransport(a ma.Multiaddr) string { + for _, t := range transports { + if _, err := a.ValueForProtocol(t); err == nil { + return ma.ProtocolWithCode(t).Name + } + } + return "other" +} + +func GetIPVersion(addr ma.Multiaddr) string { + version := "unknown" + ma.ForEach(addr, func(c ma.Component) bool { + if c.Protocol().Code == ma.P_IP4 { + version = "ip4" + return false + } else if c.Protocol().Code == ma.P_IP6 { + version = "ip6" + return false + } + return true + }) + return version +} diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go index 992cc07bc0..3110217f81 100644 --- a/p2p/net/swarm/swarm_metrics.go +++ b/p2p/net/swarm/swarm_metrics.go @@ -153,28 +153,13 @@ func appendConnectionState(tags []string, cs network.ConnectionState) []string { return tags } -func getIPVersion(addr ma.Multiaddr) string { - version := "unknown" - ma.ForEach(addr, func(c ma.Component) bool { - if c.Protocol().Code == ma.P_IP4 { - version = "ip4" - return false - } else if c.Protocol().Code == ma.P_IP6 { - version = "ip6" - return false - } - return true - }) - return version -} - func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState, laddr ma.Multiaddr) { tags := metricshelper.GetStringSlice() defer metricshelper.PutStringSlice(tags) *tags = append(*tags, metricshelper.GetDirection(dir)) *tags = appendConnectionState(*tags, cs) - *tags = append(*tags, getIPVersion(laddr)) + *tags = append(*tags, metricshelper.GetIPVersion(laddr)) connsOpened.WithLabelValues(*tags...).Inc() *tags = (*tags)[:0] @@ -189,7 +174,7 @@ func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Du *tags = append(*tags, metricshelper.GetDirection(dir)) *tags = appendConnectionState(*tags, cs) - *tags = append(*tags, getIPVersion(laddr)) + *tags = append(*tags, metricshelper.GetIPVersion(laddr)) connsClosed.WithLabelValues(*tags...).Inc() connDuration.WithLabelValues(*tags...).Observe(duration.Seconds()) } @@ -199,19 +184,12 @@ func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.Connectio defer metricshelper.PutStringSlice(tags) *tags = appendConnectionState(*tags, cs) - *tags = append(*tags, getIPVersion(laddr)) + *tags = append(*tags, metricshelper.GetIPVersion(laddr)) connHandshakeLatency.WithLabelValues(*tags...).Observe(t.Seconds()) } -var transports = [...]int{ma.P_CIRCUIT, ma.P_WEBRTC, ma.P_WEBTRANSPORT, ma.P_QUIC, ma.P_QUIC_V1, ma.P_WSS, ma.P_WS, ma.P_TCP} - func (m *metricsTracer) FailedDialing(addr ma.Multiaddr, err error) { - var transport string - for _, t := range transports { - if _, err := addr.ValueForProtocol(t); err == nil { - transport = ma.ProtocolWithCode(t).Name - } - } + transport := metricshelper.GetTransport(addr) e := "other" if errors.Is(err, context.Canceled) { e = "canceled" @@ -230,7 +208,7 @@ func (m *metricsTracer) FailedDialing(addr ma.Multiaddr, err error) { defer metricshelper.PutStringSlice(tags) *tags = append(*tags, transport, e) - *tags = append(*tags, getIPVersion(addr)) + *tags = append(*tags, metricshelper.GetIPVersion(addr)) dialError.WithLabelValues(*tags...).Inc() } diff --git a/p2p/protocol/holepunch/holepuncher.go b/p2p/protocol/holepunch/holepuncher.go index 49c39f5845..b651bd7822 100644 --- a/p2p/protocol/holepunch/holepuncher.go +++ b/p2p/protocol/holepunch/holepuncher.go @@ -101,10 +101,8 @@ func (hp *holePuncher) DirectConnect(p peer.ID) error { func (hp *holePuncher) directConnect(rp peer.ID) error { // short-circuit check to see if we already have a direct connection - for _, c := range hp.host.Network().ConnsToPeer(rp) { - if !isRelayAddress(c.RemoteMultiaddr()) { - return nil - } + if getDirectConnection(hp.host, rp) != nil { + return nil } // short-circuit hole punching if a direct dial works. @@ -133,8 +131,8 @@ func (hp *holePuncher) directConnect(rp peer.ID) error { log.Debugw("got inbound proxy conn", "peer", rp) // hole punch - for i := 0; i < maxRetries; i++ { - addrs, rtt, err := hp.initiateHolePunch(rp) + for i := 1; i <= maxRetries; i++ { + addrs, obsAddrs, rtt, err := hp.initiateHolePunch(rp) if err != nil { log.Debugw("hole punching failed", "peer", rp, "error", err) hp.tracer.ProtocolError(rp, err) @@ -159,44 +157,48 @@ func (hp *holePuncher) directConnect(rp peer.ID) error { hp.tracer.EndHolePunch(rp, dt, err) if err == nil { log.Debugw("hole punching with successful", "peer", rp, "time", dt) + hp.tracer.HolePunchFinished("initiator", i, addrs, obsAddrs, getDirectConnection(hp.host, rp)) return nil } case <-hp.ctx.Done(): timer.Stop() return hp.ctx.Err() } + if i == maxRetries { + hp.tracer.HolePunchFinished("initiator", maxRetries, addrs, obsAddrs, nil) + } } return fmt.Errorf("all retries for hole punch with peer %s failed", rp) } // initiateHolePunch opens a new hole punching coordination stream, // exchanges the addresses and measures the RTT. -func (hp *holePuncher) initiateHolePunch(rp peer.ID) ([]ma.Multiaddr, time.Duration, error) { +func (hp *holePuncher) initiateHolePunch(rp peer.ID) ([]ma.Multiaddr, []ma.Multiaddr, time.Duration, error) { hpCtx := network.WithUseTransient(hp.ctx, "hole-punch") sCtx := network.WithNoDial(hpCtx, "hole-punch") str, err := hp.host.NewStream(sCtx, rp, Protocol) if err != nil { - return nil, 0, fmt.Errorf("failed to open hole-punching stream: %w", err) + return nil, nil, 0, fmt.Errorf("failed to open hole-punching stream: %w", err) } defer str.Close() - addr, rtt, err := hp.initiateHolePunchImpl(str) + addr, obsAddr, rtt, err := hp.initiateHolePunchImpl(str) if err != nil { log.Debugf("%s", err) str.Reset() - return addr, rtt, err + return addr, obsAddr, rtt, err } - return addr, rtt, err + return addr, obsAddr, rtt, err } -func (hp *holePuncher) initiateHolePunchImpl(str network.Stream) ([]ma.Multiaddr, time.Duration, error) { +func (hp *holePuncher) initiateHolePunchImpl(str network.Stream) ([]ma.Multiaddr, []ma.Multiaddr, time.Duration, error) { if err := str.Scope().SetService(ServiceName); err != nil { - return nil, 0, fmt.Errorf("error attaching stream to holepunch service: %s", err) + return nil, nil, 0, fmt.Errorf("error attaching stream to holepunch service: %s", err) } if err := str.Scope().ReserveMemory(maxMsgSize, network.ReservationPriorityAlways); err != nil { - return nil, 0, fmt.Errorf("error reserving memory for stream: %s", err) + return nil, nil, 0, fmt.Errorf("error reserving memory for stream: %s", err) } defer str.Scope().ReleaseMemory(maxMsgSize) @@ -211,7 +213,7 @@ func (hp *holePuncher) initiateHolePunchImpl(str network.Stream) ([]ma.Multiaddr obsAddrs = hp.filter.FilterLocal(str.Conn().RemotePeer(), obsAddrs) } if len(obsAddrs) == 0 { - return nil, 0, errors.New("aborting hole punch initiation as we have no public address") + return nil, nil, 0, errors.New("aborting hole punch initiation as we have no public address") } start := time.Now() @@ -220,17 +222,17 @@ func (hp *holePuncher) initiateHolePunchImpl(str network.Stream) ([]ma.Multiaddr ObsAddrs: addrsToBytes(obsAddrs), }); err != nil { str.Reset() - return nil, 0, err + return nil, nil, 0, err } // wait for a CONNECT message from the remote peer var msg pb.HolePunch if err := rd.ReadMsg(&msg); err != nil { - return nil, 0, fmt.Errorf("failed to read CONNECT message from remote peer: %w", err) + return nil, nil, 0, fmt.Errorf("failed to read CONNECT message from remote peer: %w", err) } rtt := time.Since(start) if t := msg.GetType(); t != pb.HolePunch_CONNECT { - return nil, 0, fmt.Errorf("expect CONNECT message, got %s", t) + return nil, nil, 0, fmt.Errorf("expect CONNECT message, got %s", t) } addrs := removeRelayAddrs(addrsFromBytes(msg.ObsAddrs)) @@ -239,13 +241,13 @@ func (hp *holePuncher) initiateHolePunchImpl(str network.Stream) ([]ma.Multiaddr } if len(addrs) == 0 { - return nil, 0, errors.New("didn't receive any public addresses in CONNECT") + return nil, nil, 0, errors.New("didn't receive any public addresses in CONNECT") } if err := w.WriteMsg(&pb.HolePunch{Type: pb.HolePunch_SYNC.Enum()}); err != nil { - return nil, 0, fmt.Errorf("failed to send SYNC message for hole punching: %w", err) + return nil, nil, 0, fmt.Errorf("failed to send SYNC message for hole punching: %w", err) } - return addrs, rtt, nil + return addrs, obsAddrs, rtt, nil } func (hp *holePuncher) Close() error { diff --git a/p2p/protocol/holepunch/metrics.go b/p2p/protocol/holepunch/metrics.go new file mode 100644 index 0000000000..92ed20b14d --- /dev/null +++ b/p2p/protocol/holepunch/metrics.go @@ -0,0 +1,187 @@ +package holepunch + +import ( + "github.com/libp2p/go-libp2p/core/network" + "github.com/libp2p/go-libp2p/p2p/metricshelper" + ma "github.com/multiformats/go-multiaddr" + "github.com/prometheus/client_golang/prometheus" +) + +const metricNamespace = "libp2p_holepunch" + +var ( + directDialsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: metricNamespace, + Name: "direct_dials_total", + Help: "Direct Dials Total", + }, + []string{"outcome"}, + ) + hpAddressOutcomesTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: metricNamespace, + Name: "address_outcomes_total", + Help: "Hole Punch outcomes by Transport", + }, + []string{"side", "num_attempts", "ipv", "transport", "outcome"}, + ) + hpOutcomesTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: metricNamespace, + Name: "outcomes_total", + Help: "Hole Punch outcomes overall", + }, + []string{"side", "num_attempts", "outcome"}, + ) + + collectors = []prometheus.Collector{ + directDialsTotal, + hpAddressOutcomesTotal, + hpOutcomesTotal, + } +) + +type MetricsTracer interface { + HolePunchFinished(side string, attemptNum int, theirAddrs []ma.Multiaddr, ourAddr []ma.Multiaddr, directConn network.ConnMultiaddrs) + DirectDialFinished(success bool) +} + +type metricsTracer struct{} + +var _ MetricsTracer = &metricsTracer{} + +type metricsTracerSetting struct { + reg prometheus.Registerer +} + +type MetricsTracerOption func(*metricsTracerSetting) + +func WithRegisterer(reg prometheus.Registerer) MetricsTracerOption { + return func(s *metricsTracerSetting) { + if reg != nil { + s.reg = reg + } + } +} + +func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer { + setting := &metricsTracerSetting{reg: prometheus.DefaultRegisterer} + for _, opt := range opts { + opt(setting) + } + metricshelper.RegisterCollectors(setting.reg, collectors...) + // initialise metrics's labels so that the first data point is handled correctly + for _, side := range []string{"initiator", "receiver"} { + for _, numAttempts := range []string{"1", "2", "3", "4"} { + for _, outcome := range []string{"success", "failed", "cancelled", "no_suitable_address"} { + for _, ipv := range []string{"ip4", "ip6"} { + for _, transport := range []string{"quic", "quic-v1", "tcp", "webtransport"} { + hpAddressOutcomesTotal.WithLabelValues(side, numAttempts, ipv, transport, outcome) + } + } + if outcome == "cancelled" { + // not a valid outcome for the overall holepunch metric + continue + } + hpOutcomesTotal.WithLabelValues(side, numAttempts, outcome) + } + } + } + return &metricsTracer{} +} + +// HolePunchFinished tracks metrics completion of a holepunch. Metrics are tracked on +// a holepunch attempt level and on individual addresses involved in a holepunch. +// +// outcome for an address is computed as: +// +// - success: +// A direct connection was established with the peer using this address +// - cancelled: +// A direct connection was established with the peer but not using this address +// - failed: +// No direct connection was made to the peer and the peer reported an address +// with the same transport as this address +// - no_suitable_address: +// The peer reported no address with the same transport as this address +func (mt *metricsTracer) HolePunchFinished(side string, numAttempts int, + remoteAddrs []ma.Multiaddr, localAddrs []ma.Multiaddr, directConn network.ConnMultiaddrs) { + tags := metricshelper.GetStringSlice() + defer metricshelper.PutStringSlice(tags) + + *tags = append(*tags, side, getNumAttemptString(numAttempts)) + var dipv, dtransport string + if directConn != nil { + dipv = metricshelper.GetIPVersion(directConn.LocalMultiaddr()) + dtransport = metricshelper.GetTransport(directConn.LocalMultiaddr()) + } + + matchingAddressCount := 0 + // calculate holepunch outcome for all the addresses involved + for _, la := range localAddrs { + lipv := metricshelper.GetIPVersion(la) + ltransport := metricshelper.GetTransport(la) + + matchingAddress := false + for _, ra := range remoteAddrs { + ripv := metricshelper.GetIPVersion(ra) + rtransport := metricshelper.GetTransport(ra) + if ripv == lipv && rtransport == ltransport { + // the peer reported an address with the same transport + matchingAddress = true + matchingAddressCount++ + + *tags = append(*tags, ripv, rtransport) + if directConn != nil && dipv == ripv && dtransport == rtransport { + // the connection was made using this address + *tags = append(*tags, "success") + } else if directConn != nil { + // connection was made but not using this address + *tags = append(*tags, "cancelled") + } else { + // no connection was made + *tags = append(*tags, "failed") + } + hpAddressOutcomesTotal.WithLabelValues(*tags...).Inc() + *tags = (*tags)[:2] // 2 because we want to keep (side, numAttempts) + break + } + } + if !matchingAddress { + *tags = append(*tags, lipv, ltransport, "no_suitable_address") + hpAddressOutcomesTotal.WithLabelValues(*tags...).Inc() + *tags = (*tags)[:2] // 2 because we want to keep (side, numAttempts) + } + } + + outcome := "failed" + if directConn != nil { + outcome = "success" + } else if matchingAddressCount == 0 { + // there were no matching addresses, this attempt was going to fail + outcome = "no_suitable_address" + } + + *tags = append(*tags, outcome) + hpOutcomesTotal.WithLabelValues(*tags...).Inc() +} + +func getNumAttemptString(numAttempt int) string { + var attemptStr = [...]string{"0", "1", "2", "3", "4", "5"} + if numAttempt > 5 { + return "> 5" + } + return attemptStr[numAttempt] +} + +func (mt *metricsTracer) DirectDialFinished(success bool) { + tags := metricshelper.GetStringSlice() + defer metricshelper.PutStringSlice(tags) + if success { + *tags = append(*tags, "success") + } else { + *tags = append(*tags, "failed") + } + directDialsTotal.WithLabelValues(*tags...).Inc() +} diff --git a/p2p/protocol/holepunch/metrics_noalloc_test.go b/p2p/protocol/holepunch/metrics_noalloc_test.go new file mode 100644 index 0000000000..968354fd7d --- /dev/null +++ b/p2p/protocol/holepunch/metrics_noalloc_test.go @@ -0,0 +1,49 @@ +//go:build nocover + +package holepunch + +import ( + "math/rand" + "testing" + + "github.com/libp2p/go-libp2p/core/network" + ma "github.com/multiformats/go-multiaddr" +) + +func TestNoCoverNoAllocMetrics(t *testing.T) { + addrs1 := [][]ma.Multiaddr{ + { + ma.StringCast("/ip4/0.0.0.0/tcp/1"), + ma.StringCast("/ip4/1.2.3.4/udp/2/quic"), + }, + nil, + } + addrs2 := [][]ma.Multiaddr{ + { + ma.StringCast("/ip4/1.2.3.4/tcp/3"), + ma.StringCast("/ip4/1.2.3.4/udp/4/quic"), + }, + nil, + } + conns := []network.ConnMultiaddrs{ + &mockConnMultiaddrs{local: addrs1[0][0], remote: addrs2[0][0]}, + nil, + } + sides := []string{"initiator", "receiver"} + mt := NewMetricsTracer() + testcases := map[string]func(){ + "DirectDialFinished": func() { mt.DirectDialFinished(rand.Intn(2) == 1) }, + "HolePunchFinished": func() { + mt.HolePunchFinished(sides[rand.Intn(len(sides))], rand.Intn(maxRetries), addrs1[rand.Intn(len(addrs1))], + addrs2[rand.Intn(len(addrs2))], conns[rand.Intn(len(conns))]) + }, + } + for method, f := range testcases { + t.Run(method, func(t *testing.T) { + cnt := testing.AllocsPerRun(1000, f) + if cnt > 0 { + t.Errorf("%s Failed: expected 0 allocs got %0.2f", method, cnt) + } + }) + } +} diff --git a/p2p/protocol/holepunch/metrics_test.go b/p2p/protocol/holepunch/metrics_test.go new file mode 100644 index 0000000000..6d7bf160d5 --- /dev/null +++ b/p2p/protocol/holepunch/metrics_test.go @@ -0,0 +1,102 @@ +package holepunch + +import ( + "testing" + + "github.com/libp2p/go-libp2p/core/network" + ma "github.com/multiformats/go-multiaddr" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +func getCounterValue(t *testing.T, counter *prometheus.CounterVec, labels ...string) int { + t.Helper() + m := &dto.Metric{} + if err := counter.WithLabelValues(labels...).Write(m); err != nil { + t.Errorf("failed to extract counter value %s", err) + return 0 + } + return int(*m.Counter.Value) + +} + +func TestHolePunchOutcomeCounter(t *testing.T) { + t1 := ma.StringCast("/ip4/1.2.3.4/tcp/1") + t2 := ma.StringCast("/ip4/1.2.3.4/tcp/2") + + q1 := ma.StringCast("/ip4/1.2.3.4/udp/1/quic") + q2 := ma.StringCast("/ip4/1.2.3.4/udp/2/quic") + + q1v1 := ma.StringCast("/ip4/1.2.3.4/udp/1/quic-v1") + + type testcase struct { + name string + theirAddrs []ma.Multiaddr + ourAddrs []ma.Multiaddr + conn network.ConnMultiaddrs + result map[[3]string]int + } + testcases := []testcase{ + { + name: "connection success", + theirAddrs: []ma.Multiaddr{t1, q1}, + ourAddrs: []ma.Multiaddr{t2, q2}, + conn: &mockConnMultiaddrs{local: t1, remote: t2}, + result: map[[3]string]int{ + [...]string{"ip4", "tcp", "success"}: 1, + [...]string{"ip4", "quic", "cancelled"}: 1, + }, + }, + { + name: "connection failed", + theirAddrs: []ma.Multiaddr{t1}, + ourAddrs: []ma.Multiaddr{t2, q2}, + conn: nil, + result: map[[3]string]int{ + [...]string{"ip4", "tcp", "failed"}: 1, + [...]string{"ip4", "quic", "no_suitable_address"}: 1, + }, + }, + { + name: "no_suitable_address", + theirAddrs: []ma.Multiaddr{t1, q1}, + ourAddrs: []ma.Multiaddr{t2, q2, q1v1}, + conn: &mockConnMultiaddrs{local: q1, remote: q2}, + result: map[[3]string]int{ + [...]string{"ip4", "tcp", "cancelled"}: 1, + [...]string{"ip4", "quic", "failed"}: 0, + [...]string{"ip4", "quic", "success"}: 1, + [...]string{"ip4", "tcp", "success"}: 0, + [...]string{"ip4", "quic-v1", "no_suitable_address"}: 1, + }, + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + reg := prometheus.NewRegistry() + hpAddressOutcomesTotal.Reset() + mt := NewMetricsTracer(WithRegisterer(reg)) + for _, side := range []string{"receiver", "initiator"} { + mt.HolePunchFinished(side, 1, tc.theirAddrs, tc.ourAddrs, tc.conn) + for labels, value := range tc.result { + v := getCounterValue(t, hpAddressOutcomesTotal, side, "1", labels[0], labels[1], labels[2]) + if v != value { + t.Errorf("Invalid metric value %s: expected: %d got: %d", labels, value, v) + } + } + } + }) + } +} + +type mockConnMultiaddrs struct { + local, remote ma.Multiaddr +} + +func (cma *mockConnMultiaddrs) LocalMultiaddr() ma.Multiaddr { + return cma.local +} + +func (cma *mockConnMultiaddrs) RemoteMultiaddr() ma.Multiaddr { + return cma.remote +} diff --git a/p2p/protocol/holepunch/svc.go b/p2p/protocol/holepunch/svc.go index 5de7c7cf30..47bf434fb1 100644 --- a/p2p/protocol/holepunch/svc.go +++ b/p2p/protocol/holepunch/svc.go @@ -84,6 +84,7 @@ func NewService(h host.Host, ids identify.IDService, opts ...Option) (*Service, return nil, err } } + s.tracer.Start() s.refCount.Add(1) go s.watchForPublicAddr() @@ -165,24 +166,24 @@ func (s *Service) Close() error { return err } -func (s *Service) incomingHolePunch(str network.Stream) (rtt time.Duration, addrs []ma.Multiaddr, err error) { +func (s *Service) incomingHolePunch(str network.Stream) (rtt time.Duration, remoteAddrs []ma.Multiaddr, ownAddrs []ma.Multiaddr, err error) { // sanity check: a hole punch request should only come from peers behind a relay if !isRelayAddress(str.Conn().RemoteMultiaddr()) { - return 0, nil, fmt.Errorf("received hole punch stream: %s", str.Conn().RemoteMultiaddr()) + return 0, nil, nil, fmt.Errorf("received hole punch stream: %s", str.Conn().RemoteMultiaddr()) } - ownAddrs := removeRelayAddrs(s.ids.OwnObservedAddrs()) + ownAddrs = removeRelayAddrs(s.ids.OwnObservedAddrs()) if s.filter != nil { ownAddrs = s.filter.FilterLocal(str.Conn().RemotePeer(), ownAddrs) } // If we can't tell the peer where to dial us, there's no point in starting the hole punching. if len(ownAddrs) == 0 { - return 0, nil, errors.New("rejecting hole punch request, as we don't have any public addresses") + return 0, nil, nil, errors.New("rejecting hole punch request, as we don't have any public addresses") } if err := str.Scope().ReserveMemory(maxMsgSize, network.ReservationPriorityAlways); err != nil { log.Debugf("error reserving memory for stream: %s, err") - return 0, nil, err + return 0, nil, nil, err } defer str.Scope().ReleaseMemory(maxMsgSize) @@ -195,10 +196,10 @@ func (s *Service) incomingHolePunch(str network.Stream) (rtt time.Duration, addr str.SetDeadline(time.Now().Add(StreamTimeout)) if err := rd.ReadMsg(msg); err != nil { - return 0, nil, fmt.Errorf("failed to read message from initator: %w", err) + return 0, nil, nil, fmt.Errorf("failed to read message from initator: %w", err) } if t := msg.GetType(); t != pb.HolePunch_CONNECT { - return 0, nil, fmt.Errorf("expected CONNECT message from initiator but got %d", t) + return 0, nil, nil, fmt.Errorf("expected CONNECT message from initiator but got %d", t) } obsDial := removeRelayAddrs(addrsFromBytes(msg.ObsAddrs)) @@ -208,7 +209,7 @@ func (s *Service) incomingHolePunch(str network.Stream) (rtt time.Duration, addr log.Debugw("received hole punch request", "peer", str.Conn().RemotePeer(), "addrs", obsDial) if len(obsDial) == 0 { - return 0, nil, errors.New("expected CONNECT message to contain at least one address") + return 0, nil, nil, errors.New("expected CONNECT message to contain at least one address") } // Write CONNECT message @@ -217,18 +218,18 @@ func (s *Service) incomingHolePunch(str network.Stream) (rtt time.Duration, addr msg.ObsAddrs = addrsToBytes(ownAddrs) tstart := time.Now() if err := wr.WriteMsg(msg); err != nil { - return 0, nil, fmt.Errorf("failed to write CONNECT message to initator: %w", err) + return 0, nil, nil, fmt.Errorf("failed to write CONNECT message to initator: %w", err) } // Read SYNC message msg.Reset() if err := rd.ReadMsg(msg); err != nil { - return 0, nil, fmt.Errorf("failed to read message from initator: %w", err) + return 0, nil, nil, fmt.Errorf("failed to read message from initator: %w", err) } if t := msg.GetType(); t != pb.HolePunch_SYNC { - return 0, nil, fmt.Errorf("expected SYNC message from initiator but got %d", t) + return 0, nil, nil, fmt.Errorf("expected SYNC message from initiator but got %d", t) } - return time.Since(tstart), obsDial, nil + return time.Since(tstart), obsDial, ownAddrs, nil } func (s *Service) handleNewStream(str network.Stream) { @@ -249,7 +250,7 @@ func (s *Service) handleNewStream(str network.Stream) { } rp := str.Conn().RemotePeer() - rtt, addrs, err := s.incomingHolePunch(str) + rtt, addrs, ownAddrs, err := s.incomingHolePunch(str) if err != nil { s.tracer.ProtocolError(rp, err) log.Debugw("error handling holepunching stream from", "peer", rp, "error", err) @@ -270,6 +271,7 @@ func (s *Service) handleNewStream(str network.Stream) { err = holePunchConnect(s.ctx, s.host, pi, false) dt := time.Since(start) s.tracer.EndHolePunch(rp, dt, err) + s.tracer.HolePunchFinished("receiver", 1, addrs, ownAddrs, getDirectConnection(s.host, rp)) } // DirectConnect is only exposed for testing purposes. diff --git a/p2p/protocol/holepunch/tracer.go b/p2p/protocol/holepunch/tracer.go index abf31829d0..82e0ebfc0f 100644 --- a/p2p/protocol/holepunch/tracer.go +++ b/p2p/protocol/holepunch/tracer.go @@ -2,10 +2,10 @@ package holepunch import ( "context" - "fmt" "sync" "time" + "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" ma "github.com/multiformats/go-multiaddr" @@ -16,27 +16,57 @@ const ( tracerCacheDuration = 5 * time.Minute ) -// WithTracer is a Service option that enables hole punching tracing -func WithTracer(tr EventTracer) Option { +// WithTracer enables holepunch tracing with EventTracer et +func WithTracer(et EventTracer) Option { return func(hps *Service) error { - t := &tracer{ - tr: tr, + hps.tracer = &tracer{ + et: et, + mt: nil, + self: hps.host.ID(), + peers: make(map[peer.ID]struct { + counter int + last time.Time + }), + } + return nil + } +} + +// WithMetricsTracer enables holepunch Tracing with MetricsTracer mt +func WithMetricsTracer(mt MetricsTracer) Option { + return func(hps *Service) error { + hps.tracer = &tracer{ + et: nil, + mt: mt, + self: hps.host.ID(), + peers: make(map[peer.ID]struct { + counter int + last time.Time + }), + } + return nil + } +} + +// WithMetricsAndEventTracer enables holepunch tracking with MetricsTracer and EventTracer +func WithMetricsAndEventTracer(mt MetricsTracer, et EventTracer) Option { + return func(hps *Service) error { + hps.tracer = &tracer{ + et: et, + mt: mt, self: hps.host.ID(), peers: make(map[peer.ID]struct { counter int last time.Time }), } - t.refCount.Add(1) - t.ctx, t.ctxCancel = context.WithCancel(context.Background()) - go t.gc() - hps.tracer = t return nil } } type tracer struct { - tr EventTracer + et EventTracer + mt MetricsTracer self peer.ID refCount sync.WaitGroup @@ -103,16 +133,22 @@ func (t *tracer) DirectDialSuccessful(p peer.ID, dt time.Duration) { return } - t.tr.Trace(&Event{ - Timestamp: time.Now().UnixNano(), - Peer: t.self, - Remote: p, - Type: DirectDialEvtT, - Evt: &DirectDialEvt{ - Success: true, - EllapsedTime: dt, - }, - }) + if t.et != nil { + t.et.Trace(&Event{ + Timestamp: time.Now().UnixNano(), + Peer: t.self, + Remote: p, + Type: DirectDialEvtT, + Evt: &DirectDialEvt{ + Success: true, + EllapsedTime: dt, + }, + }) + } + + if t.mt != nil { + t.mt.DirectDialFinished(true) + } } func (t *tracer) DirectDialFailed(p peer.ID, dt time.Duration, err error) { @@ -120,108 +156,110 @@ func (t *tracer) DirectDialFailed(p peer.ID, dt time.Duration, err error) { return } - t.tr.Trace(&Event{ - Timestamp: time.Now().UnixNano(), - Peer: t.self, - Remote: p, - Type: DirectDialEvtT, - Evt: &DirectDialEvt{ - Success: false, - EllapsedTime: dt, - Error: err.Error(), - }, - }) + if t.et != nil { + t.et.Trace(&Event{ + Timestamp: time.Now().UnixNano(), + Peer: t.self, + Remote: p, + Type: DirectDialEvtT, + Evt: &DirectDialEvt{ + Success: false, + EllapsedTime: dt, + Error: err.Error(), + }, + }) + } + + if t.mt != nil { + t.mt.DirectDialFinished(false) + } } func (t *tracer) ProtocolError(p peer.ID, err error) { - if t == nil { - return + if t != nil && t.et != nil { + t.et.Trace(&Event{ + Timestamp: time.Now().UnixNano(), + Peer: t.self, + Remote: p, + Type: ProtocolErrorEvtT, + Evt: &ProtocolErrorEvt{ + Error: err.Error(), + }, + }) } - - t.tr.Trace(&Event{ - Timestamp: time.Now().UnixNano(), - Peer: t.self, - Remote: p, - Type: ProtocolErrorEvtT, - Evt: &ProtocolErrorEvt{ - Error: err.Error(), - }, - }) } func (t *tracer) StartHolePunch(p peer.ID, obsAddrs []ma.Multiaddr, rtt time.Duration) { - if t == nil { - return - } + if t != nil && t.et != nil { + addrs := make([]string, 0, len(obsAddrs)) + for _, a := range obsAddrs { + addrs = append(addrs, a.String()) + } - addrs := make([]string, 0, len(obsAddrs)) - for _, a := range obsAddrs { - addrs = append(addrs, a.String()) + t.et.Trace(&Event{ + Timestamp: time.Now().UnixNano(), + Peer: t.self, + Remote: p, + Type: StartHolePunchEvtT, + Evt: &StartHolePunchEvt{ + RemoteAddrs: addrs, + RTT: rtt, + }, + }) } - - t.tr.Trace(&Event{ - Timestamp: time.Now().UnixNano(), - Peer: t.self, - Remote: p, - Type: StartHolePunchEvtT, - Evt: &StartHolePunchEvt{ - RemoteAddrs: addrs, - RTT: rtt, - }, - }) } func (t *tracer) EndHolePunch(p peer.ID, dt time.Duration, err error) { - if t == nil { - return - } + if t != nil && t.et != nil { + evt := &EndHolePunchEvt{ + Success: err == nil, + EllapsedTime: dt, + } + if err != nil { + evt.Error = err.Error() + } - evt := &EndHolePunchEvt{ - Success: err == nil, - EllapsedTime: dt, - } - if err != nil { - evt.Error = err.Error() + t.et.Trace(&Event{ + Timestamp: time.Now().UnixNano(), + Peer: t.self, + Remote: p, + Type: EndHolePunchEvtT, + Evt: evt, + }) } +} - t.tr.Trace(&Event{ - Timestamp: time.Now().UnixNano(), - Peer: t.self, - Remote: p, - Type: EndHolePunchEvtT, - Evt: evt, - }) +func (t *tracer) HolePunchFinished(side string, numAttempts int, theirAddrs []ma.Multiaddr, ourAddrs []ma.Multiaddr, directConn network.Conn) { + if t != nil && t.mt != nil { + t.mt.HolePunchFinished(side, numAttempts, theirAddrs, ourAddrs, directConn) + } } func (t *tracer) HolePunchAttempt(p peer.ID) { - if t == nil { - return + if t != nil && t.et != nil { + now := time.Now() + t.mutex.Lock() + attempt := t.peers[p] + attempt.counter++ + counter := attempt.counter + attempt.last = now + t.peers[p] = attempt + t.mutex.Unlock() + + t.et.Trace(&Event{ + Timestamp: now.UnixNano(), + Peer: t.self, + Remote: p, + Type: HolePunchAttemptEvtT, + Evt: &HolePunchAttemptEvt{Attempt: counter}, + }) } - - now := time.Now() - t.mutex.Lock() - attempt := t.peers[p] - attempt.counter++ - counter := attempt.counter - attempt.last = now - t.peers[p] = attempt - t.mutex.Unlock() - - t.tr.Trace(&Event{ - Timestamp: now.UnixNano(), - Peer: t.self, - Remote: p, - Type: HolePunchAttemptEvtT, - Evt: &HolePunchAttemptEvt{Attempt: counter}, - }) } +// gc cleans up the peers map. This is only run when tracer is initialised with a non nil +// EventTracer func (t *tracer) gc() { - defer func() { - fmt.Println("done") - t.refCount.Done() - }() - + defer t.refCount.Done() timer := time.NewTicker(tracerGCInterval) defer timer.Stop() @@ -242,12 +280,18 @@ func (t *tracer) gc() { } } -func (t *tracer) Close() error { - if t == nil { - return nil +func (t *tracer) Start() { + if t != nil && t.et != nil { + t.ctx, t.ctxCancel = context.WithCancel(context.Background()) + t.refCount.Add(1) + go t.gc() } +} - t.ctxCancel() - t.refCount.Wait() +func (t *tracer) Close() error { + if t != nil && t.et != nil { + t.ctxCancel() + t.refCount.Wait() + } return nil } diff --git a/p2p/protocol/holepunch/util.go b/p2p/protocol/holepunch/util.go index 825f855ee8..13013568fe 100644 --- a/p2p/protocol/holepunch/util.go +++ b/p2p/protocol/holepunch/util.go @@ -55,6 +55,15 @@ func addrsFromBytes(bzs [][]byte) []ma.Multiaddr { return addrs } +func getDirectConnection(h host.Host, p peer.ID) network.Conn { + for _, c := range h.Network().ConnsToPeer(p) { + if !isRelayAddress(c.RemoteMultiaddr()) { + return c + } + } + return nil +} + func holePunchConnect(ctx context.Context, host host.Host, pi peer.AddrInfo, isClient bool) error { holePunchCtx := network.WithSimultaneousConnect(ctx, isClient, "hole-punching") forceDirectConnCtx := network.WithForceDirectDial(holePunchCtx, "hole-punching")