From 4e6c510af2da85c3b803c53e4562ad1d4dfa8457 Mon Sep 17 00:00:00 2001 From: Saransh Shankar <103821431+Wise-Wizard@users.noreply.github.com> Date: Sun, 18 Aug 2024 05:04:48 +0530 Subject: [PATCH] [v2] Configure healthcheck extension (#5831) **Which problem is this PR solving?** Part of #5633 **Description of the changes** * Integrate health check extension to monitor and report Jaeger V2 component's health * Enhance all-in-one CI test to ping the new health port **How was this change tested?** The changes were tested by running the following command: ```bash make test ``` ```bash CI actions and new Unit Tests ``` **Checklist** - [x] I have read [CONTRIBUTING_GUIDELINES.md](https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md) - [x] I have signed all commits - [x] I have added unit tests for the new functionality - [x] I have run lint and test steps successfully - `for jaeger: make lint test` - `for jaeger-ui: yarn lint` and `yarn test` --------- Signed-off-by: Wise-Wizard Signed-off-by: Yuri Shkuro Co-authored-by: Yuri Shkuro Co-authored-by: Yuri Shkuro --- .github/workflows/ci-docker-all-in-one.yml | 10 ++++++ cmd/all-in-one/all_in_one_test.go | 38 +++++++++++++++++----- cmd/jaeger/Dockerfile | 12 +++++++ cmd/jaeger/internal/all-in-one.yaml | 8 ++++- cmd/jaeger/internal/components.go | 2 ++ go.mod | 1 + go.sum | 2 ++ scripts/build-all-in-one-image.sh | 2 +- 8 files changed, 64 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci-docker-all-in-one.yml b/.github/workflows/ci-docker-all-in-one.yml index 95bb409f2f7..57fd058c661 100644 --- a/.github/workflows/ci-docker-all-in-one.yml +++ b/.github/workflows/ci-docker-all-in-one.yml @@ -66,6 +66,15 @@ jobs: ;; esac + - name: Determine healthcheck setting + id: healthcheck + run: | + if [[ "${{ matrix.mode.name }}" == "v1" ]]; then + echo "HEALTHCHECK_V2=false" >> $GITHUB_ENV + elif [[ "${{ matrix.mode.name }}" == "v2" ]]; then + echo "HEALTHCHECK_V2=true" >> $GITHUB_ENV + fi + - name: Build, test, and publish all-in-one image run: | bash scripts/build-all-in-one-image.sh \ @@ -74,3 +83,4 @@ jobs: env: DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} QUAY_TOKEN: ${{ secrets.QUAY_TOKEN }} + HEALTHCHECK_V2: ${{ env.HEALTHCHECK_V2 }} diff --git a/cmd/all-in-one/all_in_one_test.go b/cmd/all-in-one/all_in_one_test.go index 390a3197363..7e080e2129a 100644 --- a/cmd/all-in-one/all_in_one_test.go +++ b/cmd/all-in-one/all_in_one_test.go @@ -24,14 +24,15 @@ import ( ) // These tests are only run when the environment variable TEST_MODE=integration is set. -// An optional SKIP_SAMPLING=true environment variable can be used to skip sampling checks (for jaeger-v2). const ( - host = "0.0.0.0" - queryPort = "16686" - agentPort = "5778" - queryAddr = "http://" + host + ":" + queryPort - agentAddr = "http://" + host + ":" + agentPort + host = "0.0.0.0" + queryPort = "16686" + agentPort = "5778" + healthPort = "13133" + queryAddr = "http://" + host + ":" + queryPort + agentAddr = "http://" + host + ":" + agentPort + healthAddr = "http://" + host + ":" + healthPort + "/status" getServicesURL = "/api/services" getTraceURL = "/api/traces/" @@ -53,6 +54,7 @@ func TestAllInOne(t *testing.T) { // Check if the query service is available healthCheck(t) + t.Run("healthCheckV2", healthCheckV2) t.Run("checkWebUI", checkWebUI) t.Run("createTrace", createTrace) t.Run("getAPITrace", getAPITrace) @@ -61,8 +63,7 @@ func TestAllInOne(t *testing.T) { } func healthCheck(t *testing.T) { - require.Eventuallyf( - t, + require.Eventuallyf(t, func() bool { resp, err := http.Get(queryAddr + "/") if err == nil { @@ -72,11 +73,30 @@ func healthCheck(t *testing.T) { }, 10*time.Second, time.Second, - "expecting query endpoint to be healhty", + "expecting query endpoint to be healthy", ) t.Logf("Server detected at %s", queryAddr) } +func healthCheckV2(t *testing.T) { + if os.Getenv("HEALTHCHECK_V2") == "false" { + t.Skip("Skipping health check for V1 Binary") + } + require.Eventuallyf(t, + func() bool { + resp, err := http.Get(healthAddr) + if err == nil { + resp.Body.Close() + } + return err == nil + }, + 10*time.Second, + time.Second, + "expecting health endpoint to be healthy", + ) + t.Logf("V2-HealthCheck Server detected at %s", healthAddr) +} + func httpGet(t *testing.T, url string) (*http.Response, []byte) { t.Logf("Executing HTTP GET %s", url) req, err := http.NewRequest(http.MethodGet, url, nil) diff --git a/cmd/jaeger/Dockerfile b/cmd/jaeger/Dockerfile index 1028c8c58b8..5ff10914439 100644 --- a/cmd/jaeger/Dockerfile +++ b/cmd/jaeger/Dockerfile @@ -43,6 +43,12 @@ EXPOSE 9411 # Web HTTP EXPOSE 16686 +# Health Check gRPC +EXPOSE 13132 + +# Health Check HTTP +EXPOSE 13133 + COPY jaeger-linux-$TARGETARCH /cmd/jaeger/jaeger-linux COPY sampling-strategies.json /cmd/jaeger/sampling-strategies.json @@ -92,6 +98,12 @@ EXPOSE 16686 # Delve EXPOSE 12345 +# Health Check gRPC +EXPOSE 13132 + +# Health Check HTTP +EXPOSE 13133 + COPY jaeger-debug-linux-$TARGETARCH /cmd/jaeger/jaeger-linux COPY sampling-strategies.json /cmd/jaeger/sampling-strategies.json diff --git a/cmd/jaeger/internal/all-in-one.yaml b/cmd/jaeger/internal/all-in-one.yaml index 43540cf66f0..4dad6d883ef 100644 --- a/cmd/jaeger/internal/all-in-one.yaml +++ b/cmd/jaeger/internal/all-in-one.yaml @@ -1,5 +1,5 @@ service: - extensions: [jaeger_storage, jaeger_query, remote_sampling] + extensions: [jaeger_storage, jaeger_query, remote_sampling, healthcheckv2] pipelines: traces: receivers: [otlp, jaeger, zipkin] @@ -33,6 +33,12 @@ extensions: # initial_sampling_probability: 0.1 http: grpc: + + healthcheckv2: + use_v2: true + http: + endpoint: "0.0.0.0:13133" + grpc: receivers: otlp: diff --git a/cmd/jaeger/internal/components.go b/cmd/jaeger/internal/components.go index 7a615785b4d..5f5bcd03150 100644 --- a/cmd/jaeger/internal/components.go +++ b/cmd/jaeger/internal/components.go @@ -7,6 +7,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/connector/spanmetricsconnector" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/kafkaexporter" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusexporter" + "github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckv2extension" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/jaegerreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver" @@ -61,6 +62,7 @@ func (b builders) build() (otelcol.Factories, error) { // standard ballastextension.NewFactory(), zpagesextension.NewFactory(), + healthcheckv2extension.NewFactory(), // add-ons jaegerquery.NewFactory(), jaegerstorage.NewFactory(), diff --git a/go.mod b/go.mod index 0040f8c157d..6d672ed4638 100644 --- a/go.mod +++ b/go.mod @@ -168,6 +168,7 @@ require ( github.com/mostynb/go-grpc-compression v1.2.3 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/onsi/ginkgo v1.16.5 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckv2extension v0.107.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.107.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.107.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/kafka v0.107.0 // indirect diff --git a/go.sum b/go.sum index c33585ae2c1..42a0a44094d 100644 --- a/go.sum +++ b/go.sum @@ -386,6 +386,8 @@ github.com/open-telemetry/opentelemetry-collector-contrib/exporter/kafkaexporter github.com/open-telemetry/opentelemetry-collector-contrib/exporter/kafkaexporter v0.107.0/go.mod h1:gMY05z3fY6HnL/vNfyVYl3w4eihI8DftosfHuxqEeTg= github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusexporter v0.107.0 h1:g4LloH7qCMZfahxep5dKU9U18RnHGYsDw+/BQkzg9ts= github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusexporter v0.107.0/go.mod h1:+oXUZlAMk5MepQpL7OJiLEUyugPFlmCs8kL/ciaERAs= +github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckv2extension v0.107.0 h1:qAJyEY8c0OwAaX/avNOI1Ovoh2oRu744WXdlm6oefBc= +github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckv2extension v0.107.0/go.mod h1:6LO3bm94bdTS6W7d2vuYboDNtPzspTJBDZRema1gBb4= github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage v0.107.0 h1:ng+d8RpXN+cUUJTn1yA2xmXCEah0o7BsGzwsm3fDSsw= github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage v0.107.0/go.mod h1:pc5uB5lbPTNddFYA0bYy0TYkp4Yjh4teYXBPsRL2/Rk= github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.107.0 h1:NKH1JyZbqUSDGbIVqTyGJclmdnp6v4TQYfLhNI4tZno= diff --git a/scripts/build-all-in-one-image.sh b/scripts/build-all-in-one-image.sh index b73e24dbe51..d167e40e508 100755 --- a/scripts/build-all-in-one-image.sh +++ b/scripts/build-all-in-one-image.sh @@ -65,7 +65,7 @@ make build-ui run_integration_test() { local image_name="$1" - CID=$(docker run -d -p 16686:16686 -p 5778:5778 "${image_name}:${GITHUB_SHA}") + CID=$(docker run -d -p 16686:16686 -p 5778:5778 -p13133:13133 "${image_name}:${GITHUB_SHA}") if ! make all-in-one-integration-test ; then echo "---- integration test failed unexpectedly ----"