Skip to content

Commit

Permalink
[v2] Configure health check extension for all configs (#5861)
Browse files Browse the repository at this point in the history
**Which problem is this PR solving?**

Part of #5633, part of #5859

**Description of the changes**
* Integrate health check extension to monitor and report Jaeger V2
component's health
* Enhance all-in-one CI test to ping the new health port

**How was this change tested?**

The changes were tested by running the following command:

```bash
make test
```
```bash
CI actions and new Unit Tests
```
**Checklist**

- [x] I have read
[CONTRIBUTING_GUIDELINES.md](https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md)
- [x] I have signed all commits
- [x] I have added unit tests for the new functionality
- [x] I have run lint and test steps successfully
  - `for jaeger: make lint test`
  - `for jaeger-ui: yarn lint` and `yarn test`

---------

Signed-off-by: Wise-Wizard <[email protected]>
Signed-off-by: Yuri Shkuro <[email protected]>
Co-authored-by: Yuri Shkuro <[email protected]>
Co-authored-by: Yuri Shkuro <[email protected]>
  • Loading branch information
3 people authored Aug 23, 2024
1 parent 8f2543c commit 18cb683
Show file tree
Hide file tree
Showing 14 changed files with 146 additions and 51 deletions.
6 changes: 5 additions & 1 deletion cmd/jaeger/config-badger.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
service:
extensions: [jaeger_storage, jaeger_query]
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [jaeger_storage_exporter]

extensions:
healthcheckv2:
use_v2: true
http:

jaeger_query:
trace_storage: some_store
trace_storage_archive: another_store
Expand Down
6 changes: 5 additions & 1 deletion cmd/jaeger/config-cassandra.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
service:
extensions: [jaeger_storage, jaeger_query]
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [jaeger_storage_exporter]

extensions:
healthcheckv2:
use_v2: true
http:

jaeger_query:
trace_storage: some_storage
trace_storage_archive: another_storage
Expand Down
6 changes: 5 additions & 1 deletion cmd/jaeger/config-elasticsearch.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
service:
extensions: [jaeger_storage, jaeger_query]
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [jaeger_storage_exporter]

extensions:
healthcheckv2:
use_v2: true
http:

jaeger_query:
trace_storage: some_storage
trace_storage_archive: another_storage
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
service:
extensions: [healthcheckv2]
pipelines:
traces:
receivers: [otlp, jaeger]
Expand All @@ -10,6 +11,11 @@ service:
metrics:
level: detailed

extensions:
healthcheckv2:
use_v2: true
http:

receivers:
otlp:
protocols:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
service:
extensions: [jaeger_storage, jaeger_query]
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
pipelines:
traces:
receivers: [kafka]
Expand All @@ -12,6 +12,12 @@ service:
level: debug

extensions:
healthcheckv2:
use_v2: true
http:
# use different port to avoid conflict with collector
endpoint: 0.0.0.0:14133

jaeger_query:
trace_storage: some_storage

Expand All @@ -21,7 +27,7 @@ extensions:
memory:
max_traces: 100000

receivers:
receivers:
kafka:
brokers:
- localhost:9092
Expand Down
6 changes: 5 additions & 1 deletion cmd/jaeger/config-opensearch.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
service:
extensions: [jaeger_storage, jaeger_query]
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [jaeger_storage_exporter]

extensions:
healthcheckv2:
use_v2: true
http:

jaeger_query:
trace_storage: some_storage
trace_storage_archive: another_storage
Expand Down
6 changes: 5 additions & 1 deletion cmd/jaeger/config-remote-storage.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
service:
extensions: [jaeger_storage, jaeger_query]
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [jaeger_storage_exporter]

extensions:
healthcheckv2:
use_v2: true
http:

jaeger_query:
trace_storage: some-storage
ui_config: ./cmd/jaeger/config-ui.json
Expand Down
7 changes: 5 additions & 2 deletions cmd/jaeger/config.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
service:
extensions: [jaeger_storage, jaeger_query, remote_sampling]
extensions: [jaeger_storage, jaeger_query, remote_sampling, healthcheckv2]
pipelines:
traces:
receivers: [otlp, jaeger, zipkin]
processors: [batch, adaptive_sampling]
exporters: [jaeger_storage_exporter]

extensions:
# health_check:
healthcheckv2:
use_v2: true
http:

# pprof:
# endpoint: 0.0.0.0:1777
# zpages:
Expand Down
102 changes: 70 additions & 32 deletions cmd/jaeger/internal/integration/e2e_integration.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
package integration

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -41,6 +44,7 @@ type E2EStorageIntegration struct {

SkipStorageCleaner bool
ConfigFile string
BinaryName string
HealthCheckEndpoint string
}

Expand All @@ -49,32 +53,34 @@ type E2EStorageIntegration struct {
// This function should be called before any of the tests start.
func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
logger := zaptest.NewLogger(t, zaptest.WrapOptions(zap.AddCaller()))
if s.BinaryName == "" {
s.BinaryName = "jaeger-v2"
}
configFile := s.ConfigFile
if !s.SkipStorageCleaner {
configFile = createStorageCleanerConfig(t, s.ConfigFile, storage)
}

configFile, err := filepath.Abs(configFile)
require.NoError(t, err, "Failed to get absolute path of the config file")
require.FileExists(t, configFile, "Config file does not exist at the resolved path")

t.Logf("Starting Jaeger-v2 in the background with config file %s", configFile)
t.Logf("Starting %s in the background with config file %s", s.BinaryName, configFile)

outFile, err := os.OpenFile(
filepath.Join(t.TempDir(), "jaeger_output_logs.txt"),
os.O_CREATE|os.O_WRONLY,
os.ModePerm,
)
require.NoError(t, err)
t.Logf("Writing the Jaeger-v2 output logs into %s", outFile.Name())
t.Logf("Writing the %s output logs into %s", s.BinaryName, outFile.Name())

errFile, err := os.OpenFile(
filepath.Join(t.TempDir(), "jaeger_error_logs.txt"),
os.O_CREATE|os.O_WRONLY,
os.ModePerm,
)
require.NoError(t, err)
t.Logf("Writing the Jaeger-v2 error logs into %s", errFile.Name())
t.Logf("Writing the %s error logs into %s", s.BinaryName, errFile.Name())

cmd := exec.Cmd{
Path: "./cmd/jaeger/jaeger",
Expand All @@ -88,57 +94,38 @@ func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
}
t.Logf("Running command: %v", cmd.Args)
require.NoError(t, cmd.Start())

// Wait for the binary to start and become ready to serve requests.
healthCheckEndpoint := s.HealthCheckEndpoint
if healthCheckEndpoint == "" {
healthCheckEndpoint = fmt.Sprintf("http://localhost:%d/", ports.QueryHTTP)
}
require.Eventually(t, func() bool {
t.Logf("Checking if Jaeger-v2 is available on %s", healthCheckEndpoint)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthCheckEndpoint, nil)
if err != nil {
t.Logf("HTTP request creation failed: %v", err)
return false
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Logf("HTTP request failed: %v", err)
return false
}
defer resp.Body.Close()
return resp.StatusCode == http.StatusOK
}, 60*time.Second, 3*time.Second, "Jaeger-v2 did not start")
t.Log("Jaeger-v2 is ready")
t.Cleanup(func() {
if err := cmd.Process.Kill(); err != nil {
t.Errorf("Failed to kill Jaeger-v2 process: %v", err)
t.Errorf("Failed to kill %s process: %v", s.BinaryName, err)
}
if t.Failed() {
// A Github Actions special annotation to create a foldable section
// in the Github runner output.
// https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#grouping-log-lines
fmt.Println("::group::🚧 🚧 🚧 Jaeger-v2 binary logs")
fmt.Printf("::group::🚧 🚧 🚧 %s binary logs\n", s.BinaryName)
outLogs, err := os.ReadFile(outFile.Name())
if err != nil {
t.Errorf("Failed to read output logs: %v", err)
} else {
fmt.Printf("🚧 🚧 🚧 Jaeger-v2 output logs:\n%s", outLogs)
fmt.Printf("🚧 🚧 🚧 %s output logs:\n%s", s.BinaryName, outLogs)
}

errLogs, err := os.ReadFile(errFile.Name())
if err != nil {
t.Errorf("Failed to read error logs: %v", err)
} else {
fmt.Printf("🚧 🚧 🚧 Jaeger-v2 error logs:\n%s", errLogs)
fmt.Printf("🚧 🚧 🚧 %s error logs:\n%s", s.BinaryName, errLogs)
}
// End of Github Actions foldable section annotation.
fmt.Println("::endgroup::")
}
})

// Wait for the binary to start and become ready to serve requests.
require.Eventually(t, func() bool { return s.doHealthCheck(t) },
60*time.Second, 3*time.Second, "%s did not start", s.BinaryName)
t.Logf("%s is ready", s.BinaryName)

s.SpanWriter, err = createSpanWriter(logger, otlpPort)
require.NoError(t, err)
s.SpanReader, err = createSpanReader(logger, ports.QueryGRPC)
Expand All @@ -150,6 +137,56 @@ func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
})
}

// doHealthCheck performs a single HTTP GET against the health check endpoint
// and reports whether the binary under test answered as healthy.
//
// The endpoint is s.HealthCheckEndpoint, falling back to
// http://localhost:13133/status when that field is empty. The request is
// bounded by a 3-second timeout. Every failure (request error, unreadable
// body, non-200 response, undecodable JSON, or a status other than
// "StatusOK") is logged through t.Logf and reported as false rather than
// failing the test, so the function can be used as a polling condition.
//
// Endpoints whose URL does not end in "/status" are considered healthy on any
// 200 response, without inspecting the body.
func (s *E2EStorageIntegration) doHealthCheck(t *testing.T) bool {
	healthCheckEndpoint := s.HealthCheckEndpoint
	if healthCheckEndpoint == "" {
		healthCheckEndpoint = "http://localhost:13133/status"
	}
	t.Logf("Checking if %s is available on %s", s.BinaryName, healthCheckEndpoint)

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthCheckEndpoint, nil)
	if err != nil {
		t.Logf("HTTP request creation failed: %v", err)
		return false
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		t.Logf("HTTP request failed: %v", err)
		return false
	}
	defer resp.Body.Close()

	// Read the body before checking the status code so that it can be
	// included in the log output of every failure path below.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Logf("Failed to read HTTP response body: %v", err)
		return false
	}
	if resp.StatusCode != http.StatusOK {
		t.Logf("HTTP response not OK: %v", string(body))
		return false
	}
	// for backwards compatibility with other healthchecks
	if !strings.HasSuffix(healthCheckEndpoint, "/status") {
		t.Logf("OK HTTP from endpoint that is not healthcheckv2")
		return true
	}

	var healthResponse struct {
		Status string `json:"status"`
	}
	if err := json.NewDecoder(bytes.NewReader(body)).Decode(&healthResponse); err != nil {
		t.Logf("Failed to decode JSON response '%s': %v", string(body), err)
		return false
	}

	// A 200 response is not sufficient for a "/status" endpoint: the
	// "status" field of the JSON body must also be "StatusOK".
	if healthResponse.Status != "StatusOK" {
		t.Logf("Received non-OK status %s: %s", healthResponse.Status, string(body))
		return false
	}
	return true
}

// e2eCleanUp closes the SpanReader and SpanWriter gRPC connection.
// This function should be called after all the tests are finished.
func (s *E2EStorageIntegration) e2eCleanUp(t *testing.T) {
Expand Down Expand Up @@ -205,6 +242,7 @@ func createStorageCleanerConfig(t *testing.T, configFile string, storage string)
err = os.WriteFile(tempFile, newData, 0o600)
require.NoError(t, err)

t.Logf("Transformed configuration file %s to %s", configFile, tempFile)
return tempFile
}

Expand Down
4 changes: 4 additions & 0 deletions cmd/jaeger/internal/integration/grpc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
package integration

import (
"fmt"
"testing"

"github.com/jaegertracing/jaeger/plugin/storage/integration"
"github.com/jaegertracing/jaeger/ports"
)

type GRPCStorageIntegration struct {
Expand All @@ -30,6 +32,8 @@ func TestGRPCStorage(t *testing.T) {
s := &GRPCStorageIntegration{
E2EStorageIntegration: E2EStorageIntegration{
ConfigFile: "../../config-remote-storage.yaml",
// TODO this should be removed in favor of default health check endpoint
HealthCheckEndpoint: fmt.Sprintf("http://localhost:%d/", ports.QueryHTTP),
},
}
s.CleanUp = s.cleanUp
Expand Down
Loading

0 comments on commit 18cb683

Please sign in to comment.