Skip to content

Commit

Permalink
[tmpnet] Misc cleanup for monitoring tooling (#3527)
Browse files Browse the repository at this point in the history
  • Loading branch information
maru-ava authored Nov 7, 2024
1 parent 6217810 commit b28affe
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 31 deletions.
7 changes: 6 additions & 1 deletion scripts/configure-local-metrics-collection.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,9 @@ echo "Wrote promtail configuration to ${PROMTAIL_CONFIG_FILE}"

echo "Metrics collection by prometheus can be started with ./scripts/run_prometheus.sh"
echo "Log collection by promtail can be started with ./scripts/run_promtail.sh"
echo "Grafana link: https://grafana-poc.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?var-filter=network_uuid%7C%3D%7C${METRICS_UUID}"

GRAFANA_LINK="https://grafana-poc.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?var-filter=network_uuid%7C%3D%7C${METRICS_UUID}"
METRICS_PATH="${HOME}/.avalanchego/metrics.txt"
echo "${GRAFANA_LINK}" > "${METRICS_PATH}"
echo "Metrics and logs can be viewed at: ${GRAFANA_LINK}"
echo "Link also saved to ${METRICS_PATH}"
34 changes: 18 additions & 16 deletions scripts/run_prometheus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,16 @@

set -euo pipefail

# Starts a prometheus instance in agent-mode, forwarding to a central
# instance. Intended to enable metrics collection from temporary networks running
# locally and in CI.
# - Starts a prometheus instance in agent-mode to collect metrics from nodes running
# locally and in CI.
#
# The prometheus instance will remain running in the background and will forward
# metrics to the central instance for all tmpnet networks.
# - prometheus will remain running in the background and will forward metrics to the
# specified prometheus endpoint.
#
# To stop it:
#
# $ kill -9 `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid
# - Each node is configured with a file written to ~/.tmpnet/prometheus/file_sd_configs
#
# - To stop the running instance:
# $ kill -9 `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid

# e.g.,
# PROMETHEUS_ID=<id> PROMETHEUS_PASSWORD=<password> ./scripts/run_prometheus.sh
Expand Down Expand Up @@ -45,7 +44,7 @@ fi

PROMETHEUS_PASSWORD="${PROMETHEUS_PASSWORD:-}"
if [[ -z "${PROMETHEUS_PASSWORD}" ]]; then
echo "Plase provide a value for PROMETHEUS_PASSWORD"
echo "Please provide a value for PROMETHEUS_PASSWORD"
exit 1
fi

Expand All @@ -64,13 +63,13 @@ if ! command -v "${CMD}" &> /dev/null; then

# Determine the arch
if which sw_vers &> /dev/null; then
echo "on macos, only amd64 binaries are available so rosetta is required on apple silicon machines."
echo "to avoid using rosetta, install via homebrew: brew install prometheus"
echo "On macos, only amd64 binaries are available so rosetta is required on apple silicon machines."
echo "To avoid using rosetta, install via homebrew: brew install prometheus"
DIST=darwin
else
ARCH="$(uname -i)"
if [[ "${ARCH}" != "x86_64" ]]; then
echo "on linux, only amd64 binaries are available. manual installation of prometheus is required."
echo "On linux, only amd64 binaries are available. manual installation of prometheus is required."
exit 1
else
DIST="linux"
Expand All @@ -90,8 +89,8 @@ fi
FILE_SD_PATH="${PROMETHEUS_WORKING_DIR}/file_sd_configs"
mkdir -p "${FILE_SD_PATH}"

echo "writing configuration..."
cat >"${PROMETHEUS_WORKING_DIR}"/prometheus.yaml <<EOL
CONFIG_PATH="${PROMETHEUS_WORKING_DIR}/prometheus.yaml"
cat > "${CONFIG_PATH}" <<EOL
# my global config
global:
# Make sure this value takes into account the network-shutdown-delay in tests/fixture/e2e/env.go
Expand All @@ -112,9 +111,12 @@ remote_write:
username: "${PROMETHEUS_ID}"
password: "${PROMETHEUS_PASSWORD}"
EOL
echo "Wrote configuration to ${CONFIG_PATH}"

echo "starting prometheus..."
echo "Starting prometheus..."
cd "${PROMETHEUS_WORKING_DIR}"
nohup "${CMD}" --config.file=prometheus.yaml --web.listen-address=localhost:0 --enable-feature=agent > prometheus.log 2>&1 &
echo $! > "${PIDFILE}"
echo "running with pid $(cat "${PIDFILE}")"
echo "prometheus started with pid $(cat "${PIDFILE}")"
# shellcheck disable=SC2016
echo 'To stop prometheus: "kill -SIGTERM `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid"'
26 changes: 14 additions & 12 deletions scripts/run_promtail.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@

set -euo pipefail

# Starts a promtail instance to collect logs from temporary networks
# running locally and in CI.
# - Starts a promtail instance to collect logs from nodes running locally and in CI.
#
# The promtail instance will remain running in the background and will forward
# logs to the central instance for all tmpnet networks.
# - promtail will remain running in the background and will forward logs to the
# specified Loki endpoint.
#
# To stop it:
#
# $ kill -9 `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid
# - Each node is configured with a file written to ~/.tmpnet/promtail/file_sd_configs/
#
# - To stop the running instance:
# $ kill -9 `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid

# e.g.,
# LOKI_ID=<id> LOKI_PASSWORD=<password> ./scripts/run_promtail.sh
Expand Down Expand Up @@ -44,7 +43,7 @@ fi

LOKI_PASSWORD="${LOKI_PASSWORD:-}"
if [[ -z "${LOKI_PASSWORD}" ]]; then
echo "Plase provide a value for LOKI_PASSWORD"
echo "Please provide a value for LOKI_PASSWORD"
exit 1
fi

Expand Down Expand Up @@ -86,8 +85,8 @@ fi
FILE_SD_PATH="${PROMTAIL_WORKING_DIR}/file_sd_configs"
mkdir -p "${FILE_SD_PATH}"

echo "writing configuration..."
cat >"${PROMTAIL_WORKING_DIR}"/promtail.yaml <<EOL
CONFIG_PATH="${PROMTAIL_WORKING_DIR}/promtail.yaml"
cat > "${CONFIG_PATH}" <<EOL
server:
http_listen_port: 0
grpc_listen_port: 0
Expand All @@ -107,9 +106,12 @@ scrape_configs:
- files:
- '${FILE_SD_PATH}/*.json'
EOL
echo "Wrote configuration to ${CONFIG_PATH}"

echo "starting promtail..."
echo "Starting promtail..."
cd "${PROMTAIL_WORKING_DIR}"
nohup "${CMD}" -config.file=promtail.yaml > promtail.log 2>&1 &
echo $! > "${PIDFILE}"
echo "running with pid $(cat "${PIDFILE}")"
echo "promtail started with pid $(cat "${PIDFILE}")"
# shellcheck disable=SC2016
echo 'To stop promtail: "kill -SIGTERM `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid"'
6 changes: 6 additions & 0 deletions tests/fixture/tmpnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ HOME
│ └── config.json // Custom chain configuration for all nodes
├── config.json // Common configuration (including defaults and pre-funded keys)
├── genesis.json // Genesis for all nodes
├── metrics.txt // Link for metrics and logs collected from the network (see: Monitoring)
├── network.env // Sets network dir env var to simplify network usage
└── subnets // Directory containing subnet config for both avalanchego and tmpnet
├── subnet-a.json // tmpnet configuration for subnet-a and its chain(s)
Expand Down Expand Up @@ -269,6 +270,11 @@ LOKI_ID=<id> LOKI_PASSWORD=<password> ./scripts/run_promtail.sh

# Network start emits link to grafana displaying collected logs and metrics
./build/tmpnetctl start-network

# Configure metrics collection from a local node binding to the default API
# port of 9650 and storing its logs in ~/.avalanchego/logs. The script will
# also emit a link to grafana.
./scripts/configure-local-metrics-collection.sh
```

### Metrics collection
Expand Down
11 changes: 9 additions & 2 deletions tests/fixture/tmpnet/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,17 @@ func (n *Network) StartNodes(ctx context.Context, w io.Writer, nodesToStart ...*
if _, err := fmt.Fprintf(w, "\nStarted network %s (UUID: %s)\n", n.Dir, n.UUID); err != nil {
return err
}
// Provide a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed

// Generate a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed
startTimeStr := strconv.FormatInt(startTime.UnixMilli(), 10)
metricsURL := MetricsLinkForNetwork(n.UUID, startTimeStr, "")
if _, err := fmt.Fprintf(w, "\nMetrics: %s\n", metricsURL); err != nil {

// Write link to the network path and to stdout
metricsPath := filepath.Join(n.Dir, "metrics.txt")
if err := os.WriteFile(metricsPath, []byte(metricsURL+"\n"), perms.ReadWrite); err != nil {
return fmt.Errorf("failed to write metrics link to %s: %w", metricsPath, err)
}
if _, err := fmt.Fprintf(w, "\nMetrics: %s\nLink also saved to %s\n", metricsURL, metricsPath); err != nil {
return err
}

Expand Down

0 comments on commit b28affe

Please sign in to comment.