diff --git a/.bazelrc b/.bazelrc index b4d109ea91..0cea4879c7 100644 --- a/.bazelrc +++ b/.bazelrc @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +common --enable_bzlmod + # Use the earliest supported C++ version for protoc. build --cxxopt=-std=c++14 --host_cxxopt=-std=c++14 diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml new file mode 100644 index 0000000000..3969bf56b4 --- /dev/null +++ b/.github/workflows/integration.yaml @@ -0,0 +1,55 @@ +--- +name: Integration tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: read-all + +jobs: + remote: + strategy: + fail-fast: false + matrix: + os: [large-ubuntu-22.04] + name: Integration tests / ${{ matrix.os }} + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: >- # v4.1.1 + actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + + - name: Install Nix + uses: >- #v7 + DeterminateSystems/nix-installer-action@5620eb4af6b562c53e4d4628c0b6e4f9d9ae8612 + + - name: Cache Nix derivations + uses: >- # Custom commit, last pinned at 2023-11-17. 
+ DeterminateSystems/magic-nix-cache-action@a04e6275a6bea232cd04fc6f3cbf20d4cb02a3e1 + + - name: Start Kubernetes cluster + run: > + nix develop --impure --command + bash -c "./deployment-examples/kubernetes/00_infra.sh \ + && ./deployment-examples/kubernetes/01_operations.sh" + + - name: Run warmup tests (intentionally fail) + continue-on-error: true + run: > # Folded so that bash -c is passed as the --command argument. + nix develop --impure --command + bash -c "bazel test integration_tests \ + --platforms=@rules_nixpkgs_core//platforms:host" + + - name: Clean outer directories + run: > + nix develop --impure --command + bash -c "bazel clean" + + - name: Run integration tests + run: > + nix develop --impure --command + bash -c "bazel test integration_tests \ + --platforms=@rules_nixpkgs_core//platforms:host" diff --git a/.github/workflows/lre.yaml b/.github/workflows/lre.yaml index 738097e623..078d4ae98e 100644 --- a/.github/workflows/lre.yaml +++ b/.github/workflows/lre.yaml @@ -70,7 +70,7 @@ jobs: - name: Get gateway IPs id: gateway-ips run: | - echo "cache_ip=$(kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV" + echo "cache_ip=$(kubectl get gtw insecure-cache -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV" echo "scheduler_ip=$(kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV" - name: Print cluster state diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 9c5cd057a5..0000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,91 +0,0 @@ -name: CI - -# Controls when the workflow will run. -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - docker-compose-compiles-nativelink: - # The type of runner that the job will run on. - runs-on: ubuntu-22.04 - strategy: - matrix: - # Which OS versions we will test on. 
- os_version: [ 20.04, 22.04 ] - steps: - - uses: actions/checkout@v3.5.3 - with: - fetch-depth: 0 - - - uses: docker/setup-buildx-action@v2 - - uses: docker/build-push-action@v4 - with: - context: . - file: ./deployment-examples/docker-compose/Dockerfile - build-args: | - OPT_LEVEL=opt - OS_VERSION=${{ matrix.os_version }} - ADDITIONAL_SETUP_WORKER_CMD=DEBIAN_FRONTEND=noninteractive apt-get install -y gcc g++ lld pkg-config python3 - load: true # This brings the build into `docker images` from buildx. - tags: trace_machina/nativelink:latest - - uses: docker/build-push-action@v4 - with: - context: . - file: ./deployment-examples/docker-compose/Dockerfile - build-args: | - OPT_LEVEL=opt - OS_VERSION=${{ matrix.os_version }} - load: true # This brings the build into `docker images` from buildx. - tags: trace_machina/nativelink:builder - target: builder - - - name: Compile Native Link with Native Link - run: | - mkdir -p ~/.cache && \ - cd deployment-examples/docker-compose && \ - docker-compose up -d && \ - cd ../../ && \ - docker run --rm --net=host -w /root/nativelink -v $PWD:/root/nativelink trace_machina/nativelink:builder sh -c ' \ - bazel clean && \ - bazel test //... \ - --remote_instance_name=main \ - --remote_cache=grpc://127.0.0.1:50051 \ - --remote_executor=grpc://127.0.0.1:50052 \ - --remote_default_exec_properties=cpu_count=1 \ - ' && \ - docker run --rm --net=host -w /root/nativelink -v $PWD:/root/nativelink trace_machina/nativelink:builder sh -c ' \ - bazel clean && \ - bazel test //... \ - --remote_instance_name=main \ - --remote_cache=grpc://127.0.0.1:50051 \ - --remote_executor=grpc://127.0.0.1:50052 \ - --remote_default_exec_properties=cpu_count=1 \ - ' 2>&1 | ( ! grep ' PASSED in ' ) # If we get PASSED without (cache) it means there's a cache issue. - - integration-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - # Which OS versions we will test on. 
- os_version: [ 20.04, 22.04 ] - steps: - - uses: actions/checkout@v3.5.3 - with: - fetch-depth: 0 - - - uses: docker/setup-buildx-action@v2 - - uses: docker/build-push-action@v4 - with: - context: . - file: ./deployment-examples/docker-compose/Dockerfile - build-args: | - OPT_LEVEL=fastbuild - OS_VERSION=${{ matrix.os_version }} - load: true # This brings the build into `docker images` from buildx. - tags: trace_machina/nativelink:latest - - - name: Run tests - run: ./run_integration_tests.sh diff --git a/BUILD.bazel b/BUILD.bazel index 314a35b1af..6614001815 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -50,3 +50,13 @@ sh_test( name = "dummy_test", srcs = [":dummy_test_sh"], ) + +sh_library( + name = "current_tag", + srcs = ["@nativelink-current-tag//:bin/nativelink-current-tag"], + target_compatible_with = select({ + "@rules_nixpkgs_core//constraints:support_nix": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + visibility = ["//visibility:public"], +) diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000000..3a6d2deec5 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,23 @@ +module( + name = "nativelink", + version = "0.0.0", +) + +bazel_dep( + name = "rules_bazel_integration_test", + version = "0.20.0", + dev_dependency = True, +) + +bazel_binaries = use_extension( + "@rules_bazel_integration_test//:extensions.bzl", + "bazel_binaries", + dev_dependency = True, +) +bazel_binaries.download(version_file = "//:.bazelversion") +use_repo( + bazel_binaries, + "bazel_binaries", + "bazel_binaries_bazelisk", + "build_bazel_bazel_.bazelversion", +) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index c6e8e5ad50..ce1ac04b69 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -85,3 +85,29 @@ http_archive( load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") protobuf_deps() + +http_archive( + name = "io_tweag_rules_nixpkgs", + sha256 = "980edfceef2e59e1122d9be6c52413bc298435f0a3d452532b8a48d7562ffd67", + strip_prefix = 
"rules_nixpkgs-0.10.0", + urls = [ + "https://github.com/tweag/rules_nixpkgs/releases/download/v0.10.0/rules_nixpkgs-0.10.0.tar.gz", + ], +) + +load( + "@io_tweag_rules_nixpkgs//nixpkgs:repositories.bzl", + "rules_nixpkgs_dependencies", +) + +rules_nixpkgs_dependencies() + +load("@io_tweag_rules_nixpkgs//nixpkgs:nixpkgs.bzl", "nixpkgs_flake_package") + +nixpkgs_flake_package( + name = "nativelink-current-tag", + fail_not_supported = False, + nix_flake_file = "//:flake.nix", + nix_flake_lock_file = "//:flake.lock", + package = "currentTag", +) diff --git a/deployment-examples/kubernetes/00_infra.sh b/deployment-examples/kubernetes/00_infra.sh index 0c70184675..172eaff014 100755 --- a/deployment-examples/kubernetes/00_infra.sh +++ b/deployment-examples/kubernetes/00_infra.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # This script sets up a local development cluster. It's roughly equivalent to # a managed K8s setup. diff --git a/deployment-examples/kubernetes/01_operations.sh b/deployment-examples/kubernetes/01_operations.sh index 48ae17a8fe..017a8ee39a 100755 --- a/deployment-examples/kubernetes/01_operations.sh +++ b/deployment-examples/kubernetes/01_operations.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # This script configures a cluster with a few standard deployments. 
# TODO(aaronmondal): Add Grafana, OpenTelemetry and the various other standard @@ -7,11 +8,11 @@ set -xeuo pipefail SRC_ROOT=$(git rev-parse --show-toplevel) -kubectl apply -f ${SRC_ROOT}/deployment-examples/kubernetes/gateway.yaml +kubectl apply -f "$SRC_ROOT"/deployment-examples/kubernetes/gateway.yaml IMAGE_TAG=$(nix eval .#image.imageTag --raw) -$(nix build .#image --print-build-logs --verbose) \ +nix build .#image --print-build-logs --verbose \ && ./result \ | skopeo \ copy \ @@ -21,9 +22,9 @@ $(nix build .#image --print-build-logs --verbose) \ IMAGE_TAG=$(nix eval .#lre.imageTag --raw) -echo $IMAGE_TAG +echo "$IMAGE_TAG" -$(nix build .#lre --print-build-logs --verbose) \ +nix build .#lre --print-build-logs --verbose \ && ./result \ | skopeo \ copy \ diff --git a/deployment-examples/kubernetes/02_application.sh b/deployment-examples/kubernetes/02_application.sh index a380309821..feb2ded7c4 100755 --- a/deployment-examples/kubernetes/02_application.sh +++ b/deployment-examples/kubernetes/02_application.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Get the nix derivation hash from the toolchain container, change the # `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and apply # the configuration. @@ -13,3 +14,21 @@ kubectl apply -k "$KUSTOMIZE_DIR" kubectl rollout status deploy/nativelink-cas kubectl rollout status deploy/nativelink-scheduler kubectl rollout status deploy/nativelink-worker + +# Verify endpoint reachability. 
+INSECURE_CACHE=$(kubectl get gtw insecure-cache -o=jsonpath='{.status.addresses[0].value}') +SCHEDULER=$(kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}') +CACHE=$(kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}') +PROMETHEUS=$(kubectl get gtw prometheus -o=jsonpath='{.status.addresses[0].value}') + +printf " +Insecure Cache IP: $INSECURE_CACHE -> --remote_cache=grpc://$INSECURE_CACHE:50051 +Cache IP: $CACHE +Scheduler IP: $SCHEDULER -> --remote_executor=grpc://$SCHEDULER:50052 +Prometheus IP: $PROMETHEUS + +Insecure cache status: $(curl http://"$INSECURE_CACHE":50051/status) +Cache status: $(curl https://"$CACHE":50071/status) +Scheduler status: $(curl http://"$SCHEDULER":50052/status) +Prometheus status: $(curl http://"$PROMETHEUS":50061/status) +" diff --git a/deployment-examples/kubernetes/03_delete_application.sh b/deployment-examples/kubernetes/03_delete_application.sh index 9055ac480b..e2a660603e 100755 --- a/deployment-examples/kubernetes/03_delete_application.sh +++ b/deployment-examples/kubernetes/03_delete_application.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Get the nix derivation hash from the toolchain container, change the # `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and delete # the configuration. diff --git a/deployment-examples/kubernetes/BUILD.bazel b/deployment-examples/kubernetes/BUILD.bazel new file mode 100644 index 0000000000..cb7b755626 --- /dev/null +++ b/deployment-examples/kubernetes/BUILD.bazel @@ -0,0 +1,35 @@ +sh_library( + name = "kustomization", + srcs = [ + "cas.json", + "cas.yaml", + "example-do-not-use-in-prod-key.pem", + "example-do-not-use-in-prod-rootca.crt", + "kustomization.yaml", + "routes.yaml", + "scheduler.json", + "scheduler.yaml", + "worker.json.template", + "worker.yaml", + ], + visibility = ["//visibility:public"], +) + +# This target is used by end-to-end tests running under k8s. 
To deploy a test +# environment, make sure to have a k8s cluster running before invoking the test +# and add this to the test script: +# +# source $(rlocation nativelink/deployment-examples/kubernetes/bazel_k8s_prelude.sh) +# +sh_library( + name = "bazel_k8s_prelude", + srcs = ["bazel_k8s_prelude.sh"], + data = [ + ":kustomization", + ], + visibility = ["//visibility:public"], + deps = [ + "@bazel_tools//tools/bash/runfiles", + "@nativelink//:current_tag", + ], +) diff --git a/deployment-examples/kubernetes/bazel_k8s_prelude.sh b/deployment-examples/kubernetes/bazel_k8s_prelude.sh new file mode 100755 index 0000000000..064834a52a --- /dev/null +++ b/deployment-examples/kubernetes/bazel_k8s_prelude.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Copyright 2022 The Native Link Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. 
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +NATIVELINK_TAG=$(cat "$(rlocation nativelink-current-tag/bin/nativelink-current-tag)") +KUSTOMIZE_DIR=$(rlocation nativelink/deployment-examples/kubernetes) + +remove_resources() { + kubectl kustomize \ + --load-restrictor LoadRestrictionsNone \ + "$KUSTOMIZE_DIR" \ + | kubectl delete -f - \ + || echo "Resource cleanup failed. Manually verify your cluster." >&2 +} + +trap remove_resources EXIT + +sed "s/__NATIVELINK_TOOLCHAIN_TAG__/${NATIVELINK_TAG}/g" \ + "$KUSTOMIZE_DIR/worker.json.template" \ + > "$KUSTOMIZE_DIR/worker.json" + +kubectl kustomize \ + --load-restrictor LoadRestrictionsNone \ + "$KUSTOMIZE_DIR" \ + | kubectl apply -f - + +kubectl rollout status deploy/nativelink-cas +kubectl rollout status deploy/nativelink-scheduler +kubectl rollout status deploy/nativelink-worker + +# Application code will run here. diff --git a/deployment-examples/kubernetes/cas.yaml b/deployment-examples/kubernetes/cas.yaml index 96fe183311..d041bde2a7 100644 --- a/deployment-examples/kubernetes/cas.yaml +++ b/deployment-examples/kubernetes/cas.yaml @@ -38,11 +38,14 @@ spec: - name: tls-volume secret: secretName: tls-secret +# TODO(aaronmondal): These should be different ports in a single nativelink-cas +# service. 
But that's bugged, so we use multiple services: +# https://github.com/cilium/cilium/issues/29099 --- apiVersion: v1 kind: Service metadata: - name: nativelink-cas + name: nativelink-insecure-cas spec: selector: app: nativelink-cas @@ -51,11 +54,29 @@ spec: protocol: TCP port: 50051 targetPort: 50051 - - name: metrics - protocol: TCP - port: 50061 - targetPort: 50061 +--- +apiVersion: v1 +kind: Service +metadata: + name: nativelink-cas +spec: + selector: + app: nativelink-cas + ports: - name: https protocol: TCP port: 50071 targetPort: 50071 +--- +apiVersion: v1 +kind: Service +metadata: + name: nativelink-prometheus +spec: + selector: + app: nativelink-cas + ports: + - name: metrics + protocol: TCP + port: 50061 + targetPort: 50061 diff --git a/deployment-examples/kubernetes/gateway.yaml b/deployment-examples/kubernetes/gateway.yaml index bc6bf5450a..701f8e0526 100644 --- a/deployment-examples/kubernetes/gateway.yaml +++ b/deployment-examples/kubernetes/gateway.yaml @@ -3,16 +3,40 @@ --- apiVersion: gateway.networking.k8s.io/v1 kind: Gateway +metadata: + name: insecure-cache +spec: + gatewayClassName: cilium + listeners: + - name: insecure-cache + protocol: HTTP + port: 50051 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway metadata: name: cache spec: gatewayClassName: cilium listeners: - name: cache + protocol: TLS + port: 50071 + tls: + mode: Passthrough +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: prometheus +spec: + gatewayClassName: cilium + listeners: + - name: prometheus protocol: HTTP - port: 50051 + port: 50061 --- -apiVersion: gateway.networking.k8s.io/v1beta1 +apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: scheduler diff --git a/deployment-examples/kubernetes/routes.yaml b/deployment-examples/kubernetes/routes.yaml index e094f9d020..96d9cc2af5 100644 --- a/deployment-examples/kubernetes/routes.yaml +++ b/deployment-examples/kubernetes/routes.yaml @@ -2,6 +2,20 @@ apiVersion: 
gateway.networking.k8s.io/v1 kind: HTTPRoute # TODO(aaronmondal): Use GRPCRoute after resolution of # https://github.com/TraceMachina/nativelink/issues/481 +metadata: + name: insecure-cache-route +spec: + parentRefs: + - sectionName: insecure-cache + name: insecure-cache + rules: + - backendRefs: + - name: nativelink-insecure-cas + port: 50051 +--- +apiVersion: gateway.networking.k8s.io/v1alpha2 +kind: TLSRoute # TODO(aaronmondal): Use GRPCRoute after resolution of + # https://github.com/TraceMachina/nativelink/issues/481 metadata: name: cache-route spec: @@ -11,7 +25,21 @@ spec: rules: - backendRefs: - name: nativelink-cas - port: 50051 + port: 50071 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute # TODO(aaronmondal): Pure GRPC is unstable here. Find out why + # and migrate to a GRPCRoute. +metadata: + name: prometheus-route +spec: + parentRefs: + - sectionName: prometheus + name: prometheus + rules: + - backendRefs: + - name: nativelink-prometheus + port: 50061 --- apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute # TODO(aaronmondal): Pure GRPC is unstable here. 
Find out why diff --git a/deployment-examples/kubernetes/scheduler.yaml b/deployment-examples/kubernetes/scheduler.yaml index 02a6892acb..4831a8f8fb 100644 --- a/deployment-examples/kubernetes/scheduler.yaml +++ b/deployment-examples/kubernetes/scheduler.yaml @@ -20,7 +20,7 @@ spec: - name: RUST_LOG value: info - name: CAS_ENDPOINT - value: nativelink-cas + value: nativelink-insecure-cas ports: - containerPort: 50052 volumeMounts: diff --git a/deployment-examples/kubernetes/worker.yaml b/deployment-examples/kubernetes/worker.yaml index 66397a377b..2b3726eb2f 100644 --- a/deployment-examples/kubernetes/worker.yaml +++ b/deployment-examples/kubernetes/worker.yaml @@ -20,7 +20,7 @@ spec: - name: RUST_LOG value: info - name: CAS_ENDPOINT - value: nativelink-cas + value: nativelink-insecure-cas - name: SCHEDULER_ENDPOINT value: nativelink-scheduler volumeMounts: diff --git a/flake.nix b/flake.nix index d44eb520cb..c66386950c 100644 --- a/flake.nix +++ b/flake.nix @@ -85,7 +85,7 @@ program = "${nativelink}/bin/cas"; }; }; - packages = { + packages = rec { inherit publish-ghcr local-image-test; default = nativelink; lre = import ./local-remote-execution/image.nix { inherit pkgs nativelink; }; @@ -108,6 +108,12 @@ }; }; }; + # This "package" contains just the tag of the "image" package. This + # allows us to import the derivation hash into bazel integration + # test scripts. + currentTag = pkgs.writeScriptBin "nativelink-current-tag" '' + ${lre.imageTag} + ''; }; checks = { # TODO(aaronmondal): Fix the tests. @@ -134,6 +140,9 @@ pkgs.kubectl pkgs.kubernetes-helm pkgs.cilium-cli + pkgs.jq + pkgs.curl + pkgs.kind # Additional tools from within our development environment. 
local-image-test diff --git a/integration_tests/BUILD.bazel b/integration_tests/BUILD.bazel new file mode 100644 index 0000000000..aa8c50c822 --- /dev/null +++ b/integration_tests/BUILD.bazel @@ -0,0 +1,36 @@ +load("@bazel_binaries//:defs.bzl", "bazel_binaries") +load( + "@rules_bazel_integration_test//bazel_integration_test:defs.bzl", + "integration_test_utils", + "script_test", +) + +SIMPLE_TESTS = [ + "simple_cache_test", + "simple_remote_execution_test", + # "simple_prometheus_test", TODO(aaronmondal): Broken. + "simple_tls_test", +] + +[ + script_test( + name = "{}".format(testname), + timeout = "moderate", + srcs = ["{}.sh".format(testname)], + bazel_binaries = bazel_binaries, + bazel_version = bazel_binaries.versions.current, + deps = [ + "//deployment-examples/kubernetes:bazel_k8s_prelude", + "//tools:integration_test_utils", + "@bazel_tools//tools/bash/runfiles", + ], + ) + for testname in SIMPLE_TESTS +] + +test_suite( + name = "integration_tests", + # This tag causes the target to be ignored during `bazel test //...`. + tags = integration_test_utils.DEFAULT_INTEGRATION_TEST_TAGS, + tests = [":{}".format(testname) for testname in SIMPLE_TESTS], +) diff --git a/integration_tests/README.md b/integration_tests/README.md new file mode 100644 index 0000000000..ae1b9b2e95 --- /dev/null +++ b/integration_tests/README.md @@ -0,0 +1,10 @@ +# Integration tests + +These tests run in Kubernetes and are gated behind platform constraints: + +```bash +./deployment-examples/kubernetes/00_infra.sh +./deployment-examples/kubernetes/01_operations.sh + +bazel test integration_tests --platforms=@rules_nixpkgs_core//platforms:host +``` diff --git a/integration_tests/simple_cache_test.sh b/integration_tests/simple_cache_test.sh old mode 100644 new mode 100755 index e0997004f9..25a20ea83e --- a/integration_tests/simple_cache_test.sh +++ b/integration_tests/simple_cache_test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2022 The Native Link Authors. 
All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,14 +15,36 @@ # This is a sanity check test to ensure we are caching test results. -if [[ $UNDER_TEST_RUNNER -ne 1 ]]; then - echo "This script should be run under run_integration_tests.sh" - exit 1 -fi -set -x +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. +set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +set -xeuo pipefail + +source "$(rlocation nativelink/deployment-examples/kubernetes/bazel_k8s_prelude.sh)" +source "$(rlocation nativelink/tools/integration_test_utils.sh)" + +BAZEL_CACHE_DIR=$(temporary_cache)/bazel +CACHE_IP=$(kubernetes_insecure_cache_ip) +SCHEDULER_IP=$(kubernetes_scheduler_ip) # First run our test under bazel. It should not be cached. -OUTPUT=$(bazel --output_base="$BAZEL_CACHE_DIR" test --config self_test //:dummy_test) +OUTPUT=$("${BIT_BAZEL_BINARY:-}" \ + --output_base="$BAZEL_CACHE_DIR" \ + test \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://"$CACHE_IP":50051 \ + --remote_executor=grpc://"$SCHEDULER_IP":50052 \ + //:dummy_test) + if [[ "$OUTPUT" =~ .*'(cached)'.* ]]; then echo "Expected first bazel run to not have test cached." echo "STDOUT:" @@ -31,10 +53,18 @@ if [[ "$OUTPUT" =~ .*'(cached)'.* ]]; then fi # Clean our local cache. 
-bazel --output_base="$BAZEL_CACHE_DIR" clean +"${BIT_BAZEL_BINARY:-}" --output_base="$BAZEL_CACHE_DIR" clean # Now run it under bazel again. This time the remote cache should have it. -OUTPUT=$(bazel --output_base="$BAZEL_CACHE_DIR" test --config self_test //:dummy_test) +OUTPUT=$("${BIT_BAZEL_BINARY:-}" \ + --output_base="$BAZEL_CACHE_DIR" \ + test \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://"$CACHE_IP":50051 \ + --remote_executor=grpc://"$SCHEDULER_IP":50052 \ + //:dummy_test) + if [[ ! "$OUTPUT" =~ .*'(cached)'.* ]]; then echo "Expected second bazel run to have test cached." echo "STDOUT:" diff --git a/integration_tests/simple_prometheus_test.sh b/integration_tests/simple_prometheus_test.sh old mode 100644 new mode 100755 index f588b46976..dd3e8c99ef --- a/integration_tests/simple_prometheus_test.sh +++ b/integration_tests/simple_prometheus_test.sh @@ -15,18 +15,44 @@ # This is a sanity check test to ensure we are caching test results. -if [[ $UNDER_TEST_RUNNER -ne 1 ]]; then - echo "This script should be run under run_integration_tests.sh" - exit 1 -fi +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. 
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +set -xeuo pipefail + +source "$(rlocation nativelink/deployment-examples/kubernetes/bazel_k8s_prelude.sh)" +source "$(rlocation nativelink/tools/integration_test_utils.sh)" -set -euo pipefail +BAZEL_CACHE_DIR=$(temporary_cache)/bazel +CACHE_IP=$(kubernetes_insecure_cache_ip) +SCHEDULER_IP=$(kubernetes_scheduler_ip) +PROMETHEUS_IP=$(kubernetes_prometheus_ip) # Run bazel to populate some of the metrics. -bazel --output_base="$BAZEL_CACHE_DIR" test --config self_test //:dummy_test +"${BIT_BAZEL_BINARY:-}" \ + --output_base="$BAZEL_CACHE_DIR" \ + build \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://"$CACHE_IP":50051 \ + --remote_executor=grpc://"$SCHEDULER_IP":50052 \ + //local-remote-execution/examples:hello_lre # Our service may take a few seconds to get started, so retry a few times. -all_contents="$(curl --retry 5 --retry-delay 0 --retry-max-time 30 http://127.0.0.1:50061/metrics)" +all_contents=$(curl \ + --retry 5 \ + --retry-delay 0 \ + --retry-max-time 30 \ + http://"$PROMETHEUS_IP":50061/metrics +) echo "$all_contents" @@ -40,7 +66,7 @@ echo 'Checking: nativelink_stores_AC_MAIN_STORE_evicting_map_max_bytes 500000000 grep -q 'nativelink_stores_AC_MAIN_STORE_evicting_map_max_bytes 500000000' <<< "$all_contents" # Ensure our store metrics are only published once. 
-count=$(grep 'nativelink_stores_AC_MAIN_STORE_evicting_map_max_bytes 500000000' <<< "$all_contents" | wc -l) +count=$(grep -c 'nativelink_stores_AC_MAIN_STORE_evicting_map_max_bytes 500000000' <<< "$all_contents") if [[ $count -ne 1 ]]; then echo "Expected to find 1 instance of CAS_MAIN_STORE, but found $count" exit 1 diff --git a/integration_tests/simple_remote_execution_test.sh b/integration_tests/simple_remote_execution_test.sh old mode 100644 new mode 100755 index 70565d4590..5dbd6a3843 --- a/integration_tests/simple_remote_execution_test.sh +++ b/integration_tests/simple_remote_execution_test.sh @@ -13,23 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License. -# This test is to ensure we can run the same job two times on a single node and no errors occur -# and the results are said to be executed remotely by bazel. +# This test is to ensure we can run the same job two times on a single node and +# no errors occur and the results are said to be executed remotely by bazel. # This test is also here to ensure GrpcStore is being used properly. -if [[ $UNDER_TEST_RUNNER -ne 1 ]]; then - echo "This script should be run under run_integration_tests.sh" - exit 1 -fi -set -xo pipefail +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. 
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +set -xeuo pipefail + +source "$(rlocation nativelink/deployment-examples/kubernetes/bazel_k8s_prelude.sh)" +source "$(rlocation nativelink/tools/integration_test_utils.sh)" -rm -rf "$CACHE_DIR/build_events.json" +CACHE_DIR=$(temporary_cache) +BAZEL_CACHE_DIR=$CACHE_DIR/bazel +CACHE_IP=$(kubernetes_insecure_cache_ip) +SCHEDULER_IP=$(kubernetes_scheduler_ip) # First run our test under bazel. It should not be cached. -OUTPUT=$( - bazel --output_base="$BAZEL_CACHE_DIR" \ - test --config self_test --config self_execute \ - //:dummy_test --nocache_test_results --build_event_json_file="$CACHE_DIR/build_events.json" +OUTPUT=$("${BIT_BAZEL_BINARY:-}" \ + --output_base="$BAZEL_CACHE_DIR" \ + test \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://"$CACHE_IP":50051 \ + --remote_executor=grpc://"$SCHEDULER_IP":50052 \ + //:dummy_test \ + --nocache_test_results \ + --build_event_json_file="$CACHE_DIR/build_events.json" ) STRATEGY=$(jq --slurp -r '.[] | select(.id.testResult.label=="//:dummy_test") | .testResult.executionInfo.strategy' "$CACHE_DIR/build_events.json") if [[ "$STRATEGY" != "remote" ]]; then @@ -40,16 +59,25 @@ if [[ "$STRATEGY" != "remote" ]]; then fi # Clean our local cache. 
-bazel --output_base="$BAZEL_CACHE_DIR" clean -rm -rf "$CACHE_DIR/build_events.json" +"${BIT_BAZEL_BINARY:-}" --output_base="$BAZEL_CACHE_DIR" clean +rm "$CACHE_DIR/build_events.json" # Now run it under bazel again. This time the remote cache should have it. -OUTPUT=$( - bazel --output_base="$BAZEL_CACHE_DIR" \ - test --config self_test --config self_execute \ - //:dummy_test --nocache_test_results --build_event_json_file="$CACHE_DIR/build_events.json" +OUTPUT=$("${BIT_BAZEL_BINARY:-}" \ + --output_base="$BAZEL_CACHE_DIR" \ + test \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://"$CACHE_IP":50051 \ + --remote_executor=grpc://"$SCHEDULER_IP":50052 \ + //:dummy_test \ + --nocache_test_results \ + --build_event_json_file="$CACHE_DIR/build_events.json" +) +STRATEGY=$(jq --slurp -r \ + '.[] | select(.id.testResult.label=="//:dummy_test") | .testResult.executionInfo.strategy' \ + "$CACHE_DIR/build_events.json" ) -STRATEGY=$(jq --slurp -r '.[] | select(.id.testResult.label=="//:dummy_test") | .testResult.executionInfo.strategy' "$CACHE_DIR/build_events.json") if [[ "$STRATEGY" != "remote" ]]; then echo "$OUTPUT" echo "" diff --git a/integration_tests/simple_tls_test.sh b/integration_tests/simple_tls_test.sh index cd134b48b7..1788bd01fe 100755 --- a/integration_tests/simple_tls_test.sh +++ b/integration_tests/simple_tls_test.sh @@ -13,12 +13,31 @@ # See the License for the specific language governing permissions and # limitations under the License. -if [[ $UNDER_TEST_RUNNER -ne 1 ]]; then - echo "This script should be run under run_integration_tests.sh" - exit 1 -fi +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. 
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +set -xeuo pipefail -RESULTS=$(curl --retry 5 --insecure --cacert ./example-do-not-use-in-prod-rootca.crt --key ./example-do-not-use-in-prod-key1.pem https://127.0.0.1:50071/status 2>&1) +source "$(rlocation nativelink/deployment-examples/kubernetes/bazel_k8s_prelude.sh)" +source "$(rlocation nativelink/tools/integration_test_utils.sh)" + +CACHE_IP=$(kubernetes_cache_ip) + +# TODO(aaronmondal): This doesn't validate any certificates such as the one used +# when deploying nativelink. It just checks that we have +# *some* form of TLS encryption. Properly set up certificates +# so that we can use the command below instead: +# +# RESULTS=$(curl --cacert mycert.crt https://"$CACHE_IP":50071/status 2>&1) +RESULTS="$(curl --retry 5 --insecure https://"$CACHE_IP":50071/status 2>&1)" echo "Results from curl: $RESULTS" diff --git a/run_integration_tests.sh b/run_integration_tests.sh deleted file mode 100755 index 3f0d05a87f..0000000000 --- a/run_integration_tests.sh +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/bash -# Copyright 2023 The Native Link Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -euo pipefail - -if [[ $EUID -eq 0 ]]; then - echo "This script should not be run as root." - exit 1 -fi - -TEST_PATTERNS=() - -while [[ $# -gt 0 ]]; do - case $1 in - --help) - echo <<'EOT' -Runner for integration tests - -Usage: - run_integration_tests.sh [TEST_PATTERNS...] - -TEST_PATTERNS: Name of test you wish to execute. Wildcard (*) supported. - Default: '*' -EOT - ;; - -*|--*) - echo "Unknown option $1" - exit 1 - ;; - *) - TEST_PATTERNS+=("$1") - shift # past argument - ;; - esac -done - -if ! docker --version; then - echo "This script must be run as root due to docker permission issues (try with 'sudo')" - exit 1 -fi - -if [[ "${#TEST_PATTERNS[@]}" -eq 0 ]]; then - TEST_PATTERNS=("*") -fi - -SELF_DIR=$(realpath $(dirname $0)) -cd "$SELF_DIR/deployment-examples/docker-compose" - -export UNDER_TEST_RUNNER=1 - -# Ensure our cache locations are empty. -sudo rm -rf ~/.cache/nativelink -mkdir -p ~/.cache/nativelink - -# Ensure our docker compose is not running. -sudo docker-compose rm --stop -f - -export TMPDIR=$HOME/.cache/nativelink/ -mkdir -p "$TMPDIR" - -if [[ "$OSTYPE" == "darwin"* ]]; then - export CACHE_DIR=$(mktemp -d "${TMPDIR}nativelink-integration-test") -else - echo "Assumes Linux/WSL" - export CACHE_DIR=$(mktemp -d --tmpdir="$TMPDIR" --suffix="-nativelink-integration-test") -fi - -export BAZEL_CACHE_DIR="$CACHE_DIR/bazel" -trap "sudo rm -rf $CACHE_DIR; sudo docker-compose rm --stop -f" EXIT - -echo "" # New line. 
- -DID_FAIL=0 - -export NATIVELINK_DIR="$CACHE_DIR/nativelink" -mkdir -p "$NATIVELINK_DIR" - -for pattern in "${TEST_PATTERNS[@]}"; do - find "$SELF_DIR/integration_tests/" -name "$pattern" -type f -print0 | while IFS= read -r -d $'\0' fullpath; do - # Cleanup. - echo "Cleaning up cache directories TURBOC_CACHE_DIR: $NATIVELINK_DIR" - echo "Checking for existince of the NATIVELINK_DIR" - if [ -d "$NATIVELINK_DIR" ]; then - sudo find "$NATIVELINK_DIR" -delete - else - echo "Directory $NATIVELINK_DIR does not exist." - fi - - bazel --output_base="$BAZEL_CACHE_DIR" clean - FILENAME=$(basename $fullpath) - echo "Running test $FILENAME" - sudo docker-compose up -d - if perl -e 'alarm shift; exec @ARGV' 30 bash -c 'until sudo docker-compose logs | grep -q "Ready, listening on"; do sleep 1; done' - then - echo "String 'Ready, listening on' found in the logs." - else - echo "String 'Ready, listening on' not found in the logs within the given time." - fi - set +e - bash -euo pipefail "$fullpath" - EXIT_CODE="$?" - set -e - if [[ $EXIT_CODE -eq 0 ]]; then - echo "$FILENAME passed" - else - echo "$FILENAME failed with exit code $EXIT_CODE" - sudo docker-compose logs - exit $EXIT_CODE - fi - sudo docker-compose rm --stop -f - echo "" # New line. - done -done - -echo "All tests passed!" diff --git a/tools/BUILD b/tools/BUILD.bazel similarity index 74% rename from tools/BUILD rename to tools/BUILD.bazel index 315c102c92..7630e0f142 100644 --- a/tools/BUILD +++ b/tools/BUILD.bazel @@ -1,4 +1,4 @@ -# Copyright 2022 The Native Link Authors. All rights reserved. +# Copyright 2023 The Native Link Authors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,3 +13,9 @@ # limitations under the License. 
exports_files(["tsan.sh"]) + +sh_library( + name = "integration_test_utils", + srcs = ["integration_test_utils.sh"], + visibility = ["//visibility:public"], +) diff --git a/tools/integration_test_utils.sh b/tools/integration_test_utils.sh new file mode 100644 index 0000000000..51bd54d6bd --- /dev/null +++ b/tools/integration_test_utils.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Copyright 2023 The Native Link Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Print the first status address of the "insecure-cache" gtw resource. +kubernetes_insecure_cache_ip() { + kubectl get gtw insecure-cache -o=jsonpath='{.status.addresses[0].value}' +} + +# Print the first status address of the "cache" gtw resource. +kubernetes_cache_ip() { + kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}' +} + +# Print the first status address of the "scheduler" gtw resource. +kubernetes_scheduler_ip() { + kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}' +} + +# Print the first status address of the "prometheus" gtw resource. +kubernetes_prometheus_ip() { + kubectl get gtw prometheus -o=jsonpath='{.status.addresses[0].value}' +} + +# Create a temporary directory and print its path on stdout. +# +# Called directly, an EXIT trap removes the directory when the shell exits. +# Called as $(temporary_cache), the body runs in a subshell, where an EXIT +# trap would fire as soon as the substitution returns and delete the +# directory before the caller can use it. No trap is set in that case and +# the caller is responsible for cleanup. +temporary_cache() { + if [[ "$OSTYPE" == "darwin"* ]]; then + cache_dir=$(mktemp -d -t "nativelink-integration-test") || return 1 + else + echo "Assumes Linux/WSL" >&2 + cache_dir=$(mktemp -d --suffix="-nativelink-integration-test") || return 1 + fi + + echo "$cache_dir" + if [[ "$BASH_SUBSHELL" -eq 0 ]]; then + trap 'rm -rf "$cache_dir";' EXIT + fi +}